TDMatDVecMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDVECMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemv.h>
44 #include <blaze/math/blas/trmv.h>
55 #include <blaze/math/Functions.h>
56 #include <blaze/math/Intrinsics.h>
57 #include <blaze/math/shims/Reset.h>
79 #include <blaze/system/BLAS.h>
82 #include <blaze/util/Assert.h>
83 #include <blaze/util/Complex.h>
86 #include <blaze/util/DisableIf.h>
87 #include <blaze/util/EnableIf.h>
88 #include <blaze/util/Exception.h>
90 #include <blaze/util/SelectType.h>
91 #include <blaze/util/Types.h>
100 
101 
102 namespace blaze {
103 
104 //=================================================================================================
105 //
106 // CLASS TDMATDVECMULTEXPR
107 //
108 //=================================================================================================
109 
110 //*************************************************************************************************
117 template< typename MT // Type of the left-hand side dense matrix
118  , typename VT > // Type of the right-hand side dense vector
119 class TDMatDVecMultExpr : public DenseVector< TDMatDVecMultExpr<MT,VT>, false >
120  , private MatVecMultExpr
121  , private Computation
122 {
123  private:
124  //**Type definitions****************************************************************************
// Shorthand aliases for the nested types of the two operand types. MT is the type of the
// left-hand side dense matrix and VT the type of the right-hand side dense vector.
125  typedef typename MT::ResultType MRT; // Result type of the matrix operand
126  typedef typename VT::ResultType VRT; // Result type of the vector operand
127  typedef typename MRT::ElementType MET; // Element type of the matrix result type
128  typedef typename VRT::ElementType VET; // Element type of the vector result type
129  typedef typename MT::CompositeType MCT; // Composite type of the matrix operand
130  typedef typename VT::CompositeType VCT; // Composite type of the vector operand
131  //**********************************************************************************************
132 
133  //**********************************************************************************************
135  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
// Compile-time flag: non-zero if the vector operand type VT is a computation or is
// reported by RequiresEvaluation<VT> as requiring evaluation.
141  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146 
// Helper trait: 'value' is non-zero if at least one of the evaluateMatrix / evaluateVector
// flags of the enclosing expression is set. The template parameter T1 does not take part
// in the computation of 'value'.
150  template< typename T1 >
151  struct UseSMPAssign {
152  enum { value = ( evaluateMatrix || evaluateVector ) };
153  };
155  //**********************************************************************************************
156 
157  //**********************************************************************************************
159 
162  template< typename T1, typename T2, typename T3 >
163  struct UseBlasKernel {
164  enum { value = BLAZE_BLAS_MODE &&
169  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
175  };
177  //**********************************************************************************************
178 
179  //**********************************************************************************************
181 
185  template< typename T1, typename T2, typename T3 >
186  struct UseVectorizedDefaultKernel {
187  enum { value = useOptimizedKernels &&
189  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
194  };
196  //**********************************************************************************************
197 
198  public:
199  //**Type definitions****************************************************************************
// Type returned by the element access functions (a const element value).
205  typedef const ElementType ReturnType;
// Type used when this expression appears as an operand of another expression
// (a const instance of the result type).
206  typedef const ResultType CompositeType;
207 
// Storage type for the matrix operand: held by value if MT is itself an expression,
// by const reference otherwise.
209  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type LeftOperand;
210 
// Storage type for the vector operand: held by value if VT is itself an expression,
// by const reference otherwise.
212  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type RightOperand;
213 
216 
219  //**********************************************************************************************
220 
221  //**Compilation flags***************************************************************************
223  enum { vectorizable = !IsDiagonal<MT>::value &&
224  MT::vectorizable && VT::vectorizable &&
228 
// Compile-time flag: non-zero only if neither operand is flagged for evaluation
// (evaluateMatrix / evaluateVector) and both operand types report smpAssignable.
230  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
231  !evaluateVector && VT::smpAssignable };
232  //**********************************************************************************************
233 
234  //**Constructor*********************************************************************************
240  explicit inline TDMatDVecMultExpr( const MT& mat, const VT& vec )
241  : mat_( mat ) // Left-hand side dense matrix of the multiplication expression
242  , vec_( vec ) // Right-hand side dense vector of the multiplication expression
243  {
244  BLAZE_INTERNAL_ASSERT( mat_.columns() == vec_.size(), "Invalid matrix and vector sizes" );
245  }
246  //**********************************************************************************************
247 
248  //**Subscript operator**************************************************************************
254  inline ReturnType operator[]( size_t index ) const {
255  BLAZE_INTERNAL_ASSERT( index < mat_.rows(), "Invalid vector access index" );
256 
257  if( ( IsStrictlyLower<MT>::value && index == 0UL ) ||
258  ( IsStrictlyUpper<MT>::value && index == mat_.rows()-1UL ) ||
259  mat_.columns() == 0UL )
260  return ElementType();
261 
263  return mat_(index,index) * vec_[index];
264 
265  const size_t jbegin( ( IsUpper<MT>::value )
266  ?( IsStrictlyUpper<MT>::value ? index+1UL : index )
267  :( 0UL ) );
268  const size_t jend( ( IsLower<MT>::value )
269  ?( IsStrictlyLower<MT>::value ? index : index+1UL )
270  :( mat_.columns() ) );
271  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
272 
273  const size_t jnum( jend - jbegin );
274  const size_t jpos( jbegin + ( ( jnum - 1UL ) & size_t(-2) ) + 1UL );
275 
276  ElementType res( mat_(index,jbegin) * vec_[jbegin] );
277 
278  for( size_t j=jbegin+1UL; j<jpos; j+=2UL ) {
279  res += mat_(index,j) * vec_[j] + mat_(index,j+1) * vec_[j+1UL];
280  }
281  if( jpos < jend ) {
282  res += mat_(index,jpos) * vec_[jpos];
283  }
284 
285  return res;
286  }
287  //**********************************************************************************************
288 
289  //**At function*********************************************************************************
296  inline ReturnType at( size_t index ) const {
297  if( index >= mat_.rows() ) {
298  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
299  }
300  return (*this)[index];
301  }
302  //**********************************************************************************************
303 
304  //**Size function*******************************************************************************
309  inline size_t size() const {
310  return mat_.rows();
311  }
312  //**********************************************************************************************
313 
314  //**Left operand access*************************************************************************
319  inline LeftOperand leftOperand() const {
320  return mat_;
321  }
322  //**********************************************************************************************
323 
324  //**Right operand access************************************************************************
329  inline RightOperand rightOperand() const {
330  return vec_;
331  }
332  //**********************************************************************************************
333 
334  //**********************************************************************************************
340  template< typename T >
341  inline bool canAlias( const T* alias ) const {
342  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
343  }
344  //**********************************************************************************************
345 
346  //**********************************************************************************************
352  template< typename T >
353  inline bool isAliased( const T* alias ) const {
354  return ( mat_.isAliased( alias ) || vec_.isAliased( alias ) );
355  }
356  //**********************************************************************************************
357 
358  //**********************************************************************************************
363  inline bool isAligned() const {
364  return mat_.isAligned() && vec_.isAligned();
365  }
366  //**********************************************************************************************
367 
368  //**********************************************************************************************
373  inline bool canSMPAssign() const {
374  return ( !BLAZE_BLAS_IS_PARALLEL ||
375  ( IsComputation<MT>::value && !evaluateMatrix ) ||
376  ( mat_.rows() * mat_.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
377  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
378  }
379  //**********************************************************************************************
380 
381  private:
382  //**Member variables****************************************************************************
383  LeftOperand mat_; // Left-hand side dense matrix of the multiplication expression
384  RightOperand vec_; // Right-hand side dense vector of the multiplication expression
385  //**********************************************************************************************
386 
387  //**Assignment to dense vectors*****************************************************************
400  template< typename VT1 > // Type of the target dense vector
401  friend inline void assign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
402  {
404 
405  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
406 
407  if( rhs.mat_.rows() == 0UL ) {
408  return;
409  }
410  else if( rhs.mat_.columns() == 0UL ) {
411  reset( ~lhs );
412  return;
413  }
414 
415  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
416  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
417 
418  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
419  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
420  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
421  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
422 
423  TDMatDVecMultExpr::selectAssignKernel( ~lhs, A, x );
424  }
426  //**********************************************************************************************
427 
428  //**Assignment to dense vectors (kernel selection)**********************************************
439  template< typename VT1 // Type of the left-hand side target vector
440  , typename MT1 // Type of the left-hand side matrix operand
441  , typename VT2 > // Type of the right-hand side vector operand
442  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x )
443  {
444  if( ( IsDiagonal<MT1>::value ) ||
445  ( IsComputation<MT>::value && !evaluateMatrix ) ||
446  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
447  selectSmallAssignKernel( y, A, x );
448  else
449  selectBlasAssignKernel( y, A, x );
450  }
452  //**********************************************************************************************
453 
454  //**Default assignment to dense vectors*********************************************************
468  template< typename VT1 // Type of the left-hand side target vector
469  , typename MT1 // Type of the left-hand side matrix operand
470  , typename VT2 > // Type of the right-hand side vector operand
471  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x )
472  {
473  const size_t M( A.rows() );
474  const size_t N( A.columns() );
475 
476  if( IsStrictlyLower<MT1>::value ) {
477  reset( y[0] );
478  }
479 
480  if( !IsUpper<MT1>::value )
481  {
482  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
483  y[i] = A(i,0UL) * x[0UL];
484  }
485  }
486 
487  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
488  {
489  if( IsDiagonal<MT1>::value )
490  {
491  y[j] = A(j,j) * x[j];
492  }
493  else
494  {
495  const size_t ibegin( ( IsLower<MT1>::value )
496  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
497  :( 0UL ) );
498  const size_t iend( ( IsUpper<MT1>::value )
499  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
500  :( M ) );
501  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
502 
503  const size_t inum( iend - ibegin );
504  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
505 
506  for( size_t i=ibegin; i<ipos; i+=2UL ) {
507  y[i ] += A(i ,j) * x[j];
508  y[i+1UL] += A(i+1UL,j) * x[j];
509  }
510  if( ipos < iend ) {
511  y[ipos] += A(ipos,j) * x[j];
512  }
513  if( IsUpper<MT1>::value ) {
514  y[iend] = A(iend,j) * x[j];
515  }
516  }
517  }
518 
519  if( IsStrictlyUpper<MT1>::value ) {
520  reset( y[M-1UL] );
521  }
522  }
524  //**********************************************************************************************
525 
526  //**Default assignment to dense vectors (small matrices)****************************************
540  template< typename VT1 // Type of the left-hand side target vector
541  , typename MT1 // Type of the left-hand side matrix operand
542  , typename VT2 > // Type of the right-hand side vector operand
543  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
544  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
545  {
546  selectDefaultAssignKernel( y, A, x );
547  }
549  //**********************************************************************************************
550 
551  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Vectorized small-matrix kernel for the assignment y = A * x, enabled when
// UseVectorizedDefaultKernel holds for the given types.
//
// y: the target vector, A: the matrix operand, x: the vector operand.
//
// The rows of A are processed in packs of IT::size elements via A.load()/y.store(). Each
// pack of rows is accumulated over its relevant column range and then stored to y.
// Successive loops handle 8, 4, 3, 2 and 1 packs per iteration; a final scalar loop
// handles rows that do not fill a complete pack.
565  template< typename VT1 // Type of the left-hand side target vector
566  , typename MT1 // Type of the left-hand side matrix operand
567  , typename VT2 > // Type of the right-hand side vector operand
568  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
569  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x )
570  {
571  typedef IntrinsicTrait<ElementType> IT;
572 
573  const size_t M( A.rows() );
574  const size_t N( A.columns() );
575 
     // A scalar remainder loop is required unless both the matrix and the target vector
     // types are padded.
576  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
577 
     // End of the vectorized row range: M rounded down to a multiple of IT::size if a
     // remainder loop is used (see the assertion below), M itself otherwise.
578  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
579  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
580 
581  size_t i( 0UL );
582 
     // Eight packs of rows per iteration.
583  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
584  {
     // Column range [jbegin,jend) for this group of rows, narrowed for upper and lower
     // matrix types.
585  const size_t jbegin( ( IsUpper<MT1>::value )
586  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
587  :( 0UL ) );
588  const size_t jend( ( IsLower<MT1>::value )
589  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
590  :( N ) );
591  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
592 
     // NOTE(review): the accumulators are default-constructed and used directly as
     // summands; this presumes that IntrinsicType's default constructor zero-initializes
     // - confirm against the IntrinsicType definition.
593  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
594 
595  for( size_t j=jbegin; j<jend; ++j ) {
     // x[j] is expanded via set() once and reused for all eight packs.
596  const IntrinsicType x1( set( x[j] ) );
597  xmm1 = xmm1 + A.load(i ,j) * x1;
598  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
599  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
600  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
601  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
602  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
603  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
604  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
605  }
606 
     // Plain stores: this is an assignment, previous contents of y are overwritten.
607  y.store( i , xmm1 );
608  y.store( i+IT::size , xmm2 );
609  y.store( i+IT::size*2UL, xmm3 );
610  y.store( i+IT::size*3UL, xmm4 );
611  y.store( i+IT::size*4UL, xmm5 );
612  y.store( i+IT::size*5UL, xmm6 );
613  y.store( i+IT::size*6UL, xmm7 );
614  y.store( i+IT::size*7UL, xmm8 );
615  }
616 
     // Four packs of rows per iteration.
617  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
618  {
619  const size_t jbegin( ( IsUpper<MT1>::value )
620  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
621  :( 0UL ) );
622  const size_t jend( ( IsLower<MT1>::value )
623  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
624  :( N ) );
625  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
626 
627  IntrinsicType xmm1, xmm2, xmm3, xmm4;
628 
629  for( size_t j=jbegin; j<jend; ++j ) {
630  const IntrinsicType x1( set( x[j] ) );
631  xmm1 = xmm1 + A.load(i ,j) * x1;
632  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
633  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
634  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
635  }
636 
637  y.store( i , xmm1 );
638  y.store( i+IT::size , xmm2 );
639  y.store( i+IT::size*2UL, xmm3 );
640  y.store( i+IT::size*3UL, xmm4 );
641  }
642 
     // Three packs of rows per iteration.
643  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
644  {
645  const size_t jbegin( ( IsUpper<MT1>::value )
646  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
647  :( 0UL ) );
648  const size_t jend( ( IsLower<MT1>::value )
649  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
650  :( N ) );
651  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
652 
653  IntrinsicType xmm1, xmm2, xmm3;
654 
655  for( size_t j=jbegin; j<jend; ++j ) {
656  const IntrinsicType x1( set( x[j] ) );
657  xmm1 = xmm1 + A.load(i ,j) * x1;
658  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
659  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
660  }
661 
662  y.store( i , xmm1 );
663  y.store( i+IT::size , xmm2 );
664  y.store( i+IT::size*2UL, xmm3 );
665  }
666 
     // Two packs of rows per iteration.
667  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
668  {
669  const size_t jbegin( ( IsUpper<MT1>::value )
670  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
671  :( 0UL ) );
672  const size_t jend( ( IsLower<MT1>::value )
673  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
674  :( N ) );
675  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
676 
677  IntrinsicType xmm1, xmm2;
678 
679  for( size_t j=jbegin; j<jend; ++j ) {
680  const IntrinsicType x1( set( x[j] ) );
681  xmm1 = xmm1 + A.load(i ,j) * x1;
682  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
683  }
684 
685  y.store( i , xmm1 );
686  y.store( i+IT::size, xmm2 );
687  }
688 
     // One pack of rows per iteration.
689  for( ; i<ipos; i+=IT::size )
690  {
691  const size_t jbegin( ( IsUpper<MT1>::value )
692  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
693  :( 0UL ) );
694  const size_t jend( ( IsLower<MT1>::value )
695  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
696  :( N ) );
697  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
698 
699  IntrinsicType xmm1;
700 
701  for( size_t j=jbegin; j<jend; ++j ) {
702  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
703  }
704 
705  y.store( i, xmm1 );
706  }
707 
     // Scalar processing of the rows beyond ipos; only entered if 'remainder' is set.
708  for( ; remainder && i<M; ++i )
709  {
710  const size_t jbegin( ( IsUpper<MT1>::value )
711  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
712  :( 0UL ) );
713  const size_t jend( ( IsLower<MT1>::value )
714  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
715  :( N ) );
716  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
717 
718  ElementType value = ElementType();
719 
720  for( size_t j=jbegin; j<jend; ++j ) {
721  value += A(i,j) * x[j];
722  }
723 
724  y[i] = value;
725  }
726  }
728  //**********************************************************************************************
729 
730  //**Default assignment to dense vectors (large matrices)****************************************
744  template< typename VT1 // Type of the left-hand side target vector
745  , typename MT1 // Type of the left-hand side matrix operand
746  , typename VT2 > // Type of the right-hand side vector operand
747  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
748  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
749  {
750  selectDefaultAssignKernel( y, A, x );
751  }
753  //**********************************************************************************************
754 
755  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Vectorized large-matrix kernel for the assignment y = A * x, enabled when
// UseVectorizedDefaultKernel holds for the given types.
//
// y: the target vector, A: the matrix operand, x: the vector operand.
//
// The target is reset first. The matrix is then traversed in tiles of 'iblock' rows by
// 'jblock' columns, and the partial result of each tile is added to y
// (y.store( i, y.load(i) + ... )). Within a tile the rows are processed in packs of
// IT::size elements, 8, 4, 3, 2 and 1 packs per iteration, followed by a scalar loop for
// the remaining rows.
769  template< typename VT1 // Type of the left-hand side target vector
770  , typename MT1 // Type of the left-hand side matrix operand
771  , typename VT2 > // Type of the right-hand side vector operand
772  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
773  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x )
774  {
775  typedef IntrinsicTrait<ElementType> IT;
776 
777  const size_t M( A.rows() );
778  const size_t N( A.columns() );
779 
     // A scalar remainder loop is required unless both the matrix and the target vector
     // types are padded.
780  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
781 
     // Tile dimensions: 'iblock' rows covering 32768 bytes worth of elements
     // (NOTE(review): presumably chosen to match a cache size - confirm), and 8 columns
     // if N is smaller than iblock, 4 columns otherwise.
782  const size_t iblock( 32768UL / sizeof( ElementType ) );
783  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
784 
785  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
786 
     // All tiles accumulate into y, so the target has to start from a reset state.
787  reset( y );
788 
789  for( size_t ii=0U; ii<M; ii+=iblock ) {
790  for( size_t jj=0UL; jj<N; jj+=jblock )
791  {
     // Column end of the tile and row end of the tile; for upper matrices the row end
     // is additionally limited by the column end of the tile.
792  const size_t jend( min( jj+jblock, N ) );
793  const size_t itmp( min( ii+iblock, M ) );
794  const size_t iend( ( IsUpper<MT1>::value )
795  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
796  :( itmp ) );
797 
     // End of the vectorized row range of this tile: iend rounded down to a multiple of
     // IT::size if a remainder loop is used (see the assertion below).
798  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
799  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
800 
     // First row of the tile; for lower matrices the start is derived from the first
     // column of the tile, rounded down to a multiple of IT::size, but never before ii.
801  size_t i( ( IsLower<MT1>::value )
802  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
803  :( ii ) );
804 
     // Eight packs of rows per iteration.
805  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
806  {
     // NOTE(review): the accumulators are default-constructed and used directly as
     // summands; this presumes that IntrinsicType's default constructor zero-initializes
     // - confirm against the IntrinsicType definition.
807  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
808 
809  for( size_t j=jj; j<jend; ++j ) {
810  const IntrinsicType x1( set( x[j] ) );
811  xmm1 = xmm1 + A.load(i ,j) * x1;
812  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
813  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
814  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
815  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
816  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
817  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
818  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
819  }
820 
     // Add the partial sums of this tile to the values already stored in y.
821  y.store( i , y.load(i ) + xmm1 );
822  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
823  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
824  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
825  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5 );
826  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6 );
827  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7 );
828  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8 );
829  }
830 
     // Four packs of rows per iteration.
831  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
832  {
833  IntrinsicType xmm1, xmm2, xmm3, xmm4;
834 
835  for( size_t j=jj; j<jend; ++j ) {
836  const IntrinsicType x1( set( x[j] ) );
837  xmm1 = xmm1 + A.load(i ,j) * x1;
838  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
839  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
840  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
841  }
842 
843  y.store( i , y.load(i ) + xmm1 );
844  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
845  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
846  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
847  }
848 
     // Three packs of rows per iteration.
849  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
850  {
851  IntrinsicType xmm1, xmm2, xmm3;
852 
853  for( size_t j=jj; j<jend; ++j ) {
854  const IntrinsicType x1( set( x[j] ) );
855  xmm1 = xmm1 + A.load(i ,j) * x1;
856  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
857  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
858  }
859 
860  y.store( i , y.load(i ) + xmm1 );
861  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
862  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
863  }
864 
     // Two packs of rows per iteration.
865  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
866  {
867  IntrinsicType xmm1, xmm2;
868 
869  for( size_t j=jj; j<jend; ++j ) {
870  const IntrinsicType x1( set( x[j] ) );
871  xmm1 = xmm1 + A.load(i ,j) * x1;
872  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
873  }
874 
875  y.store( i , y.load(i ) + xmm1 );
876  y.store( i+IT::size, y.load(i+IT::size) + xmm2 );
877  }
878 
     // One pack of rows per iteration.
879  for( ; i<ipos; i+=IT::size )
880  {
881  IntrinsicType xmm1;
882 
883  for( size_t j=jj; j<jend; ++j ) {
884  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
885  }
886 
887  y.store( i, y.load(i) + xmm1 );
888  }
889 
     // Scalar processing of the remaining rows of the tile; only entered if 'remainder'
     // is set.
890  for( ; remainder && i<iend; ++i )
891  {
892  ElementType value = ElementType();
893 
894  for( size_t j=jj; j<jend; ++j ) {
895  value += A(i,j) * x[j];
896  }
897 
898  y[i] += value;
899  }
900  }
901  }
902  }
904  //**********************************************************************************************
905 
906  //**BLAS-based assignment to dense vectors (default)********************************************
920  template< typename VT1 // Type of the left-hand side target vector
921  , typename MT1 // Type of the left-hand side matrix operand
922  , typename VT2 > // Type of the right-hand side vector operand
923  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
924  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
925  {
926  selectLargeAssignKernel( y, A, x );
927  }
929  //**********************************************************************************************
930 
931  //**BLAS-based assignment to dense vectors******************************************************
932 #if BLAZE_BLAS_MODE
933 
946  template< typename VT1 // Type of the left-hand side target vector
947  , typename MT1 // Type of the left-hand side matrix operand
948  , typename VT2 > // Type of the right-hand side vector operand
949  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
950  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x )
951  {
952  typedef typename VT1::ElementType ET;
953 
954  if( IsTriangular<MT1>::value ) {
955  assign( y, x );
956  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
957  }
958  else {
959  gemv( y, A, x, ET(1), ET(0) );
960  }
961  }
963 #endif
964  //**********************************************************************************************
965 
966  //**Assignment to sparse vectors****************************************************************
979  template< typename VT1 > // Type of the target sparse vector
980  friend inline void assign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
981  {
983 
987 
988  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
989 
990  const ResultType tmp( serial( rhs ) );
991  assign( ~lhs, tmp );
992  }
994  //**********************************************************************************************
995 
996  //**Addition assignment to dense vectors********************************************************
1009  template< typename VT1 > // Type of the target dense vector
1010  friend inline void addAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1011  {
1013 
1014  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1015 
1016  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1017  return;
1018  }
1019 
1020  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1021  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1022 
1023  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1024  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1025  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1026  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1027 
1028  TDMatDVecMultExpr::selectAddAssignKernel( ~lhs, A, x );
1029  }
1031  //**********************************************************************************************
1032 
1033  //**Addition assignment to dense vectors (kernel selection)*************************************
1044  template< typename VT1 // Type of the left-hand side target vector
1045  , typename MT1 // Type of the left-hand side matrix operand
1046  , typename VT2 > // Type of the right-hand side vector operand
1047  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1048  {
1049  if( ( IsDiagonal<MT1>::value ) ||
1050  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1051  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1052  selectSmallAddAssignKernel( y, A, x );
1053  else
1054  selectBlasAddAssignKernel( y, A, x );
1055  }
1057  //**********************************************************************************************
1058 
1059  //**Default addition assignment to dense vectors************************************************
1073  template< typename VT1 // Type of the left-hand side target vector
1074  , typename MT1 // Type of the left-hand side matrix operand
1075  , typename VT2 > // Type of the right-hand side vector operand
1076  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1077  {
1078  const size_t M( A.rows() );
1079  const size_t N( A.columns() );
1080 
1081  for( size_t j=0UL; j<N; ++j )
1082  {
1083  if( IsDiagonal<MT1>::value )
1084  {
1085  y[j] += A(j,j) * x[j];
1086  }
1087  else
1088  {
1089  const size_t ibegin( ( IsLower<MT1>::value )
1090  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1091  :( 0UL ) );
1092  const size_t iend( ( IsUpper<MT1>::value )
1093  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1094  :( M ) );
1095  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1096 
1097  const size_t inum( iend - ibegin );
1098  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1099 
1100  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1101  y[i ] += A(i ,j) * x[j];
1102  y[i+1UL] += A(i+1UL,j) * x[j];
1103  }
1104  if( ipos < iend ) {
1105  y[ipos] += A(ipos,j) * x[j];
1106  }
1107  }
1108  }
1109  }
1111  //**********************************************************************************************
1112 
1113  //**Default addition assignment to dense vectors (small matrices)*******************************
1127  template< typename VT1 // Type of the left-hand side target vector
1128  , typename MT1 // Type of the left-hand side matrix operand
1129  , typename VT2 > // Type of the right-hand side vector operand
1130  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1131  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1132  {
1133  selectDefaultAddAssignKernel( y, A, x );
1134  }
1136  //**********************************************************************************************
1137 
1138  //**Vectorized default addition assignment to dense vectors (small matrices)********************
/*!\brief Vectorized addition assignment kernel for small matrices (y += A * x).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// This overload is enabled if UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The rows of A are
// traversed in chunks of 8, 4, 3, 2 and finally 1 SIMD vectors of IT::size elements each. For
// every chunk the current values of y are loaded into accumulators, the products of the
// matrix columns with the broadcast element x[j] are added, and the accumulators are stored
// back to y. Rows beyond the last complete SIMD vector are handled by a scalar loop, which
// is executed only if the matrix or the target vector is not padded.
*/
1152  template< typename VT1 // Type of the left-hand side target vector
1153  , typename MT1 // Type of the left-hand side matrix operand
1154  , typename VT2 > // Type of the right-hand side vector operand
1155  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1156  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1157  {
1158  typedef IntrinsicTrait<ElementType> IT;
1159 
1160  const size_t M( A.rows() );
1161  const size_t N( A.columns() );
1162 
      // A scalar remainder loop is needed unless both the matrix and the target vector are padded.
1163  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1164 
      // End of the SIMD-processed row range: M rounded down to a multiple of IT::size when a
      // remainder loop is used (checked by the assertion below), otherwise M itself.
      // NOTE(review): with full padding the SIMD loops may touch rows >= M; presumably this
      // relies on the padding elements of A and y - confirm.
1165  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
1166  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
1167 
1168  size_t i( 0UL );
1169 
      // Chunks of 8 SIMD vectors
1170  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
1171  {
      // Column range [jbegin,jend) for this row chunk: upper matrices start at column i
      // (i+1 if strictly upper), lower matrices stop at the end of the row chunk clamped
      // to N (one column less if strictly lower); otherwise all N columns are used.
1172  const size_t jbegin( ( IsUpper<MT1>::value )
1173  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1174  :( 0UL ) );
1175  const size_t jend( ( IsLower<MT1>::value )
1176  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1177  :( N ) );
1178  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1179 
      // The accumulators start with the current content of y
1180  IntrinsicType xmm1( y.load(i ) );
1181  IntrinsicType xmm2( y.load(i+IT::size ) );
1182  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1183  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1184  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1185  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1186  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1187  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1188 
1189  for( size_t j=jbegin; j<jend; ++j ) {
      // x[j] is broadcast once per column and reused for all eight accumulators
1190  const IntrinsicType x1( set( x[j] ) );
1191  xmm1 = xmm1 + A.load(i ,j) * x1;
1192  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1193  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1194  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1195  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1196  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1197  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1198  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1199  }
1200 
1201  y.store( i , xmm1 );
1202  y.store( i+IT::size , xmm2 );
1203  y.store( i+IT::size*2UL, xmm3 );
1204  y.store( i+IT::size*3UL, xmm4 );
1205  y.store( i+IT::size*4UL, xmm5 );
1206  y.store( i+IT::size*5UL, xmm6 );
1207  y.store( i+IT::size*6UL, xmm7 );
1208  y.store( i+IT::size*7UL, xmm8 );
1209  }
1210 
      // Chunks of 4 SIMD vectors (same scheme as above)
1211  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
1212  {
1213  const size_t jbegin( ( IsUpper<MT1>::value )
1214  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1215  :( 0UL ) );
1216  const size_t jend( ( IsLower<MT1>::value )
1217  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1218  :( N ) );
1219  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1220 
1221  IntrinsicType xmm1( y.load(i ) );
1222  IntrinsicType xmm2( y.load(i+IT::size ) );
1223  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1224  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1225 
1226  for( size_t j=jbegin; j<jend; ++j ) {
1227  const IntrinsicType x1( set( x[j] ) );
1228  xmm1 = xmm1 + A.load(i ,j) * x1;
1229  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1230  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1231  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1232  }
1233 
1234  y.store( i , xmm1 );
1235  y.store( i+IT::size , xmm2 );
1236  y.store( i+IT::size*2UL, xmm3 );
1237  y.store( i+IT::size*3UL, xmm4 );
1238  }
1239 
      // Chunks of 3 SIMD vectors
1240  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
1241  {
1242  const size_t jbegin( ( IsUpper<MT1>::value )
1243  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1244  :( 0UL ) );
1245  const size_t jend( ( IsLower<MT1>::value )
1246  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1247  :( N ) );
1248  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1249 
1250  IntrinsicType xmm1( y.load(i ) );
1251  IntrinsicType xmm2( y.load(i+IT::size ) );
1252  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1253 
1254  for( size_t j=jbegin; j<jend; ++j ) {
1255  const IntrinsicType x1( set( x[j] ) );
1256  xmm1 = xmm1 + A.load(i ,j) * x1;
1257  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1258  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1259  }
1260 
1261  y.store( i , xmm1 );
1262  y.store( i+IT::size , xmm2 );
1263  y.store( i+IT::size*2UL, xmm3 );
1264  }
1265 
      // Chunks of 2 SIMD vectors
1266  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
1267  {
1268  const size_t jbegin( ( IsUpper<MT1>::value )
1269  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1270  :( 0UL ) );
1271  const size_t jend( ( IsLower<MT1>::value )
1272  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1273  :( N ) );
1274  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1275 
1276  IntrinsicType xmm1( y.load(i ) );
1277  IntrinsicType xmm2( y.load(i+IT::size) );
1278 
1279  for( size_t j=jbegin; j<jend; ++j ) {
1280  const IntrinsicType x1( set( x[j] ) );
1281  xmm1 = xmm1 + A.load(i ,j) * x1;
1282  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1283  }
1284 
1285  y.store( i , xmm1 );
1286  y.store( i+IT::size, xmm2 );
1287  }
1288 
      // Single SIMD vectors
1289  for( ; i<ipos; i+=IT::size )
1290  {
1291  const size_t jbegin( ( IsUpper<MT1>::value )
1292  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1293  :( 0UL ) );
1294  const size_t jend( ( IsLower<MT1>::value )
1295  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1296  :( N ) );
1297  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1298 
1299  IntrinsicType xmm1( y.load(i) );
1300 
1301  for( size_t j=jbegin; j<jend; ++j ) {
1302  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1303  }
1304 
1305  y.store( i, xmm1 );
1306  }
1307 
      // Scalar processing of the remaining rows (skipped if both operands are padded)
1308  for( ; remainder && i<M; ++i )
1309  {
1310  const size_t jbegin( ( IsUpper<MT1>::value )
1311  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1312  :( 0UL ) );
1313  const size_t jend( ( IsLower<MT1>::value )
1314  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1315  :( N ) );
1316  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1317 
      // The row's dot product is accumulated separately and added to y[i] in one step
1318  ElementType value = ElementType();
1319 
1320  for( size_t j=jbegin; j<jend; ++j ) {
1321  value += A(i,j) * x[j];
1322  }
1323 
1324  y[i] += value;
1325  }
1326  }
1328  //**********************************************************************************************
1329 
1330  //**Default addition assignment to dense vectors (large matrices)*******************************
1344  template< typename VT1 // Type of the left-hand side target vector
1345  , typename MT1 // Type of the left-hand side matrix operand
1346  , typename VT2 > // Type of the right-hand side vector operand
1347  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1348  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1349  {
1350  selectDefaultAddAssignKernel( y, A, x );
1351  }
1353  //**********************************************************************************************
1354 
1355  //**Vectorized default addition assignment to dense vectors (large matrices)********************
/*!\brief Vectorized addition assignment kernel for large matrices (y += A * x).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// This overload is enabled if UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The matrix is
// traversed in blocks of iblock rows and jblock columns. Within each block the rows are
// processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors of IT::size elements: the partial
// products of the block's columns are summed in accumulators and then added to the
// corresponding elements of y. Rows beyond the last complete SIMD vector are handled by a
// scalar loop, which is executed only if the matrix or the target vector is not padded.
*/
1369  template< typename VT1 // Type of the left-hand side target vector
1370  , typename MT1 // Type of the left-hand side matrix operand
1371  , typename VT2 > // Type of the right-hand side vector operand
1372  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1373  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1374  {
1375  typedef IntrinsicTrait<ElementType> IT;
1376 
1377  const size_t M( A.rows() );
1378  const size_t N( A.columns() );
1379 
      // A scalar remainder loop is needed unless both the matrix and the target vector are padded.
1380  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1381 
      // Row block size: the number of elements that fit into 32768 bytes.
      // Column block size: 8 columns if N is smaller than iblock, 4 columns otherwise.
1382  const size_t iblock( 32768UL / sizeof( ElementType ) );
1383  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1384 
1385  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
1386 
1387  for( size_t ii=0U; ii<M; ii+=iblock ) {
1388  for( size_t jj=0UL; jj<N; jj+=jblock )
1389  {
      // Column range [jj,jend) and row limit of the current block. For upper matrices the
      // rows are additionally limited to jend (jend-1 if strictly upper).
1390  const size_t jend( min( jj+jblock, N ) );
1391  const size_t itmp( min( ii+iblock, M ) );
1392  const size_t iend( ( IsUpper<MT1>::value )
1393  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1394  :( itmp ) );
1395 
      // End of the SIMD-processed rows: iend rounded down to a multiple of IT::size when a
      // remainder loop is used (checked by the assertion below), otherwise iend itself.
1396  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
1397  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
1398 
      // First row of the block: for lower matrices the rows above column jj (jj+1 if strictly
      // lower) are skipped; the start is masked with size_t(-IT::size) and never lies below ii.
1399  size_t i( ( IsLower<MT1>::value )
1400  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
1401  :( ii ) );
1402 
      // Chunks of 8 SIMD vectors
1403  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
1404  {
      // NOTE(review): the accumulators are default constructed and immediately accumulated
      // into; presumably the default constructor of IntrinsicType yields zero - confirm.
1405  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1406 
1407  for( size_t j=jj; j<jend; ++j ) {
1408  const IntrinsicType x1( set( x[j] ) );
1409  xmm1 = xmm1 + A.load(i ,j) * x1;
1410  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1411  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1412  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1413  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1414  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1415  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1416  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1417  }
1418 
      // Add the partial sums of this column block to the target vector
1419  y.store( i , y.load(i ) + xmm1 );
1420  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1421  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1422  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
1423  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5 );
1424  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6 );
1425  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7 );
1426  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8 );
1427  }
1428 
      // Chunks of 4 SIMD vectors
1429  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
1430  {
1431  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1432 
1433  for( size_t j=jj; j<jend; ++j ) {
1434  const IntrinsicType x1( set( x[j] ) );
1435  xmm1 = xmm1 + A.load(i ,j) * x1;
1436  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1437  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1438  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1439  }
1440 
1441  y.store( i , y.load(i ) + xmm1 );
1442  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1443  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1444  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4 );
1445  }
1446 
      // Chunks of 3 SIMD vectors
1447  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
1448  {
1449  IntrinsicType xmm1, xmm2, xmm3;
1450 
1451  for( size_t j=jj; j<jend; ++j ) {
1452  const IntrinsicType x1( set( x[j] ) );
1453  xmm1 = xmm1 + A.load(i ,j) * x1;
1454  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1455  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1456  }
1457 
1458  y.store( i , y.load(i ) + xmm1 );
1459  y.store( i+IT::size , y.load(i+IT::size ) + xmm2 );
1460  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3 );
1461  }
1462 
      // Chunks of 2 SIMD vectors
1463  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
1464  {
1465  IntrinsicType xmm1, xmm2;
1466 
1467  for( size_t j=jj; j<jend; ++j ) {
1468  const IntrinsicType x1( set( x[j] ) );
1469  xmm1 = xmm1 + A.load(i ,j) * x1;
1470  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
1471  }
1472 
1473  y.store( i , y.load(i ) + xmm1 );
1474  y.store( i+IT::size, y.load(i+IT::size) + xmm2 );
1475  }
1476 
      // Single SIMD vectors
1477  for( ; i<ipos; i+=IT::size )
1478  {
1479  IntrinsicType xmm1;
1480 
1481  for( size_t j=jj; j<jend; ++j ) {
1482  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
1483  }
1484 
1485  y.store( i, y.load(i) + xmm1 );
1486  }
1487 
      // Scalar processing of the remaining rows of the block (skipped if both operands are padded)
1488  for( ; remainder && i<iend; ++i )
1489  {
1490  ElementType value = ElementType();
1491 
1492  for( size_t j=jj; j<jend; ++j ) {
1493  value += A(i,j) * x[j];
1494  }
1495 
1496  y[i] += value;
1497  }
1498  }
1499  }
1500  }
1502  //**********************************************************************************************
1503 
1504  //**BLAS-based addition assignment to dense vectors (default)***********************************
1518  template< typename VT1 // Type of the left-hand side target vector
1519  , typename MT1 // Type of the left-hand side matrix operand
1520  , typename VT2 > // Type of the right-hand side vector operand
1521  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
1522  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1523  {
1524  selectLargeAddAssignKernel( y, A, x );
1525  }
1527  //**********************************************************************************************
1528 
1529  //**BLAS-based addition assignment to dense vectors*********************************************
1530 #if BLAZE_BLAS_MODE
1531 
1544  template< typename VT1 // Type of the left-hand side target vector
1545  , typename MT1 // Type of the left-hand side matrix operand
1546  , typename VT2 > // Type of the right-hand side vector operand
1547  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
1548  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x )
1549  {
1550  typedef typename VT1::ElementType ET;
1551 
1552  if( IsTriangular<MT1>::value ) {
1553  typename VT1::ResultType tmp( serial( x ) );
1554  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1555  addAssign( y, tmp );
1556  }
1557  else {
1558  gemv( y, A, x, ET(1), ET(1) );
1559  }
1560  }
1562 #endif
1563  //**********************************************************************************************
1564 
1565  //**Addition assignment to sparse vectors*******************************************************
1566  // No special implementation for the addition assignment to sparse vectors.
1567  //**********************************************************************************************
1568 
1569  //**Subtraction assignment to dense vectors*****************************************************
1582  template< typename VT1 > // Type of the target dense vector
1583  friend inline void subAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
1584  {
1586 
1587  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1588 
1589  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1590  return;
1591  }
1592 
1593  LT A( serial( rhs.mat_ ) ); // Evaluation of the left-hand side dense matrix operand
1594  RT x( serial( rhs.vec_ ) ); // Evaluation of the right-hand side dense vector operand
1595 
1596  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1597  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1598  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1599  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
1600 
1601  TDMatDVecMultExpr::selectSubAssignKernel( ~lhs, A, x );
1602  }
1604  //**********************************************************************************************
1605 
1606  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1617  template< typename VT1 // Type of the left-hand side target vector
1618  , typename MT1 // Type of the left-hand side matrix operand
1619  , typename VT2 > // Type of the right-hand side vector operand
1620  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1621  {
1622  if( ( IsDiagonal<MT1>::value ) ||
1623  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1624  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
1625  selectSmallSubAssignKernel( y, A, x );
1626  else
1627  selectBlasSubAssignKernel( y, A, x );
1628  }
1630  //**********************************************************************************************
1631 
1632  //**Default subtraction assignment to dense vectors*********************************************
1646  template< typename VT1 // Type of the left-hand side target vector
1647  , typename MT1 // Type of the left-hand side matrix operand
1648  , typename VT2 > // Type of the right-hand side vector operand
1649  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1650  {
1651  const size_t M( A.rows() );
1652  const size_t N( A.columns() );
1653 
1654  for( size_t j=0UL; j<N; ++j )
1655  {
1656  if( IsDiagonal<MT1>::value )
1657  {
1658  y[j] -= A(j,j) * x[j];
1659  }
1660  else
1661  {
1662  const size_t ibegin( ( IsLower<MT1>::value )
1663  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1664  :( 0UL ) );
1665  const size_t iend( ( IsUpper<MT1>::value )
1666  ?( IsStrictlyUpper<MT1>::value ? j : j+1UL )
1667  :( M ) );
1668  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1669 
1670  const size_t inum( iend - ibegin );
1671  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1672 
1673  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1674  y[i ] -= A(i ,j) * x[j];
1675  y[i+1UL] -= A(i+1UL,j) * x[j];
1676  }
1677  if( ipos < iend ) {
1678  y[ipos] -= A(ipos,j) * x[j];
1679  }
1680  }
1681  }
1682  }
1684  //**********************************************************************************************
1685 
1686  //**Default subtraction assignment to dense vectors (small matrices)****************************
1700  template< typename VT1 // Type of the left-hand side target vector
1701  , typename MT1 // Type of the left-hand side matrix operand
1702  , typename VT2 > // Type of the right-hand side vector operand
1703  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1704  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1705  {
1706  selectDefaultSubAssignKernel( y, A, x );
1707  }
1709  //**********************************************************************************************
1710 
1711  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
/*!\brief Vectorized subtraction assignment kernel for small matrices (y -= A * x).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// This overload is enabled if UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The rows of A are
// traversed in chunks of 8, 4, 3, 2 and finally 1 SIMD vectors of IT::size elements each. For
// every chunk the current values of y are loaded into accumulators, the products of the
// matrix columns with the broadcast element x[j] are subtracted, and the accumulators are
// stored back to y. Rows beyond the last complete SIMD vector are handled by a scalar loop,
// which is executed only if the matrix or the target vector is not padded.
*/
1726  template< typename VT1 // Type of the left-hand side target vector
1727  , typename MT1 // Type of the left-hand side matrix operand
1728  , typename VT2 > // Type of the right-hand side vector operand
1729  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1730  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1731  {
1732  typedef IntrinsicTrait<ElementType> IT;
1733 
1734  const size_t M( A.rows() );
1735  const size_t N( A.columns() );
1736 
      // A scalar remainder loop is needed unless both the matrix and the target vector are padded.
1737  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1738 
      // End of the SIMD-processed row range: M rounded down to a multiple of IT::size when a
      // remainder loop is used (checked by the assertion below), otherwise M itself.
      // NOTE(review): with full padding the SIMD loops may touch rows >= M; presumably this
      // relies on the padding elements of A and y - confirm.
1739  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
1740  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
1741 
1742  size_t i( 0UL );
1743 
      // Chunks of 8 SIMD vectors
1744  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
1745  {
      // Column range [jbegin,jend) for this row chunk: upper matrices start at column i
      // (i+1 if strictly upper), lower matrices stop at the end of the row chunk clamped
      // to N (one column less if strictly lower); otherwise all N columns are used.
1746  const size_t jbegin( ( IsUpper<MT1>::value )
1747  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1748  :( 0UL ) );
1749  const size_t jend( ( IsLower<MT1>::value )
1750  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1751  :( N ) );
1752  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1753 
      // The accumulators start with the current content of y
1754  IntrinsicType xmm1( y.load(i ) );
1755  IntrinsicType xmm2( y.load(i+IT::size ) );
1756  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1757  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1758  IntrinsicType xmm5( y.load(i+IT::size*4UL) );
1759  IntrinsicType xmm6( y.load(i+IT::size*5UL) );
1760  IntrinsicType xmm7( y.load(i+IT::size*6UL) );
1761  IntrinsicType xmm8( y.load(i+IT::size*7UL) );
1762 
1763  for( size_t j=jbegin; j<jend; ++j ) {
      // x[j] is broadcast once per column and reused for all eight accumulators
1764  const IntrinsicType x1( set( x[j] ) );
1765  xmm1 = xmm1 - A.load(i ,j) * x1;
1766  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1767  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1768  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1769  xmm5 = xmm5 - A.load(i+IT::size*4UL,j) * x1;
1770  xmm6 = xmm6 - A.load(i+IT::size*5UL,j) * x1;
1771  xmm7 = xmm7 - A.load(i+IT::size*6UL,j) * x1;
1772  xmm8 = xmm8 - A.load(i+IT::size*7UL,j) * x1;
1773  }
1774 
1775  y.store( i , xmm1 );
1776  y.store( i+IT::size , xmm2 );
1777  y.store( i+IT::size*2UL, xmm3 );
1778  y.store( i+IT::size*3UL, xmm4 );
1779  y.store( i+IT::size*4UL, xmm5 );
1780  y.store( i+IT::size*5UL, xmm6 );
1781  y.store( i+IT::size*6UL, xmm7 );
1782  y.store( i+IT::size*7UL, xmm8 );
1783  }
1784 
      // Chunks of 4 SIMD vectors (same scheme as above)
1785  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
1786  {
1787  const size_t jbegin( ( IsUpper<MT1>::value )
1788  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1789  :( 0UL ) );
1790  const size_t jend( ( IsLower<MT1>::value )
1791  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1792  :( N ) );
1793  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1794 
1795  IntrinsicType xmm1( y.load(i ) );
1796  IntrinsicType xmm2( y.load(i+IT::size ) );
1797  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1798  IntrinsicType xmm4( y.load(i+IT::size*3UL) );
1799 
1800  for( size_t j=jbegin; j<jend; ++j ) {
1801  const IntrinsicType x1( set( x[j] ) );
1802  xmm1 = xmm1 - A.load(i ,j) * x1;
1803  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1804  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1805  xmm4 = xmm4 - A.load(i+IT::size*3UL,j) * x1;
1806  }
1807 
1808  y.store( i , xmm1 );
1809  y.store( i+IT::size , xmm2 );
1810  y.store( i+IT::size*2UL, xmm3 );
1811  y.store( i+IT::size*3UL, xmm4 );
1812  }
1813 
      // Chunks of 3 SIMD vectors
1814  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
1815  {
1816  const size_t jbegin( ( IsUpper<MT1>::value )
1817  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1818  :( 0UL ) );
1819  const size_t jend( ( IsLower<MT1>::value )
1820  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1821  :( N ) );
1822  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1823 
1824  IntrinsicType xmm1( y.load(i ) );
1825  IntrinsicType xmm2( y.load(i+IT::size ) );
1826  IntrinsicType xmm3( y.load(i+IT::size*2UL) );
1827 
1828  for( size_t j=jbegin; j<jend; ++j ) {
1829  const IntrinsicType x1( set( x[j] ) );
1830  xmm1 = xmm1 - A.load(i ,j) * x1;
1831  xmm2 = xmm2 - A.load(i+IT::size ,j) * x1;
1832  xmm3 = xmm3 - A.load(i+IT::size*2UL,j) * x1;
1833  }
1834 
1835  y.store( i , xmm1 );
1836  y.store( i+IT::size , xmm2 );
1837  y.store( i+IT::size*2UL, xmm3 );
1838  }
1839 
      // Chunks of 2 SIMD vectors
1840  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
1841  {
1842  const size_t jbegin( ( IsUpper<MT1>::value )
1843  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1844  :( 0UL ) );
1845  const size_t jend( ( IsLower<MT1>::value )
1846  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1847  :( N ) );
1848  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1849 
1850  IntrinsicType xmm1( y.load(i ) );
1851  IntrinsicType xmm2( y.load(i+IT::size) );
1852 
1853  for( size_t j=jbegin; j<jend; ++j ) {
1854  const IntrinsicType x1( set( x[j] ) );
1855  xmm1 = xmm1 - A.load(i ,j) * x1;
1856  xmm2 = xmm2 - A.load(i+IT::size,j) * x1;
1857  }
1858 
1859  y.store( i , xmm1 );
1860  y.store( i+IT::size, xmm2 );
1861  }
1862 
      // Single SIMD vectors
1863  for( ; i<ipos; i+=IT::size )
1864  {
1865  const size_t jbegin( ( IsUpper<MT1>::value )
1866  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1867  :( 0UL ) );
1868  const size_t jend( ( IsLower<MT1>::value )
1869  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1870  :( N ) );
1871  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1872 
1873  IntrinsicType xmm1( y.load(i) );
1874 
1875  for( size_t j=jbegin; j<jend; ++j ) {
1876  xmm1 = xmm1 - A.load(i,j) * set( x[j] );
1877  }
1878 
1879  y.store( i, xmm1 );
1880  }
1881 
      // Scalar processing of the remaining rows (skipped if both operands are padded)
1882  for( ; remainder && i<M; ++i )
1883  {
1884  const size_t jbegin( ( IsUpper<MT1>::value )
1885  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1886  :( 0UL ) );
1887  const size_t jend( ( IsLower<MT1>::value )
1888  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
1889  :( N ) );
1890  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1891 
      // The row's dot product is summed up first and subtracted from y[i] in one step
1892  ElementType value = ElementType();
1893 
1894  for( size_t j=jbegin; j<jend; ++j ) {
1895  value += A(i,j) * x[j];
1896  }
1897 
1898  y[i] -= value;
1899  }
1900  }
1902  //**********************************************************************************************
1903 
1904  //**Default subtraction assignment to dense vectors (large matrices)****************************
1918  template< typename VT1 // Type of the left-hand side target vector
1919  , typename MT1 // Type of the left-hand side matrix operand
1920  , typename VT2 > // Type of the right-hand side vector operand
1921  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1922  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1923  {
1924  selectDefaultSubAssignKernel( y, A, x );
1925  }
1927  //**********************************************************************************************
1928 
1929  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
/*!\brief Vectorized subtraction assignment kernel for large matrices (y -= A * x).
//
// \param y The target left-hand side dense vector.
// \param A The left-hand side dense matrix operand.
// \param x The right-hand side dense vector operand.
//
// This overload is enabled if UseVectorizedDefaultKernel<VT1,MT1,VT2> holds. The matrix is
// traversed in blocks of iblock rows and jblock columns. Within each block the rows are
// processed in chunks of 8, 4, 3, 2 and 1 SIMD vectors of IT::size elements: the partial
// products of the block's columns are summed in accumulators and then subtracted from the
// corresponding elements of y. Rows beyond the last complete SIMD vector are handled by a
// scalar loop, which is executed only if the matrix or the target vector is not padded.
*/
1944  template< typename VT1 // Type of the left-hand side target vector
1945  , typename MT1 // Type of the left-hand side matrix operand
1946  , typename VT2 > // Type of the right-hand side vector operand
1947  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2> >::Type
1948  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
1949  {
1950  typedef IntrinsicTrait<ElementType> IT;
1951 
1952  const size_t M( A.rows() );
1953  const size_t N( A.columns() );
1954 
      // A scalar remainder loop is needed unless both the matrix and the target vector are padded.
1955  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
1956 
      // Row block size: the number of elements that fit into 32768 bytes.
      // Column block size: 8 columns if N is smaller than iblock, 4 columns otherwise.
1957  const size_t iblock( 32768UL / sizeof( ElementType ) );
1958  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
1959 
1960  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
1961 
1962  for( size_t ii=0U; ii<M; ii+=iblock ) {
1963  for( size_t jj=0UL; jj<N; jj+=jblock )
1964  {
      // Column range [jj,jend) and row limit of the current block. For upper matrices the
      // rows are additionally limited to jend (jend-1 if strictly upper).
1965  const size_t jend( min( jj+jblock, N ) );
1966  const size_t itmp( min( ii+iblock, M ) );
1967  const size_t iend( ( IsUpper<MT1>::value )
1968  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
1969  :( itmp ) );
1970 
      // End of the SIMD-processed rows: iend rounded down to a multiple of IT::size when a
      // remainder loop is used (checked by the assertion below), otherwise iend itself.
1971  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
1972  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
1973 
      // First row of the block: for lower matrices the rows above column jj (jj+1 if strictly
      // lower) are skipped; the start is masked with size_t(-IT::size) and never lies below ii.
1974  size_t i( ( IsLower<MT1>::value )
1975  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
1976  :( ii ) );
1977 
      // Chunks of 8 SIMD vectors
1978  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
1979  {
      // NOTE(review): the accumulators are default constructed and immediately accumulated
      // into; presumably the default constructor of IntrinsicType yields zero - confirm.
1980  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1981 
      // The products are summed with '+'; the subtraction happens when storing to y
1982  for( size_t j=jj; j<jend; ++j ) {
1983  const IntrinsicType x1( set( x[j] ) );
1984  xmm1 = xmm1 + A.load(i ,j) * x1;
1985  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
1986  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
1987  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
1988  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
1989  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
1990  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
1991  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
1992  }
1993 
      // Subtract the partial sums of this column block from the target vector
1994  y.store( i , y.load(i ) - xmm1 );
1995  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
1996  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
1997  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4 );
1998  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5 );
1999  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6 );
2000  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7 );
2001  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8 );
2002  }
2003 
      // Chunks of 4 SIMD vectors
2004  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
2005  {
2006  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2007 
2008  for( size_t j=jj; j<jend; ++j ) {
2009  const IntrinsicType x1( set( x[j] ) );
2010  xmm1 = xmm1 + A.load(i ,j) * x1;
2011  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2012  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2013  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2014  }
2015 
2016  y.store( i , y.load(i ) - xmm1 );
2017  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
2018  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
2019  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4 );
2020  }
2021 
      // Chunks of 3 SIMD vectors
2022  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
2023  {
2024  IntrinsicType xmm1, xmm2, xmm3;
2025 
2026  for( size_t j=jj; j<jend; ++j ) {
2027  const IntrinsicType x1( set( x[j] ) );
2028  xmm1 = xmm1 + A.load(i ,j) * x1;
2029  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2030  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2031  }
2032 
2033  y.store( i , y.load(i ) - xmm1 );
2034  y.store( i+IT::size , y.load(i+IT::size ) - xmm2 );
2035  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3 );
2036  }
2037 
      // Chunks of 2 SIMD vectors
2038  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
2039  {
2040  IntrinsicType xmm1, xmm2;
2041 
2042  for( size_t j=jj; j<jend; ++j ) {
2043  const IntrinsicType x1( set( x[j] ) );
2044  xmm1 = xmm1 + A.load(i ,j) * x1;
2045  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2046  }
2047 
2048  y.store( i , y.load(i ) - xmm1 );
2049  y.store( i+IT::size, y.load(i+IT::size) - xmm2 );
2050  }
2051 
      // Single SIMD vectors
2052  for( ; i<ipos; i+=IT::size )
2053  {
2054  IntrinsicType xmm1;
2055 
2056  for( size_t j=jj; j<jend; ++j ) {
2057  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
2058  }
2059 
2060  y.store( i, y.load(i) - xmm1 );
2061  }
2062 
      // Scalar processing of the remaining rows of the block (skipped if both operands are padded)
2063  for( ; remainder && i<iend; ++i )
2064  {
2065  ElementType value = ElementType();
2066 
2067  for( size_t j=jj; j<jend; ++j ) {
2068  value += A(i,j) * x[j];
2069  }
2070 
2071  y[i] -= value;
2072  }
2073  }
2074  }
2075  }
2077  //**********************************************************************************************
2078 
2079  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2093  template< typename VT1 // Type of the left-hand side target vector
2094  , typename MT1 // Type of the left-hand side matrix operand
2095  , typename VT2 > // Type of the right-hand side vector operand
2096  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
2097  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2098  {
2099  selectLargeSubAssignKernel( y, A, x );
2100  }
2102  //**********************************************************************************************
2103 
2104  //**BLAS-based subtraction assignment to dense vectors******************************************
2105 #if BLAZE_BLAS_MODE
2106 
2119  template< typename VT1 // Type of the left-hand side target vector
2120  , typename MT1 // Type of the left-hand side matrix operand
2121  , typename VT2 > // Type of the right-hand side vector operand
2122  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2> >::Type
2123  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x )
2124  {
2125  typedef typename VT1::ElementType ET;
2126 
2127  if( IsTriangular<MT1>::value ) {
2128  typename VT1::ResultType tmp( serial( x ) );
2129  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2130  subAssign( y, tmp );
2131  }
2132  else {
2133  gemv( y, A, x, ET(-1), ET(1) );
2134  }
2135  }
2137 #endif
2138  //**********************************************************************************************
2139 
2140  //**Subtraction assignment to sparse vectors****************************************************
2141  // No special implementation for the subtraction assignment to sparse vectors.
2142  //**********************************************************************************************
2143 
2144  //**Multiplication assignment to dense vectors**************************************************
2157  template< typename VT1 > // Type of the target dense vector
2158  friend inline void multAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2159  {
2161 
2165 
2166  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2167 
2168  const ResultType tmp( serial( rhs ) );
2169  multAssign( ~lhs, tmp );
2170  }
2172  //**********************************************************************************************
2173 
2174  //**Multiplication assignment to sparse vectors*************************************************
2175  // No special implementation for the multiplication assignment to sparse vectors.
2176  //**********************************************************************************************
2177 
2178  //**SMP assignment to dense vectors*************************************************************
2193  template< typename VT1 > // Type of the target dense vector
2194  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2195  smpAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2196  {
2198 
2199  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2200 
2201  if( rhs.mat_.rows() == 0UL ) {
2202  return;
2203  }
2204  else if( rhs.mat_.columns() == 0UL ) {
2205  reset( ~lhs );
2206  return;
2207  }
2208 
2209  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2210  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2211 
2212  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2213  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2214  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2215  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2216 
2217  smpAssign( ~lhs, A * x );
2218  }
2220  //**********************************************************************************************
2221 
2222  //**SMP assignment to sparse vectors************************************************************
2237  template< typename VT1 > // Type of the target sparse vector
2238  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2239  smpAssign( SparseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2240  {
2242 
2246 
2247  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2248 
2249  const ResultType tmp( rhs );
2250  smpAssign( ~lhs, tmp );
2251  }
2253  //**********************************************************************************************
2254 
2255  //**SMP addition assignment to dense vectors****************************************************
2270  template< typename VT1 > // Type of the target dense vector
2271  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2272  smpAddAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2273  {
2275 
2276  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2277 
2278  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2279  return;
2280  }
2281 
2282  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2283  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2284 
2285  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2286  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2287  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2288  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2289 
2290  smpAddAssign( ~lhs, A * x );
2291  }
2293  //**********************************************************************************************
2294 
2295  //**SMP addition assignment to sparse vectors***************************************************
2296  // No special implementation for the SMP addition assignment to sparse vectors.
2297  //**********************************************************************************************
2298 
2299  //**SMP subtraction assignment to dense vectors*************************************************
2314  template< typename VT1 > // Type of the target dense vector
2315  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2316  smpSubAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2317  {
2319 
2320  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2321 
2322  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2323  return;
2324  }
2325 
2326  LT A( rhs.mat_ ); // Evaluation of the left-hand side dense matrix operand
2327  RT x( rhs.vec_ ); // Evaluation of the right-hand side dense vector operand
2328 
2329  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2330  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2331  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2332  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2333 
2334  smpSubAssign( ~lhs, A * x );
2335  }
2337  //**********************************************************************************************
2338 
2339  //**SMP subtraction assignment to sparse vectors************************************************
2340  // No special implementation for the SMP subtraction assignment to sparse vectors.
2341  //**********************************************************************************************
2342 
2343  //**SMP multiplication assignment to dense vectors**********************************************
2358  template< typename VT1 > // Type of the target dense vector
2359  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2360  smpMultAssign( DenseVector<VT1,false>& lhs, const TDMatDVecMultExpr& rhs )
2361  {
2363 
2367 
2368  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2369 
2370  const ResultType tmp( rhs );
2371  smpMultAssign( ~lhs, tmp );
2372  }
2374  //**********************************************************************************************
2375 
2376  //**SMP multiplication assignment to sparse vectors*********************************************
2377  // No special implementation for the SMP multiplication assignment to sparse vectors.
2378  //**********************************************************************************************
2379 
2380  //**Compile time checks*************************************************************************
2388  //**********************************************************************************************
2389 };
2390 //*************************************************************************************************
2391 
2392 
2393 
2394 
2395 //=================================================================================================
2396 //
2397 // DVECSCALARMULTEXPR SPECIALIZATION
2398 //
2399 //=================================================================================================
2400 
2401 //*************************************************************************************************
2410 template< typename MT // Type of the left-hand side dense matrix
2411  , typename VT // Type of the right-hand side dense vector
2412  , typename ST > // Type of the side scalar value
2413 class DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >
2414  : public DenseVector< DVecScalarMultExpr< TDMatDVecMultExpr<MT,VT>, ST, false >, false >
2415  , private VecScalarMultExpr
2416  , private Computation
2417 {
2418  private:
2419  //**Type definitions****************************************************************************
2420  typedef TDMatDVecMultExpr<MT,VT> MVM; //!< Type of the dense matrix-dense vector multiplication expression.
2421  typedef typename MVM::ResultType RES; //!< Result type of the dense matrix-dense vector multiplication expression.
2422  typedef typename MT::ResultType MRT; //!< Result type of the left-hand side dense matrix expression.
2423  typedef typename VT::ResultType VRT; //!< Result type of the right-hand side dense vector expression.
2424  typedef typename MRT::ElementType MET; //!< Element type of the left-hand side dense matrix expression.
2425  typedef typename VRT::ElementType VET; //!< Element type of the right-hand side dense vector expression.
2426  typedef typename MT::CompositeType MCT; //!< Composite type of the left-hand side dense matrix expression.
2427  typedef typename VT::CompositeType VCT; //!< Composite type of the right-hand side dense vector expression.
2428  //**********************************************************************************************
2429 
2430  //**********************************************************************************************
//! Compilation switch for the composite type of the left-hand side dense matrix expression.
//  The switch is set if MT is a computation whose element type is identical to the element
//  type of the vector and BLAS compatible, or if MT requires an intermediate evaluation.
//  It selects between MRT and MCT in the definition of the LT type.
2432  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2433  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2434  //**********************************************************************************************
2435 
2436  //**********************************************************************************************
//! Compilation switch for the composite type of the right-hand side dense vector expression.
//  The switch is set if VT is a computation or requires an intermediate evaluation. It
//  selects between VRT and VCT in the definition of the RT type.
2438  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2439  //**********************************************************************************************
2440 
2441  //**********************************************************************************************
2443 
//! Helper structure for the explicit application of the SFINAE principle.
//  The nested value is set if either the matrix or the vector operand needs an intermediate
//  evaluation. The template parameter T1 does not take part in the computation.
//  NOTE(review): presumably consumed via EnableIf by the SMP assignment functions of this
//  specialization, which are not part of the section at hand -- confirm.
2446  template< typename T1 >
2447  struct UseSMPAssign {
2448  enum { value = ( evaluateMatrix || evaluateVector ) };
2449  };
2450  //**********************************************************************************************
2451 
2452  //**********************************************************************************************
2454 
//! Helper structure for the explicit application of the SFINAE principle.
//  The kernel selection functions instantiate it with the target vector type (T1), the
//  matrix operand type (T2), the vector operand type (T3) and the scalar type (T4). The
//  nested value is set only if BLAS mode is active, all three operands provide direct data
//  access and are vectorizable, the matrix is not diagonal, all element types are identical
//  and BLAS compatible, and the combination of a builtin element type with a complex scalar
//  is excluded.
2456  template< typename T1, typename T2, typename T3, typename T4 >
2457  struct UseBlasKernel {
2458  enum { value = BLAZE_BLAS_MODE &&
2459  HasMutableDataAccess<T1>::value &&
2460  HasConstDataAccess<T2>::value &&
2461  HasConstDataAccess<T3>::value &&
2462  !IsDiagonal<T2>::value &&
2463  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2464  IsBlasCompatible<typename T1::ElementType>::value &&
2465  IsBlasCompatible<typename T2::ElementType>::value &&
2466  IsBlasCompatible<typename T3::ElementType>::value &&
2467  IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
2468  IsSame< typename T1::ElementType, typename T3::ElementType >::value &&
2469  !( IsBuiltin<typename T1::ElementType>::value && IsComplex<T4>::value ) };
2470  };
2471  //**********************************************************************************************
2472 
2473  //**********************************************************************************************
2475 
//! Helper structure for the explicit application of the SFINAE principle.
//  The kernel selection functions instantiate it with the target vector type (T1), the
//  matrix operand type (T2), the vector operand type (T3) and the scalar type (T4). The
//  nested value is set only if optimized kernels are enabled, the matrix is not diagonal,
//  all three operands are vectorizable, the element types of all operands and the scalar
//  type are identical, and the element type supports intrinsic addition and multiplication.
2478  template< typename T1, typename T2, typename T3, typename T4 >
2479  struct UseVectorizedDefaultKernel {
2480  enum { value = useOptimizedKernels &&
2481  !IsDiagonal<T2>::value &&
2482  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2483  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2484  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2485  IsSame<typename T1::ElementType,T4>::value &&
2486  IntrinsicTrait<typename T1::ElementType>::addition &&
2487  IntrinsicTrait<typename T1::ElementType>::multiplication };
2488  };
2489  //**********************************************************************************************
2490 
2491  public:
2492  //**Type definitions****************************************************************************
2493  typedef DVecScalarMultExpr<MVM,ST,false> This; //!< Type of this DVecScalarMultExpr instance.
2494  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type for expression template evaluations.
2495  typedef typename ResultType::TransposeType TransposeType; //!< Transpose type for expression template evaluations.
2496  typedef typename ResultType::ElementType ElementType; //!< Resulting element type.
2497  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Resulting intrinsic element type.
2498  typedef const ElementType ReturnType; //!< Return type for expression template evaluations.
2499  typedef const ResultType CompositeType; //!< Data type for composite expression templates.
2500 
2502  typedef const TDMatDVecMultExpr<MT,VT> LeftOperand; //!< Composite type of the left-hand side dense vector expression.
2503 
2505  typedef ST RightOperand; //!< Composite type of the right-hand side scalar value.
2506 
2508  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type LT; //!< Type for the assignment of the dense matrix operand (MRT if evaluateMatrix is set, MCT otherwise).
2509 
2511  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type RT; //!< Type for the assignment of the dense vector operand (VRT if evaluateVector is set, VCT otherwise).
2512  //**********************************************************************************************
2513 
2514  //**Compilation flags***************************************************************************
//! Compilation switch for the expression template evaluation strategy.
//  The flag is set if the matrix is not diagonal, both operands are vectorizable, the
//  element types of matrix, vector and scalar are identical, and the element type supports
//  intrinsic addition and multiplication.
2516  enum { vectorizable = !IsDiagonal<MT>::value &&
2517  MT::vectorizable && VT::vectorizable &&
2518  IsSame<MET,VET>::value &&
2519  IsSame<MET,ST>::value &&
2520  IntrinsicTrait<MET>::addition &&
2521  IntrinsicTrait<MET>::multiplication };
2522 
//! Compilation switch for the expression template assignment strategy.
//  The flag is set if neither operand needs an intermediate evaluation and both operand
//  types are SMP assignable themselves.
2524  enum { smpAssignable = !evaluateMatrix && MT::smpAssignable &&
2525  !evaluateVector && VT::smpAssignable };
2526  //**********************************************************************************************
2527 
2528  //**Constructor*********************************************************************************
2534  explicit inline DVecScalarMultExpr( const MVM& vector, ST scalar )
2535  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2536  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2537  {}
2538  //**********************************************************************************************
2539 
2540  //**Subscript operator**************************************************************************
2546  inline ReturnType operator[]( size_t index ) const {
2547  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2548  return vector_[index] * scalar_;
2549  }
2550  //**********************************************************************************************
2551 
2552  //**At function*********************************************************************************
2559  inline ReturnType at( size_t index ) const {
2560  if( index >= vector_.size() ) {
2561  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
2562  }
2563  return (*this)[index];
2564  }
2565  //**********************************************************************************************
2566 
2567  //**Size function*******************************************************************************
2572  inline size_t size() const {
2573  return vector_.size();
2574  }
2575  //**********************************************************************************************
2576 
2577  //**Left operand access*************************************************************************
2582  inline LeftOperand leftOperand() const {
2583  return vector_;
2584  }
2585  //**********************************************************************************************
2586 
2587  //**Right operand access************************************************************************
2592  inline RightOperand rightOperand() const {
2593  return scalar_;
2594  }
2595  //**********************************************************************************************
2596 
2597  //**********************************************************************************************
2603  template< typename T >
2604  inline bool canAlias( const T* alias ) const {
2605  return vector_.canAlias( alias );
2606  }
2607  //**********************************************************************************************
2608 
2609  //**********************************************************************************************
2615  template< typename T >
2616  inline bool isAliased( const T* alias ) const {
2617  return vector_.isAliased( alias );
2618  }
2619  //**********************************************************************************************
2620 
2621  //**********************************************************************************************
2626  inline bool isAligned() const {
2627  return vector_.isAligned();
2628  }
2629  //**********************************************************************************************
2630 
2631  //**********************************************************************************************
2636  inline bool canSMPAssign() const {
2637  typename MVM::LeftOperand A( vector_.leftOperand() );
2638  return ( !BLAZE_BLAS_IS_PARALLEL ||
2639  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2640  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) ) &&
2641  ( size() > SMP_TDMATDVECMULT_THRESHOLD );
2642  }
2643  //**********************************************************************************************
2644 
2645  private:
2646  //**Member variables****************************************************************************
2647  LeftOperand vector_; //!< Left-hand side dense vector of the multiplication expression.
2648  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
2649  //**********************************************************************************************
2650 
2651  //**Assignment to dense vectors*****************************************************************
2663  template< typename VT1 > // Type of the target dense vector
2664  friend inline void assign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
2665  {
2667 
2668  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2669 
2670  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
2671  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
2672 
2673  if( left.rows() == 0UL ) {
2674  return;
2675  }
2676  else if( left.columns() == 0UL ) {
2677  reset( ~lhs );
2678  return;
2679  }
2680 
2681  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2682  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
2683 
2684  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2685  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
2686  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
2687  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
2688 
2689  DVecScalarMultExpr::selectAssignKernel( ~lhs, A, x, rhs.scalar_ );
2690  }
2691  //**********************************************************************************************
2692 
2693  //**Assignment to dense vectors (kernel selection)**********************************************
2704  template< typename VT1 // Type of the left-hand side target vector
2705  , typename MT1 // Type of the left-hand side matrix operand
2706  , typename VT2 // Type of the right-hand side vector operand
2707  , typename ST2 > // Type of the scalar value
2708  static inline void selectAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2709  {
2710  if( ( IsDiagonal<MT1>::value ) ||
2711  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2712  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
2713  selectSmallAssignKernel( y, A, x, scalar );
2714  else
2715  selectBlasAssignKernel( y, A, x, scalar );
2716  }
2717  //**********************************************************************************************
2718 
2719  //**Default assignment to dense vectors*********************************************************
2733  template< typename VT1 // Type of the left-hand side target vector
2734  , typename MT1 // Type of the left-hand side matrix operand
2735  , typename VT2 // Type of the right-hand side vector operand
2736  , typename ST2 > // Type of the scalar value
2737  static inline void selectDefaultAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2738  {
2739  const size_t M( A.rows() );
2740  const size_t N( A.columns() );
2741 
2742  if( IsStrictlyLower<MT1>::value ) {
2743  reset( y[0] );
2744  }
2745 
2746  if( !IsUpper<MT1>::value )
2747  {
2748  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<M; ++i ) {
2749  y[i] = A(i,0UL) * x[0UL];
2750  }
2751  }
2752 
2753  for( size_t j=( IsUpper<MT1>::value && !IsStrictlyUpper<MT1>::value ? 0UL : 1UL ); j<N; ++j )
2754  {
2755  if( IsDiagonal<MT1>::value )
2756  {
2757  y[j] = A(j,j) * x[j] * scalar;
2758  }
2759  else
2760  {
2761  const size_t ibegin( ( IsLower<MT1>::value )
2762  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2763  :( 0UL ) );
2764  const size_t iend( ( IsUpper<MT1>::value )
2765  ?( IsStrictlyUpper<MT1>::value ? j-1UL : j )
2766  :( M ) );
2767  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2768 
2769  const size_t inum( iend - ibegin );
2770  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2771 
2772  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2773  y[i ] += A(i ,j) * x[j];
2774  y[i+1UL] += A(i+1UL,j) * x[j];
2775  }
2776  if( ipos < iend ) {
2777  y[ipos] += A(ipos,j) * x[j];
2778  }
2779  if( IsUpper<MT1>::value ) {
2780  y[iend] = A(iend,j) * x[j];
2781  }
2782  }
2783  }
2784 
2785  if( IsStrictlyUpper<MT1>::value ) {
2786  reset( y[M-1UL] );
2787  }
2788 
2789  if( !IsDiagonal<MT1>::value )
2790  {
2791  const size_t iend( IsStrictlyUpper<MT1>::value ? M-1UL : M );
2792  for( size_t i=( IsStrictlyLower<MT1>::value ? 1UL : 0UL ); i<iend; ++i ) {
2793  y[i] *= scalar;
2794  }
2795  }
2796  }
2797  //**********************************************************************************************
2798 
2799  //**Default assignment to dense vectors (small matrices)****************************************
2813  template< typename VT1 // Type of the left-hand side target vector
2814  , typename MT1 // Type of the left-hand side matrix operand
2815  , typename VT2 // Type of the right-hand side vector operand
2816  , typename ST2 > // Type of the scalar value
2817  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2818  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2819  {
2820  selectDefaultAssignKernel( y, A, x, scalar );
2821  }
2822  //**********************************************************************************************
2823 
2824  //**Vectorized default assignment to dense vectors (small matrices)*****************************
2838  template< typename VT1 // Type of the left-hand side target vector
2839  , typename MT1 // Type of the left-hand side matrix operand
2840  , typename VT2 // Type of the right-hand side vector operand
2841  , typename ST2 > // Type of the scalar value
2842  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
2843  selectSmallAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
2844  {
2845  typedef IntrinsicTrait<ElementType> IT;
2846 
2847  const size_t M( A.rows() );
2848  const size_t N( A.columns() );
2849 
2850  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
2851 
2852  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
2853  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
2854 
2855  const IntrinsicType factor( set( scalar ) );
2856 
2857  size_t i( 0UL );
2858 
2859  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
2860  {
2861  const size_t jbegin( ( IsUpper<MT1>::value )
2862  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2863  :( 0UL ) );
2864  const size_t jend( ( IsLower<MT1>::value )
2865  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2866  :( N ) );
2867  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2868 
2869  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2870 
2871  for( size_t j=jbegin; j<jend; ++j ) {
2872  const IntrinsicType x1( set( x[j] ) );
2873  xmm1 = xmm1 + A.load(i ,j) * x1;
2874  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2875  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2876  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2877  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
2878  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
2879  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
2880  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
2881  }
2882 
2883  y.store( i , xmm1*factor );
2884  y.store( i+IT::size , xmm2*factor );
2885  y.store( i+IT::size*2UL, xmm3*factor );
2886  y.store( i+IT::size*3UL, xmm4*factor );
2887  y.store( i+IT::size*4UL, xmm5*factor );
2888  y.store( i+IT::size*5UL, xmm6*factor );
2889  y.store( i+IT::size*6UL, xmm7*factor );
2890  y.store( i+IT::size*7UL, xmm8*factor );
2891  }
2892 
2893  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
2894  {
2895  const size_t jbegin( ( IsUpper<MT1>::value )
2896  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2897  :( 0UL ) );
2898  const size_t jend( ( IsLower<MT1>::value )
2899  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2900  :( N ) );
2901  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2902 
2903  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2904 
2905  for( size_t j=jbegin; j<jend; ++j ) {
2906  const IntrinsicType x1( set( x[j] ) );
2907  xmm1 = xmm1 + A.load(i ,j) * x1;
2908  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2909  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2910  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
2911  }
2912 
2913  y.store( i , xmm1*factor );
2914  y.store( i+IT::size , xmm2*factor );
2915  y.store( i+IT::size*2UL, xmm3*factor );
2916  y.store( i+IT::size*3UL, xmm4*factor );
2917  }
2918 
2919  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
2920  {
2921  const size_t jbegin( ( IsUpper<MT1>::value )
2922  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2923  :( 0UL ) );
2924  const size_t jend( ( IsLower<MT1>::value )
2925  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2926  :( N ) );
2927  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2928 
2929  IntrinsicType xmm1, xmm2, xmm3;
2930 
2931  for( size_t j=jbegin; j<jend; ++j ) {
2932  const IntrinsicType x1( set( x[j] ) );
2933  xmm1 = xmm1 + A.load(i ,j) * x1;
2934  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
2935  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
2936  }
2937 
2938  y.store( i , xmm1*factor );
2939  y.store( i+IT::size , xmm2*factor );
2940  y.store( i+IT::size*2UL, xmm3*factor );
2941  }
2942 
2943  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
2944  {
2945  const size_t jbegin( ( IsUpper<MT1>::value )
2946  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2947  :( 0UL ) );
2948  const size_t jend( ( IsLower<MT1>::value )
2949  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2950  :( N ) );
2951  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2952 
2953  IntrinsicType xmm1, xmm2;
2954 
2955  for( size_t j=jbegin; j<jend; ++j ) {
2956  const IntrinsicType x1( set( x[j] ) );
2957  xmm1 = xmm1 + A.load(i ,j) * x1;
2958  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
2959  }
2960 
2961  y.store( i , xmm1*factor );
2962  y.store( i+IT::size, xmm2*factor );
2963  }
2964 
2965  for( ; i<ipos; i+=IT::size )
2966  {
2967  const size_t jbegin( ( IsUpper<MT1>::value )
2968  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2969  :( 0UL ) );
2970  const size_t jend( ( IsLower<MT1>::value )
2971  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2972  :( N ) );
2973  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2974 
2975  IntrinsicType xmm1;
2976 
2977  for( size_t j=jbegin; j<jend; ++j ) {
2978  const IntrinsicType x1( set( x[j] ) );
2979  xmm1 = xmm1 + A.load(i,j) * x1;
2980  }
2981 
2982  y.store( i, xmm1*factor );
2983  }
2984 
2985  for( ; remainder && i<M; ++i )
2986  {
2987  const size_t jbegin( ( IsUpper<MT1>::value )
2988  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2989  :( 0UL ) );
2990  const size_t jend( ( IsLower<MT1>::value )
2991  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
2992  :( N ) );
2993  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2994 
2995  ElementType value = ElementType();
2996 
2997  for( size_t j=jbegin; j<jend; ++j ) {
2998  value += A(i,j) * x[j];
2999  }
3000 
3001  y[i] = value * scalar;
3002  }
3003  }
3004  //**********************************************************************************************
3005 
3006  //**Default assignment to dense vectors (large matrices)****************************************
3020  template< typename VT1 // Type of the left-hand side target vector
3021  , typename MT1 // Type of the left-hand side matrix operand
3022  , typename VT2 // Type of the right-hand side vector operand
3023  , typename ST2 > // Type of the scalar value
3024  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3025  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3026  {
3027  selectDefaultAssignKernel( y, A, x, scalar );
3028  }
3029  //**********************************************************************************************
3030 
3031  //**Vectorized default assignment to dense vectors (large matrices)*****************************
3045  template< typename VT1 // Type of the left-hand side target vector
3046  , typename MT1 // Type of the left-hand side matrix operand
3047  , typename VT2 // Type of the right-hand side vector operand
3048  , typename ST2 > // Type of the scalar value
3049  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3050  selectLargeAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3051  {
3052  typedef IntrinsicTrait<ElementType> IT;
3053 
3054  const size_t M( A.rows() );
3055  const size_t N( A.columns() );
3056 
3057  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3058 
3059  const size_t iblock( 32768UL / sizeof( ElementType ) );
3060  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3061 
3062  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
3063 
3064  const IntrinsicType factor( set( scalar ) );
3065 
3066  reset( y );
3067 
3068  for( size_t ii=0U; ii<M; ii+=iblock ) {
3069  for( size_t jj=0UL; jj<N; jj+=jblock )
3070  {
3071  const size_t jend( min( jj+jblock, N ) );
3072  const size_t itmp( min( ii+iblock, M ) );
3073  const size_t iend( ( IsUpper<MT1>::value )
3074  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3075  :( itmp ) );
3076 
3077  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
3078  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
3079 
3080  size_t i( ( IsLower<MT1>::value )
3081  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
3082  :( ii ) );
3083 
3084  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
3085  {
3086  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3087 
3088  for( size_t j=jj; j<jend; ++j ) {
3089  const IntrinsicType x1( set( x[j] ) );
3090  xmm1 = xmm1 + A.load(i ,j) * x1;
3091  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3092  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3093  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3094  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3095  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3096  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3097  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3098  }
3099 
3100  y.store( i , y.load(i ) + xmm1*factor );
3101  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3102  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3103  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3104  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3105  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3106  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3107  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3108  }
3109 
3110  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
3111  {
3112  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3113 
3114  for( size_t j=jj; j<jend; ++j ) {
3115  const IntrinsicType x1( set( x[j] ) );
3116  xmm1 = xmm1 + A.load(i ,j) * x1;
3117  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3118  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3119  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3120  }
3121 
3122  y.store( i , y.load(i ) + xmm1*factor );
3123  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3124  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3125  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3126  }
3127 
3128  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
3129  {
3130  IntrinsicType xmm1, xmm2, xmm3;
3131 
3132  for( size_t j=jj; j<jend; ++j ) {
3133  const IntrinsicType x1( set( x[j] ) );
3134  xmm1 = xmm1 + A.load(i ,j) * x1;
3135  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3136  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3137  }
3138 
3139  y.store( i , y.load(i ) + xmm1*factor );
3140  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3141  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3142  }
3143 
3144  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
3145  {
3146  IntrinsicType xmm1, xmm2;
3147 
3148  for( size_t j=jj; j<jend; ++j ) {
3149  const IntrinsicType x1( set( x[j] ) );
3150  xmm1 = xmm1 + A.load(i ,j) * x1;
3151  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3152  }
3153 
3154  y.store( i , y.load(i ) + xmm1*factor );
3155  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
3156  }
3157 
3158  for( ; i<ipos; i+=IT::size )
3159  {
3160  IntrinsicType xmm1;
3161 
3162  for( size_t j=jj; j<jend; ++j ) {
3163  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3164  }
3165 
3166  y.store( i, y.load(i) + xmm1*factor );
3167  }
3168 
3169  for( ; remainder && i<iend; ++i )
3170  {
3171  ElementType value = ElementType();
3172 
3173  for( size_t j=jj; j<jend; ++j ) {
3174  value += A(i,j) * x[j];
3175  }
3176 
3177  y[i] += value * scalar;
3178  }
3179  }
3180  }
3181  }
3182  //**********************************************************************************************
3183 
3184  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the BLAS assignment kernel. It only participates in overload resolution
// when UseBlasKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and forwards all arguments
// unchanged to selectLargeAssignKernel().
//   y      - target vector of the assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3198  template< typename VT1 // Type of the left-hand side target vector
3199  , typename MT1 // Type of the left-hand side matrix operand
3200  , typename VT2 // Type of the right-hand side vector operand
3201  , typename ST2 > // Type of the scalar value
3202  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3203  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3204  {
3205  selectLargeAssignKernel( y, A, x, scalar );
3206  }
3207  //**********************************************************************************************
3208 
3209  //**BLAS-based assignment to dense vectors******************************************************
3210 #if BLAZE_BLAS_MODE
3211 
// BLAS overload of the assignment kernel. It is enabled only when
// UseBlasKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
//   y      - target vector of the assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3224  template< typename VT1 // Type of the left-hand side target vector
3225  , typename MT1 // Type of the left-hand side matrix operand
3226  , typename VT2 // Type of the right-hand side vector operand
3227  , typename ST2 > // Type of the scalar value
3228  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3229  selectBlasAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3230  {
3231  typedef typename VT1::ElementType ET;
3232 
      // Triangular matrices: y is first set to scalar*x and then passed to trmv() together with A.
      // NOTE(review): presumably trmv() multiplies y by A in place (y = A*y) - confirm against
      // blaze/math/blas/trmv.h.
3233  if( IsTriangular<MT1>::value ) {
3234  assign( y, scalar * x );
3235  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3236  }
      // All other matrices: gemv() is called with the factors ET(scalar) and ET(0).
      // NOTE(review): presumably these are alpha and beta of the usual BLAS gemv convention
      // (y = alpha*A*x + beta*y), i.e. the old content of y is discarded - confirm.
3237  else {
3238  gemv( y, A, x, ET(scalar), ET(0) );
3239  }
3240  }
3241 #endif
3242  //**********************************************************************************************
3243 
3244  //**Assignment to sparse vectors****************************************************************
// Assignment of the scaled matrix/vector multiplication expression to a sparse vector.
// The expression is evaluated serially into a temporary of type ResultType, which is then
// assigned to the target vector.
//   lhs - target sparse vector
//   rhs - scaled multiplication expression to be assigned
3256  template< typename VT1 > // Type of the target sparse vector
3257  friend inline void assign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3258  {
3260 
3264 
3265  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3266 
      // Evaluate the complete expression once, then hand the result to the target's assign().
3267  const ResultType tmp( serial( rhs ) );
3268  assign( ~lhs, tmp );
3269  }
3270  //**********************************************************************************************
3271 
3272  //**Addition assignment to dense vectors********************************************************
// Addition assignment of the scaled matrix/vector multiplication expression to a dense vector.
// The operands of the wrapped multiplication are extracted and evaluated serially, then the
// work is handed to selectAddAssignKernel() together with the scalar of the expression.
//   lhs - target dense vector
//   rhs - scaled multiplication expression to be added
3284  template< typename VT1 > // Type of the target dense vector
3285  friend inline void addAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3286  {
3288 
3289  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3290 
3291  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3292  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3293 
      // Nothing to add if the matrix has no rows or no columns.
3294  if( left.rows() == 0UL || left.columns() == 0UL ) {
3295  return;
3296  }
3297 
3298  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3299  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3300 
3301  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3302  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3303  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3304  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3305 
3306  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, A, x, rhs.scalar_ );
3307  }
3308  //**********************************************************************************************
3309 
3310  //**Addition assignment to dense vectors (kernel selection)*************************************
// Kernel selection for the addition assignment to dense vectors.
// The small kernel is used if MT1 is diagonal, if MT is a computation and evaluateMatrix is
// false, or if the number of matrix elements (rows * columns) is below
// TDMATDVECMULT_THRESHOLD. In all other cases the BLAS kernel selection is used.
// NOTE(review): the second condition tests the class-level type MT, not the kernel's MT1.
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3321  template< typename VT1 // Type of the left-hand side target vector
3322  , typename MT1 // Type of the left-hand side matrix operand
3323  , typename VT2 // Type of the right-hand side vector operand
3324  , typename ST2 > // Type of the scalar value
3325  static inline void selectAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3326  {
3327  if( ( IsDiagonal<MT1>::value ) ||
3328  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3329  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3330  selectSmallAddAssignKernel( y, A, x, scalar );
3331  else
3332  selectBlasAddAssignKernel( y, A, x, scalar );
3333  }
3334  //**********************************************************************************************
3335 
3336  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) addition assignment kernel: builds the expression A * x * scalar and
// passes it to the addAssign() member function of the target vector.
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3350  template< typename VT1 // Type of the left-hand side target vector
3351  , typename MT1 // Type of the left-hand side matrix operand
3352  , typename VT2 // Type of the right-hand side vector operand
3353  , typename ST2 > // Type of the scalar value
3354  static inline void selectDefaultAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3355  {
3356  y.addAssign( A * x * scalar );
3357  }
3358  //**********************************************************************************************
3359 
3360  //**Default addition assignment to dense vectors (small matrices)*******************************
// Small-matrix addition assignment kernel for the non-vectorizable case. This overload is only
// available when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and
// forwards all arguments unchanged to selectDefaultAddAssignKernel().
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3374  template< typename VT1 // Type of the left-hand side target vector
3375  , typename MT1 // Type of the left-hand side matrix operand
3376  , typename VT2 // Type of the right-hand side vector operand
3377  , typename ST2 > // Type of the scalar value
3378  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3379  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3380  {
3381  selectDefaultAddAssignKernel( y, A, x, scalar );
3382  }
3383  //**********************************************************************************************
3384 
3385  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized small-matrix addition assignment kernel. This overload is only available when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
// The rows of A are processed in chunks of 8, 4, 3, 2 and 1 intrinsic vectors (IT::size
// elements each). For every chunk the products A(i..,j) * x[j] are accumulated over the
// relevant columns j, scaled by the scalar and added to the corresponding elements of y.
// Rows that do not fill a complete intrinsic vector are handled by a final scalar loop.
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3400  template< typename VT1 // Type of the left-hand side target vector
3401  , typename MT1 // Type of the left-hand side matrix operand
3402  , typename VT2 // Type of the right-hand side vector operand
3403  , typename ST2 > // Type of the scalar value
3404  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3405  selectSmallAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3406  {
3407  typedef IntrinsicTrait<ElementType> IT;
3408 
3409  const size_t M( A.rows() );
3410  const size_t N( A.columns() );
3411 
      // A scalar remainder loop is required unless both the matrix and the target are padded.
3412  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3413 
      // End of the vectorized row range: M rounded down to a multiple of IT::size if a remainder
      // loop is used (see the assertion below), M itself otherwise.
3414  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
3415  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
3416 
      // The scalar broadcast into an intrinsic vector.
3417  const IntrinsicType factor( set( scalar ) );
3418 
3419  size_t i( 0UL );
3420 
      // Chunks of 8 intrinsic vectors. For upper matrices the column range starts at the first
      // row of the chunk (plus one if strictly upper), for lower matrices it ends after the last
      // row of the chunk (minus one if strictly lower); otherwise all N columns are traversed.
3421  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
3422  {
3423  const size_t jbegin( ( IsUpper<MT1>::value )
3424  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3425  :( 0UL ) );
3426  const size_t jend( ( IsLower<MT1>::value )
3427  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3428  :( N ) );
3429  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3430 
      // NOTE(review): the accumulators are default-constructed; the kernel presumably relies on
      // IntrinsicType's default constructor zero-initializing them - confirm.
3431  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3432 
3433  for( size_t j=jbegin; j<jend; ++j ) {
3434  const IntrinsicType x1( set( x[j] ) );
3435  xmm1 = xmm1 + A.load(i ,j) * x1;
3436  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3437  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3438  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3439  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3440  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3441  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3442  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3443  }
3444 
      // y += accumulated sum * scalar
3445  y.store( i , y.load(i ) + xmm1*factor );
3446  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3447  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3448  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3449  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3450  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3451  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3452  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3453  }
3454 
      // Chunks of 4 intrinsic vectors (same scheme as above).
3455  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
3456  {
3457  const size_t jbegin( ( IsUpper<MT1>::value )
3458  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3459  :( 0UL ) );
3460  const size_t jend( ( IsLower<MT1>::value )
3461  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3462  :( N ) );
3463  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3464 
3465  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3466 
3467  for( size_t j=jbegin; j<jend; ++j ) {
3468  const IntrinsicType x1( set( x[j] ) );
3469  xmm1 = xmm1 + A.load(i ,j) * x1;
3470  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3471  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3472  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3473  }
3474 
3475  y.store( i , y.load(i ) + xmm1*factor );
3476  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3477  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3478  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3479  }
3480 
      // Chunks of 3 intrinsic vectors.
3481  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
3482  {
3483  const size_t jbegin( ( IsUpper<MT1>::value )
3484  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3485  :( 0UL ) );
3486  const size_t jend( ( IsLower<MT1>::value )
3487  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3488  :( N ) );
3489  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3490 
3491  IntrinsicType xmm1, xmm2, xmm3;
3492 
3493  for( size_t j=jbegin; j<jend; ++j ) {
3494  const IntrinsicType x1( set( x[j] ) );
3495  xmm1 = xmm1 + A.load(i ,j) * x1;
3496  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3497  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3498  }
3499 
3500  y.store( i , y.load(i ) + xmm1*factor );
3501  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3502  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3503  }
3504 
      // Chunks of 2 intrinsic vectors.
3505  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
3506  {
3507  const size_t jbegin( ( IsUpper<MT1>::value )
3508  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3509  :( 0UL ) );
3510  const size_t jend( ( IsLower<MT1>::value )
3511  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3512  :( N ) );
3513  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3514 
3515  IntrinsicType xmm1, xmm2;
3516 
3517  for( size_t j=jbegin; j<jend; ++j ) {
3518  const IntrinsicType x1( set( x[j] ) );
3519  xmm1 = xmm1 + A.load(i ,j) * x1;
3520  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3521  }
3522 
3523  y.store( i , y.load(i ) + xmm1*factor );
3524  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
3525  }
3526 
      // Single intrinsic vectors up to the end of the vectorized range.
3527  for( ; i<ipos; i+=IT::size )
3528  {
3529  const size_t jbegin( ( IsUpper<MT1>::value )
3530  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3531  :( 0UL ) );
3532  const size_t jend( ( IsLower<MT1>::value )
3533  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3534  :( N ) );
3535  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3536 
3537  IntrinsicType xmm1;
3538 
3539  for( size_t j=jbegin; j<jend; ++j ) {
3540  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3541  }
3542 
3543  y.store( i, y.load(i) + xmm1*factor );
3544  }
3545 
      // Scalar treatment of the remaining rows (only executed if a remainder loop is required).
3546  for( ; remainder && i<M; ++i )
3547  {
3548  const size_t jbegin( ( IsUpper<MT1>::value )
3549  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3550  :( 0UL ) );
3551  const size_t jend( ( IsLower<MT1>::value )
3552  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3553  :( N ) );
3554  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3555 
3556  ElementType value = ElementType();
3557 
3558  for( size_t j=jbegin; j<jend; ++j ) {
3559  value += A(i,j) * x[j];
3560  }
3561 
3562  y[i] += value * scalar;
3563  }
3564  }
3565  //**********************************************************************************************
3566 
3567  //**Default addition assignment to dense vectors (large matrices)*******************************
// Large-matrix addition assignment kernel for the non-vectorizable case. This overload is only
// available when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and
// forwards all arguments unchanged to selectDefaultAddAssignKernel().
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3581  template< typename VT1 // Type of the left-hand side target vector
3582  , typename MT1 // Type of the left-hand side matrix operand
3583  , typename VT2 // Type of the right-hand side vector operand
3584  , typename ST2 > // Type of the scalar value
3585  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3586  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3587  {
3588  selectDefaultAddAssignKernel( y, A, x, scalar );
3589  }
3590  //**********************************************************************************************
3591 
3592  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized large-matrix addition assignment kernel. This overload is only available when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
// The matrix is traversed in blocks of iblock rows and jblock columns. Within a block the rows
// are processed in chunks of 8, 4, 3, 2 and 1 intrinsic vectors (IT::size elements each); each
// chunk accumulates A(i..,j) * x[j] over the columns of the current block only, so every
// element of y receives one scaled partial sum per column block.
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3607  template< typename VT1 // Type of the left-hand side target vector
3608  , typename MT1 // Type of the left-hand side matrix operand
3609  , typename VT2 // Type of the right-hand side vector operand
3610  , typename ST2 > // Type of the scalar value
3611  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3612  selectLargeAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3613  {
3614  typedef IntrinsicTrait<ElementType> IT;
3615 
3616  const size_t M( A.rows() );
3617  const size_t N( A.columns() );
3618 
      // A scalar remainder loop is required unless both the matrix and the target are padded.
3619  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3620 
      // Row block size: the number of elements that fit into 32768 bytes.
      // NOTE(review): presumably chosen to match a 32 KB L1 data cache - confirm.
      // Column block size: 8 columns if N is smaller than iblock, 4 columns otherwise.
3621  const size_t iblock( 32768UL / sizeof( ElementType ) );
3622  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
3623 
3624  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
3625 
      // The scalar broadcast into an intrinsic vector.
3626  const IntrinsicType factor( set( scalar ) );
3627 
3628  for( size_t ii=0U; ii<M; ii+=iblock ) {
3629  for( size_t jj=0UL; jj<N; jj+=jblock )
3630  {
      // End of the current column block and of the current row block. For upper matrices the
      // row range is additionally limited by the end of the column block (minus one if
      // strictly upper).
3631  const size_t jend( min( jj+jblock, N ) );
3632  const size_t itmp( min( ii+iblock, M ) );
3633  const size_t iend( ( IsUpper<MT1>::value )
3634  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
3635  :( itmp ) );
3636 
      // End of the vectorized row range: iend rounded down to a multiple of IT::size if a
      // remainder loop is used (see the assertion below), iend itself otherwise.
3637  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
3638  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
3639 
      // First row of the block. For lower matrices the rows above the first column of the block
      // (plus one if strictly lower) are skipped; the start index is masked with
      // size_t(-IT::size), presumably to round it down to a multiple of IT::size - confirm.
3640  size_t i( ( IsLower<MT1>::value )
3641  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
3642  :( ii ) );
3643 
      // Chunks of 8 intrinsic vectors.
3644  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
3645  {
      // NOTE(review): the accumulators are default-constructed; the kernel presumably relies on
      // IntrinsicType's default constructor zero-initializing them - confirm.
3646  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3647 
3648  for( size_t j=jj; j<jend; ++j ) {
3649  const IntrinsicType x1( set( x[j] ) );
3650  xmm1 = xmm1 + A.load(i ,j) * x1;
3651  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3652  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3653  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3654  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3655  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3656  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3657  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3658  }
3659 
      // y += partial sum of this column block * scalar
3660  y.store( i , y.load(i ) + xmm1*factor );
3661  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3662  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3663  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3664  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) + xmm5*factor );
3665  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) + xmm6*factor );
3666  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) + xmm7*factor );
3667  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) + xmm8*factor );
3668  }
3669 
      // Chunks of 4 intrinsic vectors.
3670  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
3671  {
3672  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3673 
3674  for( size_t j=jj; j<jend; ++j ) {
3675  const IntrinsicType x1( set( x[j] ) );
3676  xmm1 = xmm1 + A.load(i ,j) * x1;
3677  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3678  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3679  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3680  }
3681 
3682  y.store( i , y.load(i ) + xmm1*factor );
3683  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3684  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3685  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) + xmm4*factor );
3686  }
3687 
      // Chunks of 3 intrinsic vectors.
3688  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
3689  {
3690  IntrinsicType xmm1, xmm2, xmm3;
3691 
3692  for( size_t j=jj; j<jend; ++j ) {
3693  const IntrinsicType x1( set( x[j] ) );
3694  xmm1 = xmm1 + A.load(i ,j) * x1;
3695  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3696  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3697  }
3698 
3699  y.store( i , y.load(i ) + xmm1*factor );
3700  y.store( i+IT::size , y.load(i+IT::size ) + xmm2*factor );
3701  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) + xmm3*factor );
3702  }
3703 
      // Chunks of 2 intrinsic vectors.
3704  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
3705  {
3706  IntrinsicType xmm1, xmm2;
3707 
3708  for( size_t j=jj; j<jend; ++j ) {
3709  const IntrinsicType x1( set( x[j] ) );
3710  xmm1 = xmm1 + A.load(i ,j) * x1;
3711  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
3712  }
3713 
3714  y.store( i , y.load(i ) + xmm1*factor );
3715  y.store( i+IT::size, y.load(i+IT::size) + xmm2*factor );
3716  }
3717 
      // Single intrinsic vectors up to the end of the vectorized range.
3718  for( ; i<ipos; i+=IT::size )
3719  {
3720  IntrinsicType xmm1;
3721 
3722  for( size_t j=jj; j<jend; ++j ) {
3723  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
3724  }
3725 
3726  y.store( i, y.load(i) + xmm1*factor );
3727  }
3728 
      // Scalar treatment of the remaining rows (only executed if a remainder loop is required).
3729  for( ; remainder && i<iend; ++i )
3730  {
3731  ElementType value = ElementType();
3732 
3733  for( size_t j=jj; j<jend; ++j ) {
3734  value += A(i,j) * x[j];
3735  }
3736 
3737  y[i] += value * scalar;
3738  }
3739  }
3740  }
3741  }
3742  //**********************************************************************************************
3743 
3744  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the BLAS addition assignment kernel. It only participates in overload
// resolution when UseBlasKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and forwards all
// arguments unchanged to selectLargeAddAssignKernel().
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3758  template< typename VT1 // Type of the left-hand side target vector
3759  , typename MT1 // Type of the left-hand side matrix operand
3760  , typename VT2 // Type of the right-hand side vector operand
3761  , typename ST2 > // Type of the scalar value
3762  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3763  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3764  {
3765  selectLargeAddAssignKernel( y, A, x, scalar );
3766  }
3767  //**********************************************************************************************
3768 
3769  //**BLAS-based addition assignment to dense vectors*********************************************
3770 #if BLAZE_BLAS_MODE
3771 
// BLAS overload of the addition assignment kernel. It is enabled only when
// UseBlasKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
//   y      - target vector of the addition assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3784  template< typename VT1 // Type of the left-hand side target vector
3785  , typename MT1 // Type of the left-hand side matrix operand
3786  , typename VT2 // Type of the right-hand side vector operand
3787  , typename ST2 > // Type of the scalar value
3788  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
3789  selectBlasAddAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3790  {
3791  typedef typename VT1::ElementType ET;
3792 
      // Triangular matrices: scalar*x is evaluated serially into a temporary, the temporary is
      // passed to trmv() together with A, and the result is added to y.
      // NOTE(review): presumably trmv() multiplies tmp by A in place (tmp = A*tmp) - confirm
      // against blaze/math/blas/trmv.h.
3793  if( IsTriangular<MT1>::value ) {
3794  typename VT1::ResultType tmp( serial( scalar * x ) );
3795  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3796  addAssign( y, tmp );
3797  }
      // All other matrices: gemv() is called with the factors ET(scalar) and ET(1).
      // NOTE(review): presumably these are alpha and beta of the usual BLAS gemv convention
      // (y = alpha*A*x + beta*y), i.e. the product is accumulated onto y - confirm.
3798  else {
3799  gemv( y, A, x, ET(scalar), ET(1) );
3800  }
3801  }
3802 #endif
3803  //**********************************************************************************************
3804 
3805  //**Addition assignment to sparse vectors*******************************************************
3806  // No special implementation for the addition assignment to sparse vectors.
3807  //**********************************************************************************************
3808 
3809  //**Subtraction assignment to dense vectors*****************************************************
// Subtraction assignment of the scaled matrix/vector multiplication expression to a dense
// vector. The operands of the wrapped multiplication are extracted and evaluated serially, then
// the work is handed to selectSubAssignKernel() together with the scalar of the expression.
//   lhs - target dense vector
//   rhs - scaled multiplication expression to be subtracted
3821  template< typename VT1 > // Type of the target dense vector
3822  friend inline void subAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
3823  {
3825 
3826  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3827 
3828  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
3829  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
3830 
      // Nothing to subtract if the matrix has no rows or no columns.
3831  if( left.rows() == 0UL || left.columns() == 0UL ) {
3832  return;
3833  }
3834 
3835  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3836  RT x( serial( right ) ); // Evaluation of the right-hand side dense vector operand
3837 
3838  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3839  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
3840  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
3841  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
3842 
3843  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, A, x, rhs.scalar_ );
3844  }
3845  //**********************************************************************************************
3846 
3847  //**Subtraction assignment to dense vectors (kernel selection)**********************************
// Kernel selection for the subtraction assignment to dense vectors.
// The small kernel is used if MT1 is diagonal, if MT is a computation and evaluateMatrix is
// false, or if the number of matrix elements (rows * columns) is below
// TDMATDVECMULT_THRESHOLD. In all other cases the BLAS kernel selection is used.
// NOTE(review): the second condition tests the class-level type MT, not the kernel's MT1.
//   y      - target vector of the subtraction assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3858  template< typename VT1 // Type of the left-hand side target vector
3859  , typename MT1 // Type of the left-hand side matrix operand
3860  , typename VT2 // Type of the right-hand side vector operand
3861  , typename ST2 > // Type of the scalar value
3862  static inline void selectSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3863  {
3864  if( ( IsDiagonal<MT1>::value ) ||
3865  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3866  ( A.rows() * A.columns() < TDMATDVECMULT_THRESHOLD ) )
3867  selectSmallSubAssignKernel( y, A, x, scalar );
3868  else
3869  selectBlasSubAssignKernel( y, A, x, scalar );
3870  }
3871  //**********************************************************************************************
3872 
3873  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) subtraction assignment kernel: builds the expression A * x * scalar
// and passes it to the subAssign() member function of the target vector.
//   y      - target vector of the subtraction assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3887  template< typename VT1 // Type of the left-hand side target vector
3888  , typename MT1 // Type of the left-hand side matrix operand
3889  , typename VT2 // Type of the right-hand side vector operand
3890  , typename ST2 > // Type of the scalar value
3891  static inline void selectDefaultSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3892  {
3893  y.subAssign( A * x * scalar );
3894  }
3895  //**********************************************************************************************
3896 
3897  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Small-matrix subtraction assignment kernel for the non-vectorizable case. This overload is
// only available when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and
// forwards all arguments unchanged to selectDefaultSubAssignKernel().
//   y      - target vector of the subtraction assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3911  template< typename VT1 // Type of the left-hand side target vector
3912  , typename MT1 // Type of the left-hand side matrix operand
3913  , typename VT2 // Type of the right-hand side vector operand
3914  , typename ST2 > // Type of the scalar value
3915  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3916  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3917  {
3918  selectDefaultSubAssignKernel( y, A, x, scalar );
3919  }
3920  //**********************************************************************************************
3921 
3922  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized small-matrix subtraction assignment kernel. This overload is only available when
// UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds (EnableIf).
// The rows of A are processed in chunks of 8, 4, 3, 2 and 1 intrinsic vectors (IT::size
// elements each). For every chunk the products A(i..,j) * x[j] are accumulated over the
// relevant columns j, scaled by the scalar and subtracted from the corresponding elements of y.
// Rows that do not fill a complete intrinsic vector are handled by a final scalar loop.
//   y      - target vector of the subtraction assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
3937  template< typename VT1 // Type of the left-hand side target vector
3938  , typename MT1 // Type of the left-hand side matrix operand
3939  , typename VT2 // Type of the right-hand side vector operand
3940  , typename ST2 > // Type of the scalar value
3941  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
3942  selectSmallSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
3943  {
3944  typedef IntrinsicTrait<ElementType> IT;
3945 
3946  const size_t M( A.rows() );
3947  const size_t N( A.columns() );
3948 
      // A scalar remainder loop is required unless both the matrix and the target are padded.
3949  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
3950 
      // End of the vectorized row range: M rounded down to a multiple of IT::size if a remainder
      // loop is used (see the assertion below), M itself otherwise.
3951  const size_t ipos( remainder ? ( M & size_t(-IT::size) ) : M );
3952  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % IT::size ) ) == ipos, "Invalid end calculation" );
3953 
      // The scalar broadcast into an intrinsic vector.
3954  const IntrinsicType factor( set( scalar ) );
3955 
3956  size_t i( 0UL );
3957 
      // Chunks of 8 intrinsic vectors. For upper matrices the column range starts at the first
      // row of the chunk (plus one if strictly upper), for lower matrices it ends after the last
      // row of the chunk (minus one if strictly lower); otherwise all N columns are traversed.
3958  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
3959  {
3960  const size_t jbegin( ( IsUpper<MT1>::value )
3961  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3962  :( 0UL ) );
3963  const size_t jend( ( IsLower<MT1>::value )
3964  ?( min( i+IT::size*8UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3965  :( N ) );
3966  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3967 
      // NOTE(review): the accumulators are default-constructed; the kernel presumably relies on
      // IntrinsicType's default constructor zero-initializing them - confirm.
3968  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3969 
3970  for( size_t j=jbegin; j<jend; ++j ) {
3971  const IntrinsicType x1( set( x[j] ) );
3972  xmm1 = xmm1 + A.load(i ,j) * x1;
3973  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
3974  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
3975  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
3976  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
3977  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
3978  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
3979  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
3980  }
3981 
      // y -= accumulated sum * scalar
3982  y.store( i , y.load(i ) - xmm1*factor );
3983  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
3984  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
3985  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
3986  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
3987  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
3988  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
3989  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
3990  }
3991 
      // Chunks of 4 intrinsic vectors (same scheme as above).
3992  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
3993  {
3994  const size_t jbegin( ( IsUpper<MT1>::value )
3995  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
3996  :( 0UL ) );
3997  const size_t jend( ( IsLower<MT1>::value )
3998  ?( min( i+IT::size*4UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
3999  :( N ) );
4000  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4001 
4002  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4003 
4004  for( size_t j=jbegin; j<jend; ++j ) {
4005  const IntrinsicType x1( set( x[j] ) );
4006  xmm1 = xmm1 + A.load(i ,j) * x1;
4007  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4008  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4009  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4010  }
4011 
4012  y.store( i , y.load(i ) - xmm1*factor );
4013  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4014  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4015  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4016  }
4017 
      // Chunks of 3 intrinsic vectors.
4018  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
4019  {
4020  const size_t jbegin( ( IsUpper<MT1>::value )
4021  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4022  :( 0UL ) );
4023  const size_t jend( ( IsLower<MT1>::value )
4024  ?( min( i+IT::size*3UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4025  :( N ) );
4026  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4027 
4028  IntrinsicType xmm1, xmm2, xmm3;
4029 
4030  for( size_t j=jbegin; j<jend; ++j ) {
4031  const IntrinsicType x1( set( x[j] ) );
4032  xmm1 = xmm1 + A.load(i ,j) * x1;
4033  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4034  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4035  }
4036 
4037  y.store( i , y.load(i ) - xmm1*factor );
4038  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4039  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4040  }
4041 
      // Chunks of 2 intrinsic vectors.
4042  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
4043  {
4044  const size_t jbegin( ( IsUpper<MT1>::value )
4045  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4046  :( 0UL ) );
4047  const size_t jend( ( IsLower<MT1>::value )
4048  ?( min( i+IT::size*2UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4049  :( N ) );
4050  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4051 
4052  IntrinsicType xmm1, xmm2;
4053 
4054  for( size_t j=jbegin; j<jend; ++j ) {
4055  const IntrinsicType x1( set( x[j] ) );
4056  xmm1 = xmm1 + A.load(i ,j) * x1;
4057  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
4058  }
4059 
4060  y.store( i , y.load(i ) - xmm1*factor );
4061  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
4062  }
4063 
      // Single intrinsic vectors up to the end of the vectorized range.
4064  for( ; i<ipos; i+=IT::size )
4065  {
4066  const size_t jbegin( ( IsUpper<MT1>::value )
4067  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4068  :( 0UL ) );
4069  const size_t jend( ( IsLower<MT1>::value )
4070  ?( min( i+IT::size, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4071  :( N ) );
4072  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4073 
4074  IntrinsicType xmm1;
4075 
4076  for( size_t j=jbegin; j<jend; ++j ) {
4077  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
4078  }
4079 
4080  y.store( i, y.load(i) - xmm1*factor );
4081  }
4082 
      // Scalar treatment of the remaining rows (only executed if a remainder loop is required).
4083  for( ; remainder && i<M; ++i )
4084  {
4085  const size_t jbegin( ( IsUpper<MT1>::value )
4086  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
4087  :( 0UL ) );
4088  const size_t jend( ( IsLower<MT1>::value )
4089  ?( min( i+1UL, N ) - ( IsStrictlyLower<MT1>::value ? 1UL : 0UL ) )
4090  :( N ) );
4091  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4092 
4093  ElementType value = ElementType();
4094 
4095  for( size_t j=jbegin; j<jend; ++j ) {
4096  value += A(i,j) * x[j];
4097  }
4098 
4099  y[i] -= value * scalar;
4100  }
4101  }
4102  //**********************************************************************************************
4103 
4104  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Large-matrix subtraction assignment kernel for the non-vectorizable case. This overload is
// only available when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> does not hold (DisableIf) and
// forwards all arguments unchanged to selectDefaultSubAssignKernel().
//   y      - target vector of the subtraction assignment
//   A      - left-hand side matrix operand
//   x      - right-hand side vector operand
//   scalar - scaling factor of the expression
4118  template< typename VT1 // Type of the left-hand side target vector
4119  , typename MT1 // Type of the left-hand side matrix operand
4120  , typename VT2 // Type of the right-hand side vector operand
4121  , typename ST2 > // Type of the scalar value
4122  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4123  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4124  {
4125  selectDefaultSubAssignKernel( y, A, x, scalar );
4126  }
4127  //**********************************************************************************************
4128 
4129  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
// Vectorized overload of the large-matrix subtraction assignment kernel.
//
// Selected via EnableIf when UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> holds.
// For every row i it accumulates sum_j A(i,j)*x[j] and subtracts that sum, multiplied
// by 'scalar', from y[i]. The work is tiled: rows are processed in blocks of 'iblock'
// elements and columns in blocks of 'jblock' columns, so y is updated once per
// (row block, column block) tile.
//
// y      : target vector, read and written via load()/store() and operator[]
// A      : matrix operand, read via load(i,j) and operator()(i,j)
// x      : vector operand, read element-wise and broadcast via set()
// scalar : scaling factor applied to each accumulated partial sum
//
// NOTE(review): the xmm accumulators below are default-constructed and then read in
// 'xmmN = xmmN + ...'. This presumably relies on IntrinsicType's default constructor
// zero-initializing its value -- confirm against the intrinsic type definitions.
4144  template< typename VT1 // Type of the left-hand side target vector
4145  , typename MT1 // Type of the left-hand side matrix operand
4146  , typename VT2 // Type of the right-hand side vector operand
4147  , typename ST2 > // Type of the scalar value
4148  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,MT1,VT2,ST2> >::Type
4149  selectLargeSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4150  {
4151  typedef IntrinsicTrait<ElementType> IT;
4152 
4153  const size_t M( A.rows() );
4154  const size_t N( A.columns() );
4155 
      // A scalar tail loop is needed unless both the matrix and the target vector are padded.
4156  const bool remainder( !IsPadded<MT1>::value || !IsPadded<VT1>::value );
4157 
      // Row block: the number of elements that fit into 32768 bytes.
      // Column block: 8 columns if N is smaller than the row block size, otherwise 4.
4158  const size_t iblock( 32768UL / sizeof( ElementType ) );
4159  const size_t jblock( ( N < iblock )?( 8UL ):( 4UL ) );
4160 
4161  BLAZE_INTERNAL_ASSERT( ( iblock % IT::size ) == 0UL, "Invalid block size detected" );
4162 
      // The scalar is broadcast once into a SIMD vector and reused for every store.
4163  const IntrinsicType factor( set( scalar ) );
4164 
4165  for( size_t ii=0U; ii<M; ii+=iblock ) {
4166  for( size_t jj=0UL; jj<N; jj+=jblock )
4167  {
      // [jj,jend) is the column range of this tile; itmp is the unrestricted row end.
4168  const size_t jend( min( jj+jblock, N ) );
4169  const size_t itmp( min( ii+iblock, M ) );
      // For upper matrices the row range is cut off at jend (jend-1 if strictly upper).
4170  const size_t iend( ( IsUpper<MT1>::value )
4171  ?( min( itmp, ( IsStrictlyUpper<MT1>::value ? jend-1UL : jend ) ) )
4172  :( itmp ) );
4173 
      // ipos is the end of the SIMD-processed rows: iend rounded down to a multiple of
      // IT::size when a scalar remainder loop exists, otherwise iend itself. The mask
      // form is cross-checked against the modulo form by the assertion below.
4174  const size_t ipos( remainder ? ( iend & size_t(-IT::size) ) : iend );
4175  BLAZE_INTERNAL_ASSERT( !remainder || ( iend - ( iend % IT::size ) ) == ipos, "Invalid end calculation" );
4176 
      // For lower matrices the first row is jj (jj+1 if strictly lower), rounded down
      // to a multiple of IT::size, but never before the start of the row block.
4177  size_t i( ( IsLower<MT1>::value )
4178  ?( max( ii, ( IsStrictlyLower<MT1>::value ? jj+1UL : jj ) & size_t(-IT::size) ) )
4179  :( ii ) );
4180 
      // Eight SIMD vectors of rows per iteration.
4181  for( ; (i+IT::size*7UL) < ipos; i+=IT::size*8UL )
4182  {
4183  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4184 
4185  for( size_t j=jj; j<jend; ++j ) {
4186  const IntrinsicType x1( set( x[j] ) );
4187  xmm1 = xmm1 + A.load(i ,j) * x1;
4188  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4189  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4190  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4191  xmm5 = xmm5 + A.load(i+IT::size*4UL,j) * x1;
4192  xmm6 = xmm6 + A.load(i+IT::size*5UL,j) * x1;
4193  xmm7 = xmm7 + A.load(i+IT::size*6UL,j) * x1;
4194  xmm8 = xmm8 + A.load(i+IT::size*7UL,j) * x1;
4195  }
4196 
4197  y.store( i , y.load(i ) - xmm1*factor );
4198  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4199  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4200  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4201  y.store( i+IT::size*4UL, y.load(i+IT::size*4UL) - xmm5*factor );
4202  y.store( i+IT::size*5UL, y.load(i+IT::size*5UL) - xmm6*factor );
4203  y.store( i+IT::size*6UL, y.load(i+IT::size*6UL) - xmm7*factor );
4204  y.store( i+IT::size*7UL, y.load(i+IT::size*7UL) - xmm8*factor );
4205  }
4206 
      // Four SIMD vectors of rows per iteration.
4207  for( ; (i+IT::size*3UL) < ipos; i+=IT::size*4UL )
4208  {
4209  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4210 
4211  for( size_t j=jj; j<jend; ++j ) {
4212  const IntrinsicType x1( set( x[j] ) );
4213  xmm1 = xmm1 + A.load(i ,j) * x1;
4214  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4215  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4216  xmm4 = xmm4 + A.load(i+IT::size*3UL,j) * x1;
4217  }
4218 
4219  y.store( i , y.load(i ) - xmm1*factor );
4220  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4221  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4222  y.store( i+IT::size*3UL, y.load(i+IT::size*3UL) - xmm4*factor );
4223  }
4224 
      // Three SIMD vectors of rows per iteration.
4225  for( ; (i+IT::size*2UL) < ipos; i+=IT::size*3UL )
4226  {
4227  IntrinsicType xmm1, xmm2, xmm3;
4228 
4229  for( size_t j=jj; j<jend; ++j ) {
4230  const IntrinsicType x1( set( x[j] ) );
4231  xmm1 = xmm1 + A.load(i ,j) * x1;
4232  xmm2 = xmm2 + A.load(i+IT::size ,j) * x1;
4233  xmm3 = xmm3 + A.load(i+IT::size*2UL,j) * x1;
4234  }
4235 
4236  y.store( i , y.load(i ) - xmm1*factor );
4237  y.store( i+IT::size , y.load(i+IT::size ) - xmm2*factor );
4238  y.store( i+IT::size*2UL, y.load(i+IT::size*2UL) - xmm3*factor );
4239  }
4240 
      // Two SIMD vectors of rows per iteration.
4241  for( ; (i+IT::size) < ipos; i+=IT::size*2UL )
4242  {
4243  IntrinsicType xmm1, xmm2;
4244 
4245  for( size_t j=jj; j<jend; ++j ) {
4246  const IntrinsicType x1( set( x[j] ) );
4247  xmm1 = xmm1 + A.load(i ,j) * x1;
4248  xmm2 = xmm2 + A.load(i+IT::size,j) * x1;
4249  }
4250 
4251  y.store( i , y.load(i ) - xmm1*factor );
4252  y.store( i+IT::size, y.load(i+IT::size) - xmm2*factor );
4253  }
4254 
      // One SIMD vector of rows per iteration.
4255  for( ; i<ipos; i+=IT::size )
4256  {
4257  IntrinsicType xmm1;
4258 
4259  for( size_t j=jj; j<jend; ++j ) {
4260  xmm1 = xmm1 + A.load(i,j) * set( x[j] );
4261  }
4262 
4263  y.store( i, y.load(i) - xmm1*factor );
4264  }
4265 
      // Scalar tail for the rows in [ipos,iend); only entered when 'remainder' is set.
4266  for( ; remainder && i<iend; ++i )
4267  {
4268  ElementType value = ElementType();
4269 
4270  for( size_t j=jj; j<jend; ++j ) {
4271  value += A(i,j) * x[j];
4272  }
4273 
4274  y[i] -= value * scalar;
4275  }
4276  }
4277  }
4278  }
4279  //**********************************************************************************************
4280 
4281  //**BLAS-based subtraction assignment to dense vectors (default)********************************
// Default overload of the BLAS-based subtraction assignment kernel.
//
// Selected via DisableIf, i.e. when UseBlasKernel<VT1,MT1,VT2,ST2> does not hold.
// No BLAS routine is called here: all four arguments are forwarded unchanged to
// selectLargeSubAssignKernel().
//
// y      : target vector that is updated in place
// A      : left-hand side matrix operand
// x      : right-hand side vector operand
// scalar : scaling factor, passed through as is
4295  template< typename VT1 // Type of the left-hand side target vector
4296  , typename MT1 // Type of the left-hand side matrix operand
4297  , typename VT2 // Type of the right-hand side vector operand
4298  , typename ST2 > // Type of the scalar value
4299  static inline typename DisableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
4300  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4301  {
4302  selectLargeSubAssignKernel( y, A, x, scalar );
4303  }
4304  //**********************************************************************************************
4305 
4306  //**BLAS-based subtraction assignment to dense vectors******************************************
4307 #if BLAZE_BLAS_MODE
4308 
// BLAS overload of the subtraction assignment kernel.
//
// Selected via EnableIf when UseBlasKernel<VT1,MT1,VT2,ST2> holds; the surrounding
// preprocessor guard compiles it only if BLAZE_BLAS_MODE is enabled.
//
// y      : target vector that is updated in place
// A      : left-hand side matrix operand
// x      : right-hand side vector operand
// scalar : scaling factor
//
// Triangular matrices: the scaled vector 'scalar * x' is evaluated serially into a
// temporary, trmv() is applied to that temporary with A (lower/upper chosen from
// IsLower<MT1>), and the result is subtracted from y via subAssign().
// All other matrices: a single gemv() call with the factors ET(-scalar) and ET(1).
// NOTE(review): presumably gemv computes y = alpha*A*x + beta*y, so this yields
// y -= scalar*A*x -- confirm against blaze/math/blas/gemv.h.
4321  template< typename VT1 // Type of the left-hand side target vector
4322  , typename MT1 // Type of the left-hand side matrix operand
4323  , typename VT2 // Type of the right-hand side vector operand
4324  , typename ST2 > // Type of the scalar value
4325  static inline typename EnableIf< UseBlasKernel<VT1,MT1,VT2,ST2> >::Type
4326  selectBlasSubAssignKernel( VT1& y, const MT1& A, const VT2& x, ST2 scalar )
4327  {
4328  typedef typename VT1::ElementType ET;
4329 
4330  if( IsTriangular<MT1>::value ) {
      // trmv works in place on its vector argument, hence the separate temporary.
4331  typename VT1::ResultType tmp( serial( scalar * x ) );
4332  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4333  subAssign( y, tmp );
4334  }
4335  else {
4336  gemv( y, A, x, ET(-scalar), ET(1) );
4337  }
4338  }
4339 #endif
4340  //**********************************************************************************************
4341 
4342  //**Subtraction assignment to sparse vectors****************************************************
4343  // No special implementation for the subtraction assignment to sparse vectors.
4344  //**********************************************************************************************
4345 
4346  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a scaled matrix/vector product to a dense vector.
//
// lhs : target dense vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// The expression is first evaluated serially into a temporary of type ResultType;
// the temporary is then multiplication-assigned to the target via multAssign().
4358  template< typename VT1 > // Type of the target dense vector
4359  friend inline void multAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4360  {
4362 
4366 
4367  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4368 
4369  const ResultType tmp( serial( rhs ) );
4370  multAssign( ~lhs, tmp );
4371  }
4372  //**********************************************************************************************
4373 
4374  //**Multiplication assignment to sparse vectors*************************************************
4375  // No special implementation for the multiplication assignment to sparse vectors.
4376  //**********************************************************************************************
4377 
4378  //**SMP assignment to dense vectors**************************************************************
// SMP assignment of a scaled matrix/vector product to a dense vector.
//
// Enabled only if UseSMPAssign<VT1> holds.
//
// lhs : target dense vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// Special cases: a matrix without rows leaves the target untouched; a matrix without
// columns resets the target. Otherwise both operands of the wrapped product are
// evaluated into LT/RT objects and the product of those, scaled by rhs.scalar_,
// is handed to smpAssign().
4392  template< typename VT1 > // Type of the target dense vector
4393  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4394  smpAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4395  {
4397 
4398  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4399 
4400  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4401  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4402 
4403  if( left.rows() == 0UL ) {
4404  return;
4405  }
4406  else if( left.columns() == 0UL ) {
      // No columns means every element of the product is an empty sum.
4407  reset( ~lhs );
4408  return;
4409  }
4410 
4411  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4412  RT x( right ); // Evaluation of the right-hand side dense vector operand
4413 
4414  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4415  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4416  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4417  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4418 
4419  smpAssign( ~lhs, A * x * rhs.scalar_ );
4420  }
4421  //**********************************************************************************************
4422 
4423  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of a scaled matrix/vector product to a sparse vector.
//
// Enabled only if UseSMPAssign<VT1> holds.
//
// lhs : target sparse vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// The expression is evaluated into a temporary of type ResultType, which is then
// passed to smpAssign() for the target.
4437  template< typename VT1 > // Type of the target sparse vector
4438  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4439  smpAssign( SparseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4440  {
4442 
4446 
4447  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4448 
4449  const ResultType tmp( rhs );
4450  smpAssign( ~lhs, tmp );
4451  }
4452  //**********************************************************************************************
4453 
4454  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of a scaled matrix/vector product to a dense vector.
//
// Enabled only if UseSMPAssign<VT1> holds.
//
// lhs : target dense vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// If the matrix operand has no rows or no columns the target is left untouched.
// Otherwise both operands of the wrapped product are evaluated into LT/RT objects
// and the product of those, scaled by rhs.scalar_, is handed to smpAddAssign().
4468  template< typename VT1 > // Type of the target dense vector
4469  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4470  smpAddAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4471  {
4473 
4474  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4475 
4476  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4477  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4478 
4479  if( left.rows() == 0UL || left.columns() == 0UL ) {
4480  return;
4481  }
4482 
4483  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4484  RT x( right ); // Evaluation of the right-hand side dense vector operand
4485 
4486  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4487  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4488  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4489  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4490 
4491  smpAddAssign( ~lhs, A * x * rhs.scalar_ );
4492  }
4493  //**********************************************************************************************
4494 
4495  //**SMP addition assignment to sparse vectors***************************************************
4496  // No special implementation for the SMP addition assignment to sparse vectors.
4497  //**********************************************************************************************
4498 
4499  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of a scaled matrix/vector product to a dense vector.
//
// Enabled only if UseSMPAssign<VT1> holds.
//
// lhs : target dense vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// If the matrix operand has no rows or no columns the target is left untouched.
// Otherwise both operands of the wrapped product are evaluated into LT/RT objects
// and the product of those, scaled by rhs.scalar_, is handed to smpSubAssign().
4513  template< typename VT1 > // Type of the target dense vector
4514  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4515  smpSubAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4516  {
4518 
4519  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4520 
4521  typename MVM::LeftOperand left ( rhs.vector_.leftOperand() );
4522  typename MVM::RightOperand right( rhs.vector_.rightOperand() );
4523 
4524  if( left.rows() == 0UL || left.columns() == 0UL ) {
4525  return;
4526  }
4527 
4528  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4529  RT x( right ); // Evaluation of the right-hand side dense vector operand
4530 
4531  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4532  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns(), "Invalid number of columns" );
4533  BLAZE_INTERNAL_ASSERT( x.size() == right.size() , "Invalid vector size" );
4534  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).size() , "Invalid vector size" );
4535 
4536  smpSubAssign( ~lhs, A * x * rhs.scalar_ );
4537  }
4538  //**********************************************************************************************
4539 
4540  //**SMP subtraction assignment to sparse vectors************************************************
4541  // No special implementation for the SMP subtraction assignment to sparse vectors.
4542  //**********************************************************************************************
4543 
4544  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of a scaled matrix/vector product to a dense vector.
//
// Enabled only if UseSMPAssign<VT1> holds.
//
// lhs : target dense vector
// rhs : scaled multiplication expression; its size must match the size of lhs
//       (checked by an internal assertion only)
//
// The expression is evaluated into a temporary of type ResultType, which is then
// passed to smpMultAssign() for the target.
4559  template< typename VT1 > // Type of the target dense vector
4560  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4561  smpMultAssign( DenseVector<VT1,false>& lhs, const DVecScalarMultExpr& rhs )
4562  {
4564 
4568 
4569  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4570 
4571  const ResultType tmp( rhs );
4572  smpMultAssign( ~lhs, tmp );
4573  }
4574  //**********************************************************************************************
4575 
4576  //**SMP multiplication assignment to sparse vectors*********************************************
4577  // No special implementation for the SMP multiplication assignment to sparse vectors.
4578  //**********************************************************************************************
4579 
4580  //**Compile time checks*************************************************************************
4588  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
4589  //**********************************************************************************************
4590 };
4592 //*************************************************************************************************
4593 
4594 
4595 
4596 
4597 //=================================================================================================
4598 //
4599 // GLOBAL BINARY ARITHMETIC OPERATORS
4600 //
4601 //=================================================================================================
4602 
4603 //*************************************************************************************************
// Multiplication operator for a dense matrix and a dense vector.
//
// mat : left-hand side dense matrix of the multiplication
// vec : right-hand side dense vector of the multiplication
//
// Returns a TDMatDVecMultExpr<T1,T2> expression object built from the two operands.
// The overload is disabled (DisableIf) when T1 is itself a matrix/matrix
// multiplication expression (IsMatMatMultExpr<T1>).
//
// Throws via BLAZE_THROW_INVALID_ARGUMENT if the number of matrix columns differs
// from the size of the vector.
//
// NOTE(review): the declarator line (listing line 4637) was missing from this listing,
// leaving a return type followed directly by the function body. It has been restored
// from the parameter names used in the body; the storage-order and transpose flags
// (true / false) are assumed from the column-major matrix and column vector this
// expression class is documented for -- confirm against the upstream header.
4634 template< typename T1 // Type of the left-hand side dense matrix
4635  , typename T2 > // Type of the right-hand side dense vector
4636 inline const typename DisableIf< IsMatMatMultExpr<T1>, TDMatDVecMultExpr<T1,T2> >::Type
4637  operator*( const DenseMatrix<T1,true>& mat, const DenseVector<T2,false>& vec )
4638 {
4640 
4641  if( (~mat).columns() != (~vec).size() ) {
4642  BLAZE_THROW_INVALID_ARGUMENT( "Matrix and vector sizes do not match" );
4643  }
4644 
4645  return TDMatDVecMultExpr<T1,T2>( ~mat, ~vec );
4646 }
4647 //*************************************************************************************************
4648 
4649 
4650 
4651 
4652 //=================================================================================================
4653 //
4654 // SIZE SPECIALIZATIONS
4655 //
4656 //=================================================================================================
4657 
4658 //*************************************************************************************************
// Size specialization for TDMatDVecMultExpr: the struct has no members of its own and
// inherits everything from Rows<MT>, i.e. the size of the resulting vector is taken
// from the row count of the matrix operand type.
4660 template< typename MT, typename VT >
4661 struct Size< TDMatDVecMultExpr<MT,VT> > : public Rows<MT>
4662 {};
4664 //*************************************************************************************************
4665 
4666 
4667 
4668 
4669 //=================================================================================================
4670 //
4671 // ISALIGNED SPECIALIZATIONS
4672 //
4673 //=================================================================================================
4674 
4675 //*************************************************************************************************
// IsAligned specialization for TDMatDVecMultExpr: the expression is reported as
// aligned only if both IsAligned<MT> and IsAligned<VT> hold (combined via And and
// exposed through IsTrue).
4677 template< typename MT, typename VT >
4678 struct IsAligned< TDMatDVecMultExpr<MT,VT> >
4679  : public IsTrue< And< IsAligned<MT>, IsAligned<VT> >::value >
4680 {};
4682 //*************************************************************************************************
4683 
4684 
4685 
4686 
4687 //=================================================================================================
4688 //
4689 // EXPRESSION TRAIT SPECIALIZATIONS
4690 //
4691 //=================================================================================================
4692 
4693 //*************************************************************************************************
// SubvectorExprTrait specialization for TDMatDVecMultExpr.
//
// The nested 'Type' is the MultExprTrait result for the submatrix expression type of
// 'const MT' and the subvector expression type of 'const VT', both instantiated with
// the same flag AF.
4695 template< typename MT, typename VT, bool AF >
4696 struct SubvectorExprTrait< TDMatDVecMultExpr<MT,VT>, AF >
4697 {
4698  public:
4699  //**********************************************************************************************
4700  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT,AF>::Type
4701  , typename SubvectorExprTrait<const VT,AF>::Type >::Type Type;
4702  //**********************************************************************************************
4703 };
4705 //*************************************************************************************************
4706 
4707 } // namespace blaze
4708 
4709 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for mathematical functions.
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:7820
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a column dense or sparse vector type...
Definition: ColumnVector.h:79
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
TDMatDVecMultExpr< MT, VT > This
Type of this TDMatDVecMultExpr instance.
Definition: TDMatDVecMultExpr.h:200
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Header file for the RequiresEvaluation type trait.
MRT::ElementType MET
Element type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:127
System settings for performance optimizations.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDVecMultExpr.h:204
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Expression object for transpose dense matrix-dense vector multiplications. The TDMatDVecMultExpr class...
Definition: Forward.h:139
Constraint on the data type.
Header file for the IsComplexDouble type trait.
MT::ResultType MRT
Result type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:125
SelectType< evaluateVector, const VRT, VCT >::Type RT
Type for the assignment of the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:218
Constraint on the transpose flag of vector types.
Compile time check for low-level access to mutable data. This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDVecMultExpr.h:319
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDVecMultExpr.h:353
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:261
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:79
Header file for the IsMatMatMultExpr type trait class.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the IsBlasCompatible type trait.
Base class for N-dimensional dense vectors. The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:209
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATVECMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/vector ...
Definition: MatVecMultExpr.h:166
Constraint on the data type.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
VT::CompositeType VCT
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:130
Header file for the serial shim.
TDMatDVecMultExpr(const MT &mat, const VT &vec)
Constructor for the TDMatDVecMultExpr class.
Definition: TDMatDVecMultExpr.h:240
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDMatDVecMultExpr.h:254
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense vector operand.
Definition: TDMatDVecMultExpr.h:329
Header file for the HasConstDataAccess type trait.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDVecMultExpr.h:203
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the SubmatrixExprTrait class template.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
VRT::ElementType VET
Element type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:128
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:138
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDVecMultExpr.h:341
VT::ResultType VRT
Result type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:126
SelectType< evaluateMatrix, const MRT, MCT >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDVecMultExpr.h:215
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type RightOperand
Composite type of the right-hand side dense vector expression.
Definition: TDMatDVecMultExpr.h:212
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:202
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
const bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDMatDVecMultExpr.h:309
Header file for the reset shim.
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
MultTrait< MRT, VRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:201
Constraints on the storage order of matrix types.
Header file for the HasMutableDataAccess type trait.
MT::CompositeType MCT
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDVecMultExpr.h:129
Header file for all intrinsic functionality.
Header file for BLAS general matrix/vector multiplication functions (gemv)
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDVecMultExpr.h:373
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:258
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDMatDVecMultExpr.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the IsComplexFloat type trait.
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
BLAZE_ALWAYS_INLINE size_t columns(const Matrix< MT, SO > &matrix)
Returns the current number of columns of the matrix.
Definition: Matrix.h:324
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Constraint on the data type.
Header file for the complex data type.
Header file for the IsUpper type trait.
Header file for exception macros.
LeftOperand mat_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDVecMultExpr.h:383
Header file for the MatVecMultExpr base class.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
RightOperand vec_
Right-hand side dense vector of the multiplication expression.
Definition: TDMatDVecMultExpr.h:384
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDVecMultExpr.h:206
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDVecMultExpr.h:205
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDVecMultExpr.h:363
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.