TDVecDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDVECDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemv.h>
44 #include <blaze/math/blas/trmv.h>
56 #include <blaze/math/Functions.h>
57 #include <blaze/math/Intrinsics.h>
58 #include <blaze/math/shims/Reset.h>
80 #include <blaze/system/BLAS.h>
83 #include <blaze/util/Assert.h>
84 #include <blaze/util/Complex.h>
87 #include <blaze/util/DisableIf.h>
88 #include <blaze/util/EnableIf.h>
89 #include <blaze/util/Exception.h>
91 #include <blaze/util/SelectType.h>
92 #include <blaze/util/Types.h>
101 
102 
103 namespace blaze {
104 
105 //=================================================================================================
106 //
107 // CLASS TDVECDMATMULTEXPR
108 //
109 //=================================================================================================
110 
111 //*************************************************************************************************
118 template< typename VT // Type of the left-hand side dense vector
119  , typename MT > // Type of the right-hand side dense matrix
120 class TDVecDMatMultExpr : public DenseVector< TDVecDMatMultExpr<VT,MT>, true >
121  , private TVecMatMultExpr
122  , private Computation
123 {
124  private:
125  //**Type definitions****************************************************************************
// Private shorthands for the nested types of the two operand expression types.
126  typedef typename VT::ResultType VRT;  // ResultType of the left-hand side vector operand
127  typedef typename MT::ResultType MRT;  // ResultType of the right-hand side matrix operand
128  typedef typename VRT::ElementType VET;  // ElementType of VRT
129  typedef typename MRT::ElementType MET;  // ElementType of MRT
130  typedef typename VT::CompositeType VCT;  // CompositeType of the vector operand
131  typedef typename MT::CompositeType MCT;  // CompositeType of the matrix operand
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
// Compile-time switch for the vector operand: non-zero when VT is itself a computation
// or reports that it requires an evaluation. It feeds UseSMPAssign and smpAssignable.
136  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
// Compile-time switch for the matrix operand: non-zero when MT is a computation whose
// element type equals the vector's element type and is BLAS compatible, or when MT reports
// that it requires an evaluation. The kernel selectors and canSMPAssign() test
// "IsComputation<MT>::value && !evaluateMatrix" to detect a matrix expression that is NOT
// evaluated up front.
// NOTE(review): the closing line of this enum was missing from the listing (the expression
// ended on a dangling "&&" with an unbalanced parenthesis); it is restored here from the
// upstream header -- confirm against the repository.
141  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
142  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
143  //**********************************************************************************************
144 
145  //**********************************************************************************************
147 
// Helper trait whose value is non-zero whenever at least one operand has to be evaluated
// (evaluateVector or evaluateMatrix). The template parameter T1 does not influence the value.
// NOTE(review): presumably used to enable the SMP assignment overloads, which are outside
// this part of the listing -- confirm.
151  template< typename T1 >
152  struct UseSMPAssign {
153  enum { value = ( evaluateVector || evaluateMatrix ) };
154  };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
// Helper trait selecting between the two selectBlasAssignKernel() overloads.
// T1 is the target vector type, T2 the vector operand type, T3 the matrix operand type.
// The value is non-zero only if ALL of the following hold:
//  - BLAZE_BLAS_MODE is enabled,
//  - T1 offers mutable data access, T2 and T3 offer constant data access,
//  - T3 is not a diagonal matrix,
//  - all three types are vectorizable,
//  - all three element types are BLAS compatible and identical to each other.
163  template< typename T1, typename T2, typename T3 >
164  struct UseBlasKernel {
165  enum { value = BLAZE_BLAS_MODE &&
166  HasMutableDataAccess<T1>::value &&
167  HasConstDataAccess<T2>::value &&
168  HasConstDataAccess<T3>::value &&
169  !IsDiagonal<T3>::value &&
170  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
171  IsBlasCompatible<typename T1::ElementType>::value &&
172  IsBlasCompatible<typename T2::ElementType>::value &&
173  IsBlasCompatible<typename T3::ElementType>::value &&
174  IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
175  IsSame< typename T1::ElementType, typename T3::ElementType >::value };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
// Helper trait selecting between the plain and the intrinsics-based default kernels
// (see the EnableIf/DisableIf pairs on selectSmallAssignKernel / selectLargeAssignKernel).
// T1 is the target vector type, T2 the vector operand type, T3 the matrix operand type.
// The value is non-zero only if useOptimizedKernels is set, T3 is not diagonal, all three
// types are vectorizable with identical element types, and IntrinsicTrait reports both
// addition and multiplication for that element type.
186  template< typename T1, typename T2, typename T3 >
187  struct UseVectorizedDefaultKernel {
188  enum { value = useOptimizedKernels &&
189  !IsDiagonal<T3>::value &&
190  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
193  IntrinsicTrait<typename T1::ElementType>::addition &&
194  IntrinsicTrait<typename T1::ElementType>::multiplication };
195  };
197  //**********************************************************************************************
198 
199  public:
200  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are used here but their typedefs are not part
// of this listing (the line numbering skips them) -- confirm against the full header.
// Return type of the element access functions operator[]() and at().
206  typedef const ElementType ReturnType;
// Type used when this expression is itself an operand of another expression.
207  typedef const ResultType CompositeType;
208 
// Storage type of the vector operand: selected via SelectType on IsExpression<VT>
// between "const VT" and "const VT&".
210  typedef typename SelectType< IsExpression<VT>::value, const VT, const VT& >::Type LeftOperand;
211 
// Storage type of the matrix operand: selected via SelectType on IsExpression<MT>
// between "const MT" and "const MT&".
213  typedef typename SelectType< IsExpression<MT>::value, const MT, const MT& >::Type RightOperand;
214 
217 
220  //**********************************************************************************************
221 
222  //**Compilation flags***************************************************************************
// Compilation flag for intrinsic (vectorized) evaluation of the expression: the matrix must
// not be diagonal, both operands must be vectorizable, their element types must be
// identical, and IntrinsicTrait must report addition and multiplication for that type.
// NOTE(review): the last three lines of this enum were missing from the listing (the
// expression ended on a dangling "&&"); they are restored to mirror the conditions of
// UseVectorizedDefaultKernel -- confirm against the repository.
224  enum { vectorizable = !IsDiagonal<MT>::value &&
225  VT::vectorizable && MT::vectorizable &&
226  IsSame<VET,MET>::value &&
227  IntrinsicTrait<VET>::addition &&
228  IntrinsicTrait<VET>::multiplication };
229 
// Compilation flag for SMP assignments: non-zero only if neither operand needs to be
// evaluated (evaluateVector / evaluateMatrix are both zero) and both operand types are
// themselves flagged smpAssignable.
231  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
232  !evaluateMatrix && MT::smpAssignable };
233  //**********************************************************************************************
234 
235  //**Constructor*********************************************************************************
241  explicit inline TDVecDMatMultExpr( const VT& vec, const MT& mat )
242  : vec_( vec ) // Left-hand side dense vector of the multiplication expression
243  , mat_( mat ) // Right-hand side dense matrix of the multiplication expression
244  {
245  BLAZE_INTERNAL_ASSERT( vec_.size() == mat_.rows(), "Invalid vector and matrix sizes" );
246  }
247  //**********************************************************************************************
248 
249  //**Subscript operator**************************************************************************
// Subscript operator: computes element `index` of the product, i.e. the dot product of
// the vector operand with column `index` of the matrix operand.
//   index  column index; must be smaller than mat_.columns() (checked by an internal
//          assertion only -- use at() for a checked access)
// Returns the computed element by value.
255  inline ReturnType operator[]( size_t index ) const {
256  BLAZE_INTERNAL_ASSERT( index < mat_.columns(), "Invalid vector access index" );
257 
// Columns that are structurally empty (last column of a strictly lower matrix, first
// column of a strictly upper matrix) and matrices without rows yield a default element.
258  if( ( IsStrictlyLower<MT>::value && index == mat_.columns()-1UL ) ||
259  ( IsStrictlyUpper<MT>::value && index == 0UL ) ||
260  mat_.rows() == 0UL )
261  return ElementType();
262 
// Diagonal matrices contribute exactly one term per column. This guard (listing line 263)
// was missing, which made the shortcut unconditional and the general dot product below
// unreachable; restored so that non-diagonal matrices use the full summation.
263  if( IsDiagonal<MT>::value )
264  return vec_[index] * mat_(index,index);
265 
// Row range [ibegin,iend) that can hold non-zero entries in column `index`.
266  const size_t ibegin( ( IsLower<MT>::value )
267  ?( IsStrictlyLower<MT>::value ? index+1UL : index )
268  :( 0UL ) );
269  const size_t iend( ( IsUpper<MT>::value )
270  ?( IsStrictlyUpper<MT>::value ? index : index+1UL )
271  :( mat_.rows() ) );
272  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
273 
// The first term initializes the result; the remaining inum-1 terms are consumed in
// pairs up to ipos, followed by at most one trailing term.
274  const size_t inum( iend - ibegin );
275  const size_t ipos( ibegin + ( ( inum - 1UL ) & size_t(-2) ) + 1UL );
276 
277  ElementType res( vec_[ibegin] * mat_(ibegin,index) );
278 
279  for( size_t i=ibegin+1UL; i<ipos; i+=2UL ) {
280  res += vec_[i] * mat_(i,index) + vec_[i+1UL] * mat_(i+1UL,index);
281  }
282  if( ipos < iend ) {
283  res += vec_[ipos] * mat_(ipos,index);
284  }
285 
286  return res;
287  }
288  //**********************************************************************************************
289 
290  //**At function*********************************************************************************
297  inline ReturnType at( size_t index ) const {
298  if( index >= mat_.columns() ) {
299  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
300  }
301  return (*this)[index];
302  }
303  //**********************************************************************************************
304 
305  //**Size function*******************************************************************************
310  inline size_t size() const {
311  return mat_.columns();
312  }
313  //**********************************************************************************************
314 
315  //**Left operand access*************************************************************************
320  inline LeftOperand leftOperand() const {
321  return vec_;
322  }
323  //**********************************************************************************************
324 
325  //**Right operand access************************************************************************
330  inline RightOperand rightOperand() const {
331  return mat_;
332  }
333  //**********************************************************************************************
334 
335  //**********************************************************************************************
341  template< typename T >
342  inline bool canAlias( const T* alias ) const {
343  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
344  }
345  //**********************************************************************************************
346 
347  //**********************************************************************************************
353  template< typename T >
354  inline bool isAliased( const T* alias ) const {
355  return ( vec_.isAliased( alias ) || mat_.isAliased( alias ) );
356  }
357  //**********************************************************************************************
358 
359  //**********************************************************************************************
364  inline bool isAligned() const {
365  return vec_.isAligned() && mat_.isAligned();
366  }
367  //**********************************************************************************************
368 
369  //**********************************************************************************************
374  inline bool canSMPAssign() const {
375  return ( !BLAZE_BLAS_IS_PARALLEL ||
376  ( IsComputation<MT>::value && !evaluateMatrix ) ||
377  ( mat_.rows() * mat_.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
378  ( size() > SMP_TDVECDMATMULT_THRESHOLD );
379  }
380  //**********************************************************************************************
381 
382  private:
383  //**Member variables****************************************************************************
// Operands of the expression, initialized by the constructor. Whether each is held by
// value or by const reference is decided by the LeftOperand / RightOperand typedefs.
384  LeftOperand vec_;  // Left-hand side dense vector of the multiplication expression
385  RightOperand mat_;  // Right-hand side dense matrix of the multiplication expression
386  //**********************************************************************************************
387 
388  //**Assignment to dense vectors*****************************************************************
401  template< typename VT1 > // Type of the target dense vector
402  friend inline void assign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
403  {
405 
406  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
407 
408  if( rhs.mat_.rows() == 0UL ) {
409  reset( ~lhs );
410  return;
411  }
412  else if( rhs.mat_.columns() == 0UL ) {
413  return;
414  }
415 
416  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
417  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
418 
419  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
420  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
421  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
422  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
423 
424  TDVecDMatMultExpr::selectAssignKernel( ~lhs, x, A );
425  }
427  //**********************************************************************************************
428 
429  //**Assignment to dense vectors (kernel selection)**********************************************
440  template< typename VT1 // Type of the left-hand side target vector
441  , typename VT2 // Type of the left-hand side vector operand
442  , typename MT1 > // Type of the right-hand side matrix operand
443  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A )
444  {
445  if( ( IsDiagonal<MT1>::value ) ||
446  ( IsComputation<MT>::value && !evaluateMatrix ) ||
447  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
448  selectSmallAssignKernel( y, x, A );
449  else
450  selectBlasAssignKernel( y, x, A );
451  }
453  //**********************************************************************************************
454 
455  //**Default assignment to dense vectors*********************************************************
// Default (scalar) kernel for the assignment y = x * A.
//   y  target vector, x  vector operand, A  matrix operand
// The matrix is traversed row by row. Each element of y is first ASSIGNED by the first
// row that can contribute to it and then ACCUMULATED into by later rows, so y does not
// need to be cleared beforehand. Elements that receive no contribution at all (strictly
// triangular matrices) are reset explicitly.
// x[0] and y[0] are accessed unconditionally for non-lower matrices; the visible caller
// assign() returns early for matrices without rows or columns.
469  template< typename VT1 // Type of the left-hand side target vector
470  , typename VT2 // Type of the left-hand side vector operand
471  , typename MT1 > // Type of the right-hand side matrix operand
472  static inline void selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A )
473  {
474  const size_t M( A.rows() );
475  const size_t N( A.columns() );
476 
// The first column of a strictly upper matrix is empty: y[0] gets no contribution.
477  if( IsStrictlyUpper<MT1>::value ) {
478  reset( y[0] );
479  }
480 
// General and upper matrices: row 0 initializes all elements it can reach.
481  if( !IsLower<MT1>::value )
482  {
483  const size_t jbegin( IsStrictlyUpper<MT1>::value ? 1UL : 0UL );
484  for( size_t j=jbegin; j<N; ++j ) {
485  y[j] = x[0UL] * A(0UL,j);
486  }
487  }
488 
// Remaining rows. Row 0 is only revisited here for lower (but not strictly lower)
// matrices, where it was not handled by the initialization above.
489  for( size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
490  {
491  if( IsDiagonal<MT1>::value )
492  {
493  y[i] = x[i] * A(i,i);
494  }
495  else
496  {
// Column range [jbegin,jend) of row i that is accumulated into y. For lower matrices
// jend excludes the last reachable column of the row, which is assigned separately below.
497  const size_t jbegin( ( IsUpper<MT1>::value )
498  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
499  :( 0UL ) );
500  const size_t jend( ( IsLower<MT1>::value )
501  ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
502  :( N ) );
503  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
504 
// Accumulate two columns per iteration up to jpos, then at most one trailing column.
505  const size_t jnum( jend - jbegin );
506  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
507 
508  for( size_t j=jbegin; j<jpos; j+=2UL ) {
509  y[j ] += x[i] * A(i,j );
510  y[j+1UL] += x[i] * A(i,j+1UL);
511  }
512  if( jpos < jend ) {
513  y[jpos] += x[i] * A(i,jpos);
514  }
// For lower matrices row i is the first row reaching column jend, so that element is
// assigned (not accumulated) here.
515  if( IsLower<MT1>::value ) {
516  y[jend] = x[i] * A(i,jend);
517  }
518  }
519  }
520 
// The last column of a strictly lower matrix is empty: y[N-1] gets no contribution.
521  if( IsStrictlyLower<MT1>::value ) {
522  reset( y[N-1UL] );
523  }
524  }
526  //**********************************************************************************************
527 
528  //**Default assignment to dense vectors (small matrices)****************************************
542  template< typename VT1 // Type of the left-hand side target vector
543  , typename VT2 // Type of the left-hand side vector operand
544  , typename MT1 > // Type of the right-hand side matrix operand
545  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
546  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
547  {
548  selectDefaultAssignKernel( y, x, A );
549  }
551  //**********************************************************************************************
552 
553  //**Vectorized default assignment to dense vectors (small matrices)*****************************
// Small-matrix assignment kernel, intrinsics-based overload (y = x * A).
// Chosen when UseVectorizedDefaultKernel is satisfied for the given types.
//   y  target vector, x  vector operand, A  matrix operand
// The columns are processed in groups of 8, 4, 3, 2 and 1 intrinsic vectors (IT::size
// elements each). For every group the products x[i] * A(i,j..) are accumulated over the
// relevant rows in local accumulators, which are then stored to y. Columns left over
// after the last full intrinsic vector are handled by a scalar loop.
// NOTE(review): IntrinsicType is not declared in this part of the listing, and the
// accumulators are default-constructed before being added to -- this presumably relies
// on the default constructor producing zeros; confirm.
567  template< typename VT1 // Type of the left-hand side target vector
568  , typename VT2 // Type of the left-hand side vector operand
569  , typename MT1 > // Type of the right-hand side matrix operand
570  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
571  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A )
572  {
573  typedef IntrinsicTrait<ElementType> IT;
574 
575  const size_t M( A.rows() );
576  const size_t N( A.columns() );
577 
// A scalar remainder loop is needed unless both the target vector and the matrix are padded.
578  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
579 
// End of the vectorized column range: N rounded down to a multiple of IT::size when a
// remainder loop is used (the assertion cross-checks the bit-mask rounding), else N.
580  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
581  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
582 
583  size_t j( 0UL );
584 
// Groups of 8 intrinsic vectors.
585  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
586  {
// Row range [ibegin,iend) limited to the rows that can hold non-zero entries in the
// columns of this group for lower / upper matrices.
587  const size_t ibegin( ( IsLower<MT1>::value )
588  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
589  :( 0UL ) );
590  const size_t iend( ( IsUpper<MT1>::value )
591  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
592  :( M ) );
593  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
594 
595  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
596 
597  for( size_t i=ibegin; i<iend; ++i ) {
// x1 holds x[i] as an intrinsic vector (presumably broadcast to all lanes by set()).
598  const IntrinsicType x1( set( x[i] ) );
599  xmm1 = xmm1 + x1 * A.load(i,j );
600  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
601  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
602  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
603  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
604  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
605  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
606  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
607  }
608 
// Plain stores: this is an assignment, previous contents of y are overwritten.
609  y.store( j , xmm1 );
610  y.store( j+IT::size , xmm2 );
611  y.store( j+IT::size*2UL, xmm3 );
612  y.store( j+IT::size*3UL, xmm4 );
613  y.store( j+IT::size*4UL, xmm5 );
614  y.store( j+IT::size*5UL, xmm6 );
615  y.store( j+IT::size*6UL, xmm7 );
616  y.store( j+IT::size*7UL, xmm8 );
617  }
618 
// Groups of 4 intrinsic vectors.
619  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
620  {
621  const size_t ibegin( ( IsLower<MT1>::value )
622  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
623  :( 0UL ) );
624  const size_t iend( ( IsUpper<MT1>::value )
625  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
626  :( M ) );
627  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
628 
629  IntrinsicType xmm1, xmm2, xmm3, xmm4;
630 
631  for( size_t i=ibegin; i<iend; ++i ) {
632  const IntrinsicType x1( set( x[i] ) );
633  xmm1 = xmm1 + x1 * A.load(i,j );
634  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
635  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
636  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
637  }
638 
639  y.store( j , xmm1 );
640  y.store( j+IT::size , xmm2 );
641  y.store( j+IT::size*2UL, xmm3 );
642  y.store( j+IT::size*3UL, xmm4 );
643  }
644 
// Groups of 3 intrinsic vectors.
645  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
646  {
647  const size_t ibegin( ( IsLower<MT1>::value )
648  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
649  :( 0UL ) );
650  const size_t iend( ( IsUpper<MT1>::value )
651  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
652  :( M ) );
653  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
654 
655  IntrinsicType xmm1, xmm2, xmm3;
656 
657  for( size_t i=ibegin; i<iend; ++i ) {
658  const IntrinsicType x1( set( x[i] ) );
659  xmm1 = xmm1 + x1 * A.load(i,j );
660  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
661  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
662  }
663 
664  y.store( j , xmm1 );
665  y.store( j+IT::size , xmm2 );
666  y.store( j+IT::size*2UL, xmm3 );
667  }
668 
// Groups of 2 intrinsic vectors.
669  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
670  {
671  const size_t ibegin( ( IsLower<MT1>::value )
672  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
673  :( 0UL ) );
674  const size_t iend( ( IsUpper<MT1>::value )
675  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
676  :( M ) );
677  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
678 
679  IntrinsicType xmm1, xmm2;
680 
681  for( size_t i=ibegin; i<iend; ++i ) {
682  const IntrinsicType x1( set( x[i] ) );
683  xmm1 = xmm1 + x1 * A.load(i,j );
684  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
685  }
686 
687  y.store( j , xmm1 );
688  y.store( j+IT::size, xmm2 );
689  }
690 
// Single intrinsic vectors.
691  for( ; j<jpos; j+=IT::size )
692  {
693  const size_t ibegin( ( IsLower<MT1>::value )
694  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
695  :( 0UL ) );
696  const size_t iend( ( IsUpper<MT1>::value )
697  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
698  :( M ) );
699  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
700 
701  IntrinsicType xmm1;
702 
703  for( size_t i=ibegin; i<iend; ++i ) {
704  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
705  }
706 
707  y.store( j, xmm1 );
708  }
709 
// Scalar remainder: one column at a time for the columns in [jpos,N).
710  for( ; remainder && j<N; ++j )
711  {
712  const size_t ibegin( ( IsLower<MT1>::value )
713  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
714  :( 0UL ) );
715  const size_t iend( ( IsUpper<MT1>::value )
716  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
717  :( M ) );
718  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
719 
720  ElementType value = ElementType();
721 
722  for( size_t i=ibegin; i<iend; ++i ) {
723  value += x[i] * A(i,j);
724  }
725 
726  y[j] = value;
727  }
728  }
730  //**********************************************************************************************
731 
732  //**Default assignment to dense vectors (large matrices)****************************************
746  template< typename VT1 // Type of the left-hand side target vector
747  , typename VT2 // Type of the left-hand side vector operand
748  , typename MT1 > // Type of the right-hand side matrix operand
749  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
750  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
751  {
752  selectDefaultAssignKernel( y, x, A );
753  }
755  //**********************************************************************************************
756 
757  //**Vectorized default assignment to dense vectors (large matrices)*****************************
// Large-matrix assignment kernel, intrinsics-based overload (y = x * A).
// Chosen when UseVectorizedDefaultKernel is satisfied for the given types.
//   y  target vector, x  vector operand, A  matrix operand
// The matrix is processed in blocks of jblock columns by iblock rows. y is reset once up
// front; every row block then ADDS its partial products to the current contents of y
// (load, add, store). Within a block the columns are handled in groups of 8, 4, 3, 2 and
// 1 intrinsic vectors, followed by a scalar loop for leftover columns.
// NOTE(review): IntrinsicType is not declared in this part of the listing, and the
// accumulators are default-constructed before being added to -- this presumably relies
// on the default constructor producing zeros; confirm.
771  template< typename VT1 // Type of the left-hand side target vector
772  , typename VT2 // Type of the left-hand side vector operand
773  , typename MT1 > // Type of the right-hand side matrix operand
774  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
775  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A )
776  {
777  typedef IntrinsicTrait<ElementType> IT;
778 
779  const size_t M( A.rows() );
780  const size_t N( A.columns() );
781 
// A scalar remainder loop is needed unless both the target vector and the matrix are padded.
782  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
783 
// Block sizes: jblock columns correspond to 32768 bytes of elements; iblock is 8 rows
// when all columns fit into a single column block and 4 rows otherwise.
784  const size_t jblock( 32768UL / sizeof( ElementType ) );
785  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
786 
787  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
788 
// All updates below accumulate into y, so it has to start from the reset state.
789  reset( y );
790 
791  for( size_t jj=0U; jj<N; jj+=jblock ) {
792  for( size_t ii=0UL; ii<M; ii+=iblock )
793  {
// Block limits. For lower matrices the column end is additionally capped by the last
// row of the block (columns beyond it hold no entries for these rows).
794  const size_t iend( min( ii+iblock, M ) );
795  const size_t jtmp( min( jj+jblock, N ) );
796  const size_t jend( ( IsLower<MT1>::value )
797  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
798  :( jtmp ) );
799 
// End of the vectorized column range within this block (jend rounded down to a multiple
// of IT::size when a remainder loop is used).
800  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
801  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
802 
// First column of the block. For upper matrices columns left of the first row of the
// block are skipped, rounded down to a multiple of IT::size.
803  size_t j( ( IsUpper<MT1>::value )
804  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
805  :( jj ) );
806 
// Groups of 8 intrinsic vectors.
807  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
808  {
809  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
810 
811  for( size_t i=ii; i<iend; ++i ) {
// x1 holds x[i] as an intrinsic vector (presumably broadcast to all lanes by set()).
812  const IntrinsicType x1( set( x[i] ) );
813  xmm1 = xmm1 + x1 * A.load(i,j );
814  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
815  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
816  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
817  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
818  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
819  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
820  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
821  }
822 
// Add the partial sums of this row block to what earlier row blocks left in y.
823  y.store( j , y.load(j ) + xmm1 );
824  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
825  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
826  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
827  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5 );
828  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6 );
829  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7 );
830  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8 );
831  }
832 
// Groups of 4 intrinsic vectors.
833  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
834  {
835  IntrinsicType xmm1, xmm2, xmm3, xmm4;
836 
837  for( size_t i=ii; i<iend; ++i ) {
838  const IntrinsicType x1( set( x[i] ) );
839  xmm1 = xmm1 + x1 * A.load(i,j );
840  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
841  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
842  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
843  }
844 
845  y.store( j , y.load(j ) + xmm1 );
846  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
847  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
848  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
849  }
850 
// Groups of 3 intrinsic vectors.
851  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
852  {
853  IntrinsicType xmm1, xmm2, xmm3;
854 
855  for( size_t i=ii; i<iend; ++i ) {
856  const IntrinsicType x1( set( x[i] ) );
857  xmm1 = xmm1 + x1 * A.load(i,j );
858  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
859  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
860  }
861 
862  y.store( j , y.load(j ) + xmm1 );
863  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
864  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
865  }
866 
// Groups of 2 intrinsic vectors.
867  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
868  {
869  IntrinsicType xmm1, xmm2;
870 
871  for( size_t i=ii; i<iend; ++i ) {
872  const IntrinsicType x1( set( x[i] ) );
873  xmm1 = xmm1 + x1 * A.load(i,j );
874  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
875  }
876 
877  y.store( j , y.load(j ) + xmm1 );
878  y.store( j+IT::size, y.load(j+IT::size) + xmm2 );
879  }
880 
// Single intrinsic vectors.
881  for( ; j<jpos; j+=IT::size )
882  {
883  IntrinsicType xmm1;
884 
885  for( size_t i=ii; i<iend; ++i ) {
886  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
887  }
888 
889  y.store( j, y.load(j) + xmm1 );
890  }
891 
// Scalar remainder: one column at a time for the columns in [jpos,jend).
892  for( ; remainder && j<jend; ++j )
893  {
894  ElementType value = ElementType();
895 
896  for( size_t i=ii; i<iend; ++i ) {
897  value += x[i] * A(i,j);
898  }
899 
900  y[j] += value;
901  }
902  }
903  }
904  }
906  //**********************************************************************************************
907 
908  //**BLAS-based assignment to dense vectors (default)********************************************
922  template< typename VT1 // Type of the left-hand side target vector
923  , typename VT2 // Type of the left-hand side vector operand
924  , typename MT1 > // Type of the right-hand side matrix operand
925  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
926  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
927  {
928  selectLargeAssignKernel( y, x, A );
929  }
931  //**********************************************************************************************
932 
933  //**BLAS-based assignment to dense vectors******************************************************
934 #if BLAZE_BLAS_MODE
935 
948  template< typename VT1 // Type of the left-hand side target vector
949  , typename VT2 // Type of the left-hand side vector operand
950  , typename MT1 > // Type of the right-hand side matrix operand
951  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
952  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A )
953  {
954  typedef typename VT1::ElementType ET;
955 
956  if( IsTriangular<MT1>::value ) {
957  assign( y, x );
958  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
959  }
960  else {
961  gemv( y, x, A, ET(1), ET(0) );
962  }
963  }
965 #endif
966  //**********************************************************************************************
967 
968  //**Assignment to sparse vectors****************************************************************
981  template< typename VT1 > // Type of the target sparse vector
982  friend inline void assign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
983  {
985 
989 
990  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
991 
992  const ResultType tmp( serial( rhs ) );
993  assign( ~lhs, tmp );
994  }
996  //**********************************************************************************************
997 
998  //**Addition assignment to dense vectors********************************************************
1011  template< typename VT1 > // Type of the target dense vector
1012  friend inline void addAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1013  {
1015 
1016  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1017 
1018  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1019  return;
1020  }
1021 
1022  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1023  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1024 
1025  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1026  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1027  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1028  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1029 
1030  TDVecDMatMultExpr::selectAddAssignKernel( ~lhs, x, A );
1031  }
1033  //**********************************************************************************************
1034 
1035  //**Addition assignment to dense vectors (kernel selection)*************************************
1046  template< typename VT1 // Type of the left-hand side target vector
1047  , typename VT2 // Type of the left-hand side vector operand
1048  , typename MT1 > // Type of the right-hand side matrix operand
1049  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1050  {
1051  if( ( IsDiagonal<MT1>::value ) ||
1052  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1053  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1054  selectSmallAddAssignKernel( y, x, A );
1055  else
1056  selectBlasAddAssignKernel( y, x, A );
1057  }
1059  //**********************************************************************************************
1060 
1061  //**Default addition assignment to dense vectors************************************************
1075  template< typename VT1 // Type of the left-hand side target vector
1076  , typename VT2 // Type of the left-hand side vector operand
1077  , typename MT1 > // Type of the right-hand side matrix operand
1078  static inline void selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1079  {
1080  const size_t M( A.rows() );
1081  const size_t N( A.columns() );
1082 
1083  for( size_t i=0UL; i<M; ++i )
1084  {
1085  if( IsDiagonal<MT1>::value )
1086  {
1087  y[i] += x[i] * A(i,i);
1088  }
1089  else
1090  {
1091  const size_t jbegin( ( IsUpper<MT1>::value )
1092  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1093  :( 0UL ) );
1094  const size_t jend( ( IsLower<MT1>::value )
1095  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1096  :( N ) );
1097  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1098 
1099  const size_t jnum( jend - jbegin );
1100  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1101 
1102  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1103  y[j ] += x[i] * A(i,j );
1104  y[j+1UL] += x[i] * A(i,j+1UL);
1105  }
1106  if( jpos < jend ) {
1107  y[jpos] += x[i] * A(i,jpos);
1108  }
1109  }
1110  }
1111  }
1113  //**********************************************************************************************
1114 
1115  //**Default addition assignment to dense vectors (small matrices)*******************************
1129  template< typename VT1 // Type of the left-hand side target vector
1130  , typename VT2 // Type of the left-hand side vector operand
1131  , typename MT1 > // Type of the right-hand side matrix operand
1132  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1133  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1134  {
1135  selectDefaultAddAssignKernel( y, x, A );
1136  }
1138  //**********************************************************************************************
1139 
1140  //**Vectorized default addition assignment to dense vectors (small matrices)********************
/*!\brief Vectorized addition assignment kernel for small matrices (\f$ \vec{y}^T += \vec{x}^T * A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is enabled (EnableIf) only if UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// The columns of A are traversed in chunks of 8, 4, 3, 2 and 1 SIMD vectors, where consecutive
// loads/stores are IT::size elements apart. For every chunk the current values of y are loaded
// into accumulators, the products x[i] * A(i,j...) of all contributing rows are added, and the
// accumulators are stored back to y. Columns that are not covered by the vectorized loops are
// processed by a scalar loop at the end.
*/
1154  template< typename VT1 // Type of the left-hand side target vector
1155  , typename VT2 // Type of the left-hand side vector operand
1156  , typename MT1 > // Type of the right-hand side matrix operand
1157  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1158  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1159  {
1160  typedef IntrinsicTrait<ElementType> IT;
1161
1162  const size_t M( A.rows() );
1163  const size_t N( A.columns() );
1164
      // A scalar remainder loop is needed unless both the target vector and the matrix are padded
1165  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
1166
      // End of the vectorized column range: with a remainder loop, N is rounded down to a multiple
      // of IT::size (the bit mask requires IT::size to be a power of two; the assertion below
      // cross-checks the result against the modulo formulation), otherwise all N columns are used
1167  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
1168  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
1169
1170  size_t j( 0UL );
1171
      // Chunks of eight SIMD vectors
1172  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
1173  {
      // Row range contributing to this chunk: lower matrices start at the chunk's first column
      // (plus one if strictly lower), upper matrices end at the chunk's last column, limited to
      // M (minus one if strictly upper); general matrices use all M rows
1174  const size_t ibegin( ( IsLower<MT1>::value )
1175  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1176  :( 0UL ) );
1177  const size_t iend( ( IsUpper<MT1>::value )
1178  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1179  :( M ) );
1180  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1181
      // The accumulators start with the current target values, so the products are added on top
1182  IntrinsicType xmm1( y.load(j ) );
1183  IntrinsicType xmm2( y.load(j+IT::size ) );
1184  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1185  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1186  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1187  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1188  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1189  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1190
1191  for( size_t i=ibegin; i<iend; ++i ) {
      // x[i] is passed through set() once per row and reused for all eight column vectors
1192  const IntrinsicType x1( set( x[i] ) );
1193  xmm1 = xmm1 + x1 * A.load(i,j );
1194  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1195  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1196  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1197  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1198  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1199  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1200  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1201  }
1202
1203  y.store( j , xmm1 );
1204  y.store( j+IT::size , xmm2 );
1205  y.store( j+IT::size*2UL, xmm3 );
1206  y.store( j+IT::size*3UL, xmm4 );
1207  y.store( j+IT::size*4UL, xmm5 );
1208  y.store( j+IT::size*5UL, xmm6 );
1209  y.store( j+IT::size*6UL, xmm7 );
1210  y.store( j+IT::size*7UL, xmm8 );
1211  }
1212
      // Chunks of four SIMD vectors (same scheme as above)
1213  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
1214  {
1215  const size_t ibegin( ( IsLower<MT1>::value )
1216  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1217  :( 0UL ) );
1218  const size_t iend( ( IsUpper<MT1>::value )
1219  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1220  :( M ) );
1221  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1222
1223  IntrinsicType xmm1( y.load(j ) );
1224  IntrinsicType xmm2( y.load(j+IT::size ) );
1225  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1226  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1227
1228  for( size_t i=ibegin; i<iend; ++i ) {
1229  const IntrinsicType x1( set( x[i] ) );
1230  xmm1 = xmm1 + x1 * A.load(i,j );
1231  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1232  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1233  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1234  }
1235
1236  y.store( j , xmm1 );
1237  y.store( j+IT::size , xmm2 );
1238  y.store( j+IT::size*2UL, xmm3 );
1239  y.store( j+IT::size*3UL, xmm4 );
1240  }
1241
      // Chunks of three SIMD vectors
1242  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
1243  {
1244  const size_t ibegin( ( IsLower<MT1>::value )
1245  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1246  :( 0UL ) );
1247  const size_t iend( ( IsUpper<MT1>::value )
1248  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1249  :( M ) );
1250  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1251
1252  IntrinsicType xmm1( y.load(j ) );
1253  IntrinsicType xmm2( y.load(j+IT::size ) );
1254  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1255
1256  for( size_t i=ibegin; i<iend; ++i ) {
1257  const IntrinsicType x1( set( x[i] ) );
1258  xmm1 = xmm1 + x1 * A.load(i,j );
1259  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1260  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1261  }
1262
1263  y.store( j , xmm1 );
1264  y.store( j+IT::size , xmm2 );
1265  y.store( j+IT::size*2UL, xmm3 );
1266  }
1267
      // Chunks of two SIMD vectors
1268  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
1269  {
1270  const size_t ibegin( ( IsLower<MT1>::value )
1271  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1272  :( 0UL ) );
1273  const size_t iend( ( IsUpper<MT1>::value )
1274  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1275  :( M ) );
1276  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1277
1278  IntrinsicType xmm1( y.load(j ) );
1279  IntrinsicType xmm2( y.load(j+IT::size) );
1280
1281  for( size_t i=ibegin; i<iend; ++i ) {
1282  const IntrinsicType x1( set( x[i] ) );
1283  xmm1 = xmm1 + x1 * A.load(i,j );
1284  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1285  }
1286
1287  y.store( j , xmm1 );
1288  y.store( j+IT::size, xmm2 );
1289  }
1290
      // Single SIMD vectors up to the end of the vectorized range
1291  for( ; j<jpos; j+=IT::size )
1292  {
1293  const size_t ibegin( ( IsLower<MT1>::value )
1294  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1295  :( 0UL ) );
1296  const size_t iend( ( IsUpper<MT1>::value )
1297  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1298  :( M ) );
1299  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1300
1301  IntrinsicType xmm1( y.load(j) );
1302
1303  for( size_t i=ibegin; i<iend; ++i ) {
1304  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1305  }
1306
1307  y.store( j, xmm1 );
1308  }
1309
      // Scalar handling of the columns between jpos and N (only if a remainder is possible)
1310  for( ; remainder && j<N; ++j )
1311  {
1312  const size_t ibegin( ( IsLower<MT1>::value )
1313  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1314  :( 0UL ) );
1315  const size_t iend( ( IsUpper<MT1>::value )
1316  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1317  :( M ) );
1318  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1319
      // The column's dot product is summed separately and then added to y[j] in one step
1320  ElementType value = ElementType();
1321
1322  for( size_t i=ibegin; i<iend; ++i ) {
1323  value += x[i] * A(i,j);
1324  }
1325
1326  y[j] += value;
1327  }
1328  }
1330  //**********************************************************************************************
1331 
1332  //**Default addition assignment to dense vectors (large matrices)*******************************
1346  template< typename VT1 // Type of the left-hand side target vector
1347  , typename VT2 // Type of the left-hand side vector operand
1348  , typename MT1 > // Type of the right-hand side matrix operand
1349  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1350  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1351  {
1352  selectDefaultAddAssignKernel( y, x, A );
1353  }
1355  //**********************************************************************************************
1356 
1357  //**Vectorized default addition assignment to dense vectors (large matrices)********************
/*!\brief Vectorized addition assignment kernel for large matrices (\f$ \vec{y}^T += \vec{x}^T * A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is enabled (EnableIf) only if UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// The matrix is processed in blocks of jblock columns and iblock rows. Within a block the
// columns are traversed in chunks of 8, 4, 3, 2 and 1 SIMD vectors: the products
// x[i] * A(i,j...) of the block's rows are summed in local accumulators, which are then added
// to the corresponding elements of y. Columns of a block that are not covered by the
// vectorized loops are processed by a scalar loop.
*/
1371  template< typename VT1 // Type of the left-hand side target vector
1372  , typename VT2 // Type of the left-hand side vector operand
1373  , typename MT1 > // Type of the right-hand side matrix operand
1374  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1375  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1376  {
1377  typedef IntrinsicTrait<ElementType> IT;
1378
1379  const size_t M( A.rows() );
1380  const size_t N( A.columns() );
1381
      // A scalar remainder loop is needed unless both the target vector and the matrix are padded
1382  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
1383
      // Column block width: the number of elements that fit into 32768 bytes
      // NOTE(review): presumably chosen to match a cache size - confirm
1384  const size_t jblock( 32768UL / sizeof( ElementType ) );
      // Row block height: 8 rows if all columns fit into one column block, 4 rows otherwise
1385  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1386
1387  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
1388
1389  for( size_t jj=0U; jj<N; jj+=jblock ) {
1390  for( size_t ii=0UL; ii<M; ii+=iblock )
1391  {
1392  const size_t iend( min( ii+iblock, M ) );
1393  const size_t jtmp( min( jj+jblock, N ) );
      // Lower matrices: the column range additionally ends at the last row of this row block
      // (one column earlier if strictly lower)
1394  const size_t jend( ( IsLower<MT1>::value )
1395  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
1396  :( jtmp ) );
1397
      // End of the vectorized column range of this block (jend rounded down to a multiple of
      // IT::size if a remainder loop is needed)
1398  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
1399  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
1400
      // Upper matrices: start at the first row of this row block (plus one if strictly upper),
      // rounded down to a multiple of IT::size, but never before the start of the column block
1401  size_t j( ( IsUpper<MT1>::value )
1402  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
1403  :( jj ) );
1404
      // Chunks of eight SIMD vectors
1405  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
1406  {
      // NOTE(review): the accumulators are default-constructed and used as summands right away;
      // this presumably relies on IntrinsicType's default constructor zero-initializing - confirm
1407  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1408
1409  for( size_t i=ii; i<iend; ++i ) {
1410  const IntrinsicType x1( set( x[i] ) );
1411  xmm1 = xmm1 + x1 * A.load(i,j );
1412  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1413  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1414  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1415  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1416  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1417  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1418  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1419  }
1420
      // Add the partial sums of this row block to the current target values
1421  y.store( j , y.load(j ) + xmm1 );
1422  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1423  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1424  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
1425  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5 );
1426  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6 );
1427  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7 );
1428  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8 );
1429  }
1430
      // Chunks of four SIMD vectors (same scheme as above)
1431  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
1432  {
1433  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1434
1435  for( size_t i=ii; i<iend; ++i ) {
1436  const IntrinsicType x1( set( x[i] ) );
1437  xmm1 = xmm1 + x1 * A.load(i,j );
1438  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1439  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1440  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1441  }
1442
1443  y.store( j , y.load(j ) + xmm1 );
1444  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1445  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1446  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4 );
1447  }
1448
      // Chunks of three SIMD vectors
1449  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
1450  {
1451  IntrinsicType xmm1, xmm2, xmm3;
1452
1453  for( size_t i=ii; i<iend; ++i ) {
1454  const IntrinsicType x1( set( x[i] ) );
1455  xmm1 = xmm1 + x1 * A.load(i,j );
1456  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1457  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1458  }
1459
1460  y.store( j , y.load(j ) + xmm1 );
1461  y.store( j+IT::size , y.load(j+IT::size ) + xmm2 );
1462  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3 );
1463  }
1464
      // Chunks of two SIMD vectors
1465  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
1466  {
1467  IntrinsicType xmm1, xmm2;
1468
1469  for( size_t i=ii; i<iend; ++i ) {
1470  const IntrinsicType x1( set( x[i] ) );
1471  xmm1 = xmm1 + x1 * A.load(i,j );
1472  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
1473  }
1474
1475  y.store( j , y.load(j ) + xmm1 );
1476  y.store( j+IT::size, y.load(j+IT::size) + xmm2 );
1477  }
1478
      // Single SIMD vectors up to the end of the vectorized range
1479  for( ; j<jpos; j+=IT::size )
1480  {
1481  IntrinsicType xmm1;
1482
1483  for( size_t i=ii; i<iend; ++i ) {
1484  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
1485  }
1486
1487  y.store( j, y.load(j) + xmm1 );
1488  }
1489
      // Scalar handling of the columns between jpos and jend (only if a remainder is possible)
1490  for( ; remainder && j<jend; ++j )
1491  {
1492  ElementType value = ElementType();
1493
1494  for( size_t i=ii; i<iend; ++i ) {
1495  value += x[i] * A(i,j);
1496  }
1497
1498  y[j] += value;
1499  }
1500  }
1501  }
1502  }
1504  //**********************************************************************************************
1505 
1506  //**BLAS-based addition assignment to dense vectors (default)***********************************
1520  template< typename VT1 // Type of the left-hand side target vector
1521  , typename VT2 // Type of the left-hand side vector operand
1522  , typename MT1 > // Type of the right-hand side matrix operand
1523  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
1524  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1525  {
1526  selectLargeAddAssignKernel( y, x, A );
1527  }
1529  //**********************************************************************************************
1530 
1531  //**BLAS-based addition assignment to dense vectors*********************************************
1532 #if BLAZE_BLAS_MODE
1533 
1546  template< typename VT1 // Type of the left-hand side target vector
1547  , typename VT2 // Type of the left-hand side vector operand
1548  , typename MT1 > // Type of the right-hand side matrix operand
1549  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
1550  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A )
1551  {
1552  typedef typename VT1::ElementType ET;
1553 
1554  if( IsTriangular<MT1>::value ) {
1555  typename VT1::ResultType tmp( serial( x ) );
1556  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
1557  addAssign( y, tmp );
1558  }
1559  else {
1560  gemv( y, x, A, ET(1), ET(1) );
1561  }
1562  }
1564 #endif
1565  //**********************************************************************************************
1566 
1567  //**Addition assignment to sparse vectors*******************************************************
1568  // No special implementation for the addition assignment to sparse vectors.
1569  //**********************************************************************************************
1570 
1571  //**Subtraction assignment to dense vectors*****************************************************
1584  template< typename VT1 > // Type of the target dense vector
1585  friend inline void subAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
1586  {
1588 
1589  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
1590 
1591  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
1592  return;
1593  }
1594 
1595  LT x( serial( rhs.vec_ ) ); // Evaluation of the left-hand side dense vector operand
1596  RT A( serial( rhs.mat_ ) ); // Evaluation of the right-hand side dense matrix operand
1597 
1598  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
1599  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
1600  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
1601  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
1602 
1603  TDVecDMatMultExpr::selectSubAssignKernel( ~lhs, x, A );
1604  }
1606  //**********************************************************************************************
1607 
1608  //**Subtraction assignment to dense vectors (kernel selection)**********************************
1619  template< typename VT1 // Type of the left-hand side target vector
1620  , typename VT2 // Type of the left-hand side vector operand
1621  , typename MT1 > // Type of the right-hand side matrix operand
1622  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1623  {
1624  if( ( IsDiagonal<MT1>::value ) ||
1625  ( IsComputation<MT>::value && !evaluateMatrix ) ||
1626  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
1627  selectSmallSubAssignKernel( y, x, A );
1628  else
1629  selectBlasSubAssignKernel( y, x, A );
1630  }
1632  //**********************************************************************************************
1633 
1634  //**Default subtraction assignment to dense vectors*********************************************
1648  template< typename VT1 // Type of the left-hand side target vector
1649  , typename VT2 // Type of the left-hand side vector operand
1650  , typename MT1 > // Type of the right-hand side matrix operand
1651  static inline void selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1652  {
1653  const size_t M( A.rows() );
1654  const size_t N( A.columns() );
1655 
1656  for( size_t i=0UL; i<M; ++i )
1657  {
1658  if( IsDiagonal<MT1>::value )
1659  {
1660  y[i] -= x[i] * A(i,i);
1661  }
1662  else
1663  {
1664  const size_t jbegin( ( IsUpper<MT1>::value )
1665  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
1666  :( 0UL ) );
1667  const size_t jend( ( IsLower<MT1>::value )
1668  ?( IsStrictlyLower<MT1>::value ? i : i+1UL )
1669  :( N ) );
1670  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1671 
1672  const size_t jnum( jend - jbegin );
1673  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1674 
1675  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1676  y[j ] -= x[i] * A(i,j );
1677  y[j+1UL] -= x[i] * A(i,j+1UL);
1678  }
1679  if( jpos < jend ) {
1680  y[jpos] -= x[i] * A(i,jpos);
1681  }
1682  }
1683  }
1684  }
1686  //**********************************************************************************************
1687 
1688  //**Default subtraction assignment to dense vectors (small matrices)****************************
1702  template< typename VT1 // Type of the left-hand side target vector
1703  , typename VT2 // Type of the left-hand side vector operand
1704  , typename MT1 > // Type of the right-hand side matrix operand
1705  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1706  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1707  {
1708  selectDefaultSubAssignKernel( y, x, A );
1709  }
1711  //**********************************************************************************************
1712 
1713  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
/*!\brief Vectorized subtraction assignment kernel for small matrices (\f$ \vec{y}^T -= \vec{x}^T * A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is enabled (EnableIf) only if UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// The columns of A are traversed in chunks of 8, 4, 3, 2 and 1 SIMD vectors, where consecutive
// loads/stores are IT::size elements apart. For every chunk the current values of y are loaded
// into accumulators, the products x[i] * A(i,j...) of all contributing rows are subtracted, and
// the accumulators are stored back to y. Columns that are not covered by the vectorized loops
// are processed by a scalar loop at the end.
*/
1728  template< typename VT1 // Type of the left-hand side target vector
1729  , typename VT2 // Type of the left-hand side vector operand
1730  , typename MT1 > // Type of the right-hand side matrix operand
1731  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1732  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1733  {
1734  typedef IntrinsicTrait<ElementType> IT;
1735
1736  const size_t M( A.rows() );
1737  const size_t N( A.columns() );
1738
      // A scalar remainder loop is needed unless both the target vector and the matrix are padded
1739  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
1740
      // End of the vectorized column range: with a remainder loop, N is rounded down to a multiple
      // of IT::size (the bit mask requires IT::size to be a power of two; the assertion below
      // cross-checks the result against the modulo formulation), otherwise all N columns are used
1741  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
1742  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
1743
1744  size_t j( 0UL );
1745
      // Chunks of eight SIMD vectors
1746  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
1747  {
      // Row range contributing to this chunk: lower matrices start at the chunk's first column
      // (plus one if strictly lower), upper matrices end at the chunk's last column, limited to
      // M (minus one if strictly upper); general matrices use all M rows
1748  const size_t ibegin( ( IsLower<MT1>::value )
1749  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1750  :( 0UL ) );
1751  const size_t iend( ( IsUpper<MT1>::value )
1752  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1753  :( M ) );
1754  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1755
      // The accumulators start with the current target values, so the products are subtracted from them
1756  IntrinsicType xmm1( y.load(j ) );
1757  IntrinsicType xmm2( y.load(j+IT::size ) );
1758  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1759  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1760  IntrinsicType xmm5( y.load(j+IT::size*4UL) );
1761  IntrinsicType xmm6( y.load(j+IT::size*5UL) );
1762  IntrinsicType xmm7( y.load(j+IT::size*6UL) );
1763  IntrinsicType xmm8( y.load(j+IT::size*7UL) );
1764
1765  for( size_t i=ibegin; i<iend; ++i ) {
      // x[i] is passed through set() once per row and reused for all eight column vectors
1766  const IntrinsicType x1( set( x[i] ) );
1767  xmm1 = xmm1 - x1 * A.load(i,j );
1768  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1769  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1770  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1771  xmm5 = xmm5 - x1 * A.load(i,j+IT::size*4UL);
1772  xmm6 = xmm6 - x1 * A.load(i,j+IT::size*5UL);
1773  xmm7 = xmm7 - x1 * A.load(i,j+IT::size*6UL);
1774  xmm8 = xmm8 - x1 * A.load(i,j+IT::size*7UL);
1775  }
1776
1777  y.store( j , xmm1 );
1778  y.store( j+IT::size , xmm2 );
1779  y.store( j+IT::size*2UL, xmm3 );
1780  y.store( j+IT::size*3UL, xmm4 );
1781  y.store( j+IT::size*4UL, xmm5 );
1782  y.store( j+IT::size*5UL, xmm6 );
1783  y.store( j+IT::size*6UL, xmm7 );
1784  y.store( j+IT::size*7UL, xmm8 );
1785  }
1786
      // Chunks of four SIMD vectors (same scheme as above)
1787  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
1788  {
1789  const size_t ibegin( ( IsLower<MT1>::value )
1790  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1791  :( 0UL ) );
1792  const size_t iend( ( IsUpper<MT1>::value )
1793  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1794  :( M ) );
1795  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1796
1797  IntrinsicType xmm1( y.load(j ) );
1798  IntrinsicType xmm2( y.load(j+IT::size ) );
1799  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1800  IntrinsicType xmm4( y.load(j+IT::size*3UL) );
1801
1802  for( size_t i=ibegin; i<iend; ++i ) {
1803  const IntrinsicType x1( set( x[i] ) );
1804  xmm1 = xmm1 - x1 * A.load(i,j );
1805  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1806  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1807  xmm4 = xmm4 - x1 * A.load(i,j+IT::size*3UL);
1808  }
1809
1810  y.store( j , xmm1 );
1811  y.store( j+IT::size , xmm2 );
1812  y.store( j+IT::size*2UL, xmm3 );
1813  y.store( j+IT::size*3UL, xmm4 );
1814  }
1815
      // Chunks of three SIMD vectors
1816  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
1817  {
1818  const size_t ibegin( ( IsLower<MT1>::value )
1819  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1820  :( 0UL ) );
1821  const size_t iend( ( IsUpper<MT1>::value )
1822  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1823  :( M ) );
1824  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1825
1826  IntrinsicType xmm1( y.load(j ) );
1827  IntrinsicType xmm2( y.load(j+IT::size ) );
1828  IntrinsicType xmm3( y.load(j+IT::size*2UL) );
1829
1830  for( size_t i=ibegin; i<iend; ++i ) {
1831  const IntrinsicType x1( set( x[i] ) );
1832  xmm1 = xmm1 - x1 * A.load(i,j );
1833  xmm2 = xmm2 - x1 * A.load(i,j+IT::size );
1834  xmm3 = xmm3 - x1 * A.load(i,j+IT::size*2UL);
1835  }
1836
1837  y.store( j , xmm1 );
1838  y.store( j+IT::size , xmm2 );
1839  y.store( j+IT::size*2UL, xmm3 );
1840  }
1841
      // Chunks of two SIMD vectors
1842  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
1843  {
1844  const size_t ibegin( ( IsLower<MT1>::value )
1845  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1846  :( 0UL ) );
1847  const size_t iend( ( IsUpper<MT1>::value )
1848  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1849  :( M ) );
1850  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1851
1852  IntrinsicType xmm1( y.load(j ) );
1853  IntrinsicType xmm2( y.load(j+IT::size) );
1854
1855  for( size_t i=ibegin; i<iend; ++i ) {
1856  const IntrinsicType x1( set( x[i] ) );
1857  xmm1 = xmm1 - x1 * A.load(i,j );
1858  xmm2 = xmm2 - x1 * A.load(i,j+IT::size);
1859  }
1860
1861  y.store( j , xmm1 );
1862  y.store( j+IT::size, xmm2 );
1863  }
1864
      // Single SIMD vectors up to the end of the vectorized range
1865  for( ; j<jpos; j+=IT::size )
1866  {
1867  const size_t ibegin( ( IsLower<MT1>::value )
1868  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1869  :( 0UL ) );
1870  const size_t iend( ( IsUpper<MT1>::value )
1871  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1872  :( M ) );
1873  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1874
1875  IntrinsicType xmm1( y.load(j) );
1876
1877  for( size_t i=ibegin; i<iend; ++i ) {
1878  xmm1 = xmm1 - set( x[i] ) * A.load(i,j);
1879  }
1880
1881  y.store( j, xmm1 );
1882  }
1883
      // Scalar handling of the columns between jpos and N (only if a remainder is possible)
1884  for( ; remainder && j<N; ++j )
1885  {
1886  const size_t ibegin( ( IsLower<MT1>::value )
1887  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
1888  :( 0UL ) );
1889  const size_t iend( ( IsUpper<MT1>::value )
1890  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
1891  :( M ) );
1892  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1893
      // The column's dot product is summed with += and then subtracted from y[j] in one step
1894  ElementType value = ElementType();
1895
1896  for( size_t i=ibegin; i<iend; ++i ) {
1897  value += x[i] * A(i,j);
1898  }
1899
1900  y[j] -= value;
1901  }
1902  }
1904  //**********************************************************************************************
1905 
1906  //**Default subtraction assignment to dense vectors (large matrices)****************************
1920  template< typename VT1 // Type of the left-hand side target vector
1921  , typename VT2 // Type of the left-hand side vector operand
1922  , typename MT1 > // Type of the right-hand side matrix operand
1923  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1924  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1925  {
1926  selectDefaultSubAssignKernel( y, x, A );
1927  }
1929  //**********************************************************************************************
1930 
1931  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
/*!\brief Vectorized subtraction assignment kernel for large matrices (\f$ \vec{y}^T -= \vec{x}^T * A \f$).
//
// \param y The target left-hand side dense vector.
// \param x The left-hand side dense vector operand.
// \param A The right-hand side dense matrix operand.
// \return void
//
// This overload is enabled (EnableIf) only if UseVectorizedDefaultKernel<VT1,VT2,MT1> holds.
// The matrix is processed in blocks of jblock columns and iblock rows. Within a block the
// columns are traversed in chunks of 8, 4, 3, 2 and 1 SIMD vectors: the products
// x[i] * A(i,j...) of the block's rows are summed (with +) in local accumulators, and the sums
// are then subtracted from the corresponding elements of y. Columns of a block that are not
// covered by the vectorized loops are processed by a scalar loop.
*/
1946  template< typename VT1 // Type of the left-hand side target vector
1947  , typename VT2 // Type of the left-hand side vector operand
1948  , typename MT1 > // Type of the right-hand side matrix operand
1949  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1> >::Type
1950  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
1951  {
1952  typedef IntrinsicTrait<ElementType> IT;
1953
1954  const size_t M( A.rows() );
1955  const size_t N( A.columns() );
1956
      // A scalar remainder loop is needed unless both the target vector and the matrix are padded
1957  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
1958
      // Column block width: the number of elements that fit into 32768 bytes
      // NOTE(review): presumably chosen to match a cache size - confirm
1959  const size_t jblock( 32768UL / sizeof( ElementType ) );
      // Row block height: 8 rows if all columns fit into one column block, 4 rows otherwise
1960  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
1961
1962  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
1963
1964  for( size_t jj=0U; jj<N; jj+=jblock ) {
1965  for( size_t ii=0UL; ii<M; ii+=iblock )
1966  {
1967  const size_t iend( min( ii+iblock, M ) );
1968  const size_t jtmp( min( jj+jblock, N ) );
      // Lower matrices: the column range additionally ends at the last row of this row block
      // (one column earlier if strictly lower)
1969  const size_t jend( ( IsLower<MT1>::value )
1970  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
1971  :( jtmp ) );
1972
      // End of the vectorized column range of this block (jend rounded down to a multiple of
      // IT::size if a remainder loop is needed)
1973  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
1974  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
1975
      // Upper matrices: start at the first row of this row block (plus one if strictly upper),
      // rounded down to a multiple of IT::size, but never before the start of the column block
1976  size_t j( ( IsUpper<MT1>::value )
1977  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
1978  :( jj ) );
1979
      // Chunks of eight SIMD vectors
1980  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
1981  {
      // NOTE(review): the accumulators are default-constructed and used as summands right away;
      // this presumably relies on IntrinsicType's default constructor zero-initializing - confirm
1982  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1983
1984  for( size_t i=ii; i<iend; ++i ) {
1985  const IntrinsicType x1( set( x[i] ) );
1986  xmm1 = xmm1 + x1 * A.load(i,j );
1987  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
1988  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
1989  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
1990  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
1991  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
1992  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
1993  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
1994  }
1995
      // Subtract the partial sums of this row block from the current target values
1996  y.store( j , y.load(j ) - xmm1 );
1997  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
1998  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
1999  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4 );
2000  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5 );
2001  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6 );
2002  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7 );
2003  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8 );
2004  }
2005
      // Chunks of four SIMD vectors (same scheme as above)
2006  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
2007  {
2008  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2009
2010  for( size_t i=ii; i<iend; ++i ) {
2011  const IntrinsicType x1( set( x[i] ) );
2012  xmm1 = xmm1 + x1 * A.load(i,j );
2013  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2014  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2015  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2016  }
2017
2018  y.store( j , y.load(j ) - xmm1 );
2019  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
2020  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
2021  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4 );
2022  }
2023
      // Chunks of three SIMD vectors
2024  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
2025  {
2026  IntrinsicType xmm1, xmm2, xmm3;
2027
2028  for( size_t i=ii; i<iend; ++i ) {
2029  const IntrinsicType x1( set( x[i] ) );
2030  xmm1 = xmm1 + x1 * A.load(i,j );
2031  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2032  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2033  }
2034
2035  y.store( j , y.load(j ) - xmm1 );
2036  y.store( j+IT::size , y.load(j+IT::size ) - xmm2 );
2037  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3 );
2038  }
2039
      // Chunks of two SIMD vectors
2040  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
2041  {
2042  IntrinsicType xmm1, xmm2;
2043
2044  for( size_t i=ii; i<iend; ++i ) {
2045  const IntrinsicType x1( set( x[i] ) );
2046  xmm1 = xmm1 + x1 * A.load(i,j );
2047  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2048  }
2049
2050  y.store( j , y.load(j ) - xmm1 );
2051  y.store( j+IT::size, y.load(j+IT::size) - xmm2 );
2052  }
2053
      // Single SIMD vectors up to the end of the vectorized range
2054  for( ; j<jpos; j+=IT::size )
2055  {
2056  IntrinsicType xmm1;
2057
2058  for( size_t i=ii; i<iend; ++i ) {
2059  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2060  }
2061
2062  y.store( j, y.load(j) - xmm1 );
2063  }
2064
      // Scalar handling of the columns between jpos and jend (only if a remainder is possible)
2065  for( ; remainder && j<jend; ++j )
2066  {
2067  ElementType value = ElementType();
2068
2069  for( size_t i=ii; i<iend; ++i ) {
2070  value += x[i] * A(i,j);
2071  }
2072
2073  y[j] -= value;
2074  }
2075  }
2076  }
2077  }
2079  //**********************************************************************************************
2080 
2081  //**BLAS-based subtraction assignment to dense vectors (default)********************************
2095  template< typename VT1 // Type of the left-hand side target vector
2096  , typename VT2 // Type of the left-hand side vector operand
2097  , typename MT1 > // Type of the right-hand side matrix operand
2098  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
2099  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2100  {
2101  selectLargeSubAssignKernel( y, x, A );
2102  }
2104  //**********************************************************************************************
2105 
2106  //**BLAS-based subtraction assignment to dense vectors******************************************
2107 #if BLAZE_BLAS_MODE
2108 
2121  template< typename VT1 // Type of the left-hand side target vector
2122  , typename VT2 // Type of the left-hand side vector operand
2123  , typename MT1 > // Type of the right-hand side matrix operand
2124  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1> >::Type
2125  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A )
2126  {
2127  typedef typename VT1::ElementType ET;
2128 
2129  if( IsTriangular<MT1>::value ) {
2130  typename VT1::ResultType tmp( serial( x ) );
2131  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
2132  subAssign( y, tmp );
2133  }
2134  else {
2135  gemv( y, x, A, ET(-1), ET(1) );
2136  }
2137  }
2139 #endif
2140  //**********************************************************************************************
2141 
2142  //**Subtraction assignment to sparse vectors****************************************************
2143  // No special implementation for the subtraction assignment to sparse vectors.
2144  //**********************************************************************************************
2145 
2146  //**Multiplication assignment to dense vectors**************************************************
// Multiplication assignment of a transpose dense vector-dense matrix
// multiplication expression (rhs) to a dense row vector (lhs).
// The expression is first evaluated into a temporary of type ResultType using
// serial(), and that temporary is then passed to multAssign() for the target.
// NOTE(review): the listing numbering jumps inside the body (2161 -> 2163 ->
// 2167 -> 2168), so statements may be missing from this view.
2159  template< typename VT1 > // Type of the target dense vector
2160  friend inline void multAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2161  {
2163 
2167 
2168  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2169 
2170  const ResultType tmp( serial( rhs ) );
2171  multAssign( ~lhs, tmp );
2172  }
2174  //**********************************************************************************************
2175 
2176  //**Multiplication assignment to sparse vectors*************************************************
2177  // No special implementation for the multiplication assignment to sparse vectors.
2178  //**********************************************************************************************
2179 
2180  //**SMP assignment to dense vectors*************************************************************
// SMP assignment of a transpose dense vector-dense matrix multiplication
// expression (rhs) to a dense row vector (lhs).
// Enabled through EnableIf on UseSMPAssign<VT1> (trait defined outside this view).
// Both operands are evaluated into locals of type LT / RT and the product of
// those locals is handed to smpAssign() again.
// NOTE(review): the listing numbering jumps after the opening brace
// (2198 -> 2200), so a statement may be missing from this view.
2195  template< typename VT1 > // Type of the target dense vector
2196  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2197  smpAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2198  {
2200 
2201  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2202 
      // A matrix without rows contributes nothing: the target is reset and we are done.
2203  if( rhs.mat_.rows() == 0UL ) {
2204  reset( ~lhs );
2205  return;
2206  }
      // A matrix without columns leaves the target untouched (by the size assertion
      // below the target would then have the same size as the column count).
2207  else if( rhs.mat_.columns() == 0UL ) {
2208  return;
2209  }
2210 
2211  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2212  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2213 
2214  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2215  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2216  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2217  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2218 
2219  smpAssign( ~lhs, x * A );
2220  }
2222  //**********************************************************************************************
2223 
2224  //**SMP assignment to sparse vectors************************************************************
// SMP assignment of a transpose dense vector-dense matrix multiplication
// expression (rhs) to a sparse row vector (lhs).
// Enabled through EnableIf on UseSMPAssign<VT1> (trait defined outside this view).
// The expression is evaluated into a temporary ResultType (constructed directly
// from rhs, without serial()), which is then passed to smpAssign().
// NOTE(review): the listing numbering jumps inside the body (2242 -> 2244 ->
// 2248 -> 2249), so statements may be missing from this view.
2239  template< typename VT1 > // Type of the target sparse vector
2240  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2241  smpAssign( SparseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2242  {
2244 
2248 
2249  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2250 
2251  const ResultType tmp( rhs );
2252  smpAssign( ~lhs, tmp );
2253  }
2255  //**********************************************************************************************
2256 
2257  //**SMP addition assignment to dense vectors****************************************************
// SMP addition assignment of a transpose dense vector-dense matrix
// multiplication expression (rhs) to a dense row vector (lhs).
// Enabled through EnableIf on UseSMPAssign<VT1> (trait defined outside this view).
// Returns immediately, leaving lhs unchanged, when the matrix operand has no
// rows or no columns; otherwise both operands are evaluated into locals of type
// LT / RT and their product is passed to smpAddAssign().
// NOTE(review): the listing numbering jumps after the opening brace
// (2275 -> 2277), so a statement may be missing from this view.
2272  template< typename VT1 > // Type of the target dense vector
2273  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2274  smpAddAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2275  {
2277 
2278  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2279 
2280  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2281  return;
2282  }
2283 
2284  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2285  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2286 
2287  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2288  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2289  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2290  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2291 
2292  smpAddAssign( ~lhs, x * A );
2293  }
2295  //**********************************************************************************************
2296 
2297  //**SMP addition assignment to sparse vectors***************************************************
2298  // No special implementation for the SMP addition assignment to sparse vectors.
2299  //**********************************************************************************************
2300 
2301  //**SMP subtraction assignment to dense vectors*************************************************
// SMP subtraction assignment of a transpose dense vector-dense matrix
// multiplication expression (rhs) to a dense row vector (lhs).
// Enabled through EnableIf on UseSMPAssign<VT1> (trait defined outside this view).
// Returns immediately, leaving lhs unchanged, when the matrix operand has no
// rows or no columns; otherwise both operands are evaluated into locals of type
// LT / RT and their product is passed to smpSubAssign().
// NOTE(review): the listing numbering jumps after the opening brace
// (2319 -> 2321), so a statement may be missing from this view.
2316  template< typename VT1 > // Type of the target dense vector
2317  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2318  smpSubAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2319  {
2321 
2322  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2323 
2324  if( rhs.mat_.rows() == 0UL || rhs.mat_.columns() == 0UL ) {
2325  return;
2326  }
2327 
2328  LT x( rhs.vec_ ); // Evaluation of the left-hand side dense vector operand
2329  RT A( rhs.mat_ ); // Evaluation of the right-hand side dense matrix operand
2330 
2331  BLAZE_INTERNAL_ASSERT( x.size() == rhs.vec_.size() , "Invalid vector size" );
2332  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.mat_.rows() , "Invalid number of rows" );
2333  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.mat_.columns(), "Invalid number of columns" );
2334  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2335 
2336  smpSubAssign( ~lhs, x * A );
2337  }
2339  //**********************************************************************************************
2340 
2341  //**SMP subtraction assignment to sparse vectors************************************************
2342  // No special implementation for the SMP subtraction assignment to sparse vectors.
2343  //**********************************************************************************************
2344 
2345  //**SMP multiplication assignment to dense vectors**********************************************
// SMP multiplication assignment of a transpose dense vector-dense matrix
// multiplication expression (rhs) to a dense row vector (lhs).
// Enabled through EnableIf on UseSMPAssign<VT1> (trait defined outside this view).
// The expression is evaluated into a temporary ResultType (constructed directly
// from rhs), which is then passed to smpMultAssign().
// NOTE(review): the listing numbering jumps inside the body (2363 -> 2365 ->
// 2369 -> 2370), so statements may be missing from this view.
2360  template< typename VT1 > // Type of the target dense vector
2361  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
2362  smpMultAssign( DenseVector<VT1,true>& lhs, const TDVecDMatMultExpr& rhs )
2363  {
2365 
2369 
2370  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2371 
2372  const ResultType tmp( rhs );
2373  smpMultAssign( ~lhs, tmp );
2374  }
2376  //**********************************************************************************************
2377 
2378  //**SMP multiplication assignment to sparse vectors*********************************************
2379  // No special implementation for the SMP multiplication assignment to sparse vectors.
2380  //**********************************************************************************************
2381 
2382  //**Compile time checks*************************************************************************
2390  //**********************************************************************************************
2391 };
2392 //*************************************************************************************************
2393 
2394 
2395 
2396 
2397 //=================================================================================================
2398 //
2399 // DVECSCALARMULTEXPR SPECIALIZATION
2400 //
2401 //=================================================================================================
2402 
2403 //*************************************************************************************************
2411 template< typename VT // Type of the left-hand side dense vector
2412  , typename MT // Type of the right-hand side dense matrix
2413  , typename ST > // Type of the side scalar value
2414 class DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >
2415  : public DenseVector< DVecScalarMultExpr< TDVecDMatMultExpr<VT,MT>, ST, true >, true >
2416  , private VecScalarMultExpr
2417  , private Computation
2418 {
2419  private:
2420  //**Type definitions****************************************************************************
// Type of the wrapped vector/matrix multiplication expression.
2421  typedef TDVecDMatMultExpr<VT,MT> VMM;
// Result type of the wrapped multiplication expression.
2422  typedef typename VMM::ResultType RES;
// Result type of the left-hand side dense vector expression.
2423  typedef typename VT::ResultType VRT;
// Result type of the right-hand side dense matrix expression.
2424  typedef typename MT::ResultType MRT;
// Element type of the left-hand side vector result type.
2425  typedef typename VRT::ElementType VET;
// Element type of the right-hand side matrix result type.
2426  typedef typename MRT::ElementType MET;
// Composite type of the left-hand side dense vector expression.
2427  typedef typename VT::CompositeType VCT;
// Composite type of the right-hand side dense matrix expression.
2428  typedef typename MT::CompositeType MCT;
2429  //**********************************************************************************************
2430 
2431  //**********************************************************************************************
// Compile-time switch for the vector operand: set when VT is a computation
// expression or requires an intermediate evaluation. It drives the choice of
// the LT typedef and feeds into UseSMPAssign and smpAssignable.
2433  enum { evaluateVector = IsComputation<VT>::value || RequiresEvaluation<VT>::value };
2434  //**********************************************************************************************
2435 
2436  //**********************************************************************************************
// Compile-time switch for the matrix operand: set when MT requires an
// intermediate evaluation, or when MT is a computation expression whose element
// type equals the vector element type and is BLAS compatible. It drives the
// choice of the RT typedef and feeds into UseSMPAssign, smpAssignable,
// canSMPAssign() and the kernel selection.
2438  enum { evaluateMatrix = ( IsComputation<MT>::value && IsSame<MET,VET>::value &&
2439  IsBlasCompatible<MET>::value ) || RequiresEvaluation<MT>::value };
2440  //**********************************************************************************************
2441 
2442  //**********************************************************************************************
2444 
// Helper trait for selecting the SMP assignment overloads. Its value is set
// when either operand has to be evaluated (evaluateVector || evaluateMatrix);
// the template parameter T1 is not used in the computation.
2447  template< typename T1 >
2448  struct UseSMPAssign {
2449  enum { value = ( evaluateVector || evaluateMatrix ) };
2450  };
2451  //**********************************************************************************************
2452 
2453  //**********************************************************************************************
2455 
// Helper trait deciding whether the BLAS kernel can be used for the given
// combination of target vector (T1), vector operand (T2), matrix operand (T3)
// and scalar (T4). All of the following have to hold:
//   - BLAZE_BLAS_MODE is enabled
//   - T1 offers mutable data access, T2 and T3 offer constant data access
//   - T3 is not a diagonal matrix type
//   - all three types are flagged as vectorizable
//   - all three element types are BLAS compatible and identical
//   - it is not the case that T1's element type is builtin while T4 is complex
2457  template< typename T1, typename T2, typename T3, typename T4 >
2458  struct UseBlasKernel {
2459  enum { value = BLAZE_BLAS_MODE &&
2460  HasMutableDataAccess<T1>::value &&
2461  HasConstDataAccess<T2>::value &&
2462  HasConstDataAccess<T3>::value &&
2463  !IsDiagonal<T3>::value &&
2464  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2465  IsBlasCompatible<typename T1::ElementType>::value &&
2466  IsBlasCompatible<typename T2::ElementType>::value &&
2467  IsBlasCompatible<typename T3::ElementType>::value &&
2468  IsSame< typename T1::ElementType, typename T2::ElementType >::value &&
2469  IsSame< typename T1::ElementType, typename T3::ElementType >::value &&
2470  !( IsBuiltin<typename T1::ElementType>::value && IsComplex<T4>::value ) };
2471  };
2472  //**********************************************************************************************
2473 
2474  //**********************************************************************************************
2476 
// Helper trait deciding whether the vectorized default kernels can be used for
// the given combination of target vector (T1), vector operand (T2), matrix
// operand (T3) and scalar (T4). All of the following have to hold:
//   - useOptimizedKernels is set (defined outside this view)
//   - T3 is not a diagonal matrix type
//   - all three types are flagged as vectorizable
//   - the element types of T1, T2 and T3 are identical and equal to T4
//   - IntrinsicTrait reports addition and multiplication for that element type
2479  template< typename T1, typename T2, typename T3, typename T4 >
2480  struct UseVectorizedDefaultKernel {
2481  enum { value = useOptimizedKernels &&
2482  !IsDiagonal<T3>::value &&
2483  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2484  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2485  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2486  IsSame<typename T1::ElementType,T4>::value &&
2487  IntrinsicTrait<typename T1::ElementType>::addition &&
2488  IntrinsicTrait<typename T1::ElementType>::multiplication };
2489  };
2490  //**********************************************************************************************
2491 
2492  public:
2493  //**Type definitions****************************************************************************
// Type of this DVecScalarMultExpr instance.
2494  typedef DVecScalarMultExpr<VMM,ST,true> This;
// Result type of the scaled expression, obtained from MultTrait<RES,ST>.
2495  typedef typename MultTrait<RES,ST>::Type ResultType;
// Transpose type of the result type.
2496  typedef typename ResultType::TransposeType TransposeType;
// Element type of the result type.
2497  typedef typename ResultType::ElementType ElementType;
// Intrinsic (SIMD) type associated with the element type.
2498  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// Return type of operator[] and at(): elements are returned by value.
2499  typedef const ElementType ReturnType;
// Composite type: the expression is represented by its evaluated result type.
2500  typedef const ResultType CompositeType;
2501 
// Type of the left-hand side operand: the wrapped vector/matrix multiplication.
2503  typedef const TDVecDMatMultExpr<VT,MT> LeftOperand;
2504 
// Type of the right-hand side operand: the scalar, held by value.
2506  typedef ST RightOperand;
2507 
// Type used for the vector operand inside the assignment functions.
// NOTE(review): presumably SelectType picks `const VRT` when evaluateVector is
// set and VCT otherwise -- confirm against blaze/util/SelectType.h.
2509  typedef typename SelectType< evaluateVector, const VRT, VCT >::Type LT;
2510 
// Type used for the matrix operand inside the assignment functions.
// NOTE(review): presumably `const MRT` when evaluateMatrix is set and MCT otherwise.
2512  typedef typename SelectType< evaluateMatrix, const MRT, MCT >::Type RT;
2513  //**********************************************************************************************
2514 
2515  //**Compilation flags***************************************************************************
// Compilation flag for vectorization: set when the matrix type is not diagonal,
// both operand types are vectorizable, the vector, matrix and scalar types all
// share the same element type, and IntrinsicTrait reports addition and
// multiplication for that type.
2517  enum { vectorizable = !IsDiagonal<MT>::value &&
2518  VT::vectorizable && MT::vectorizable &&
2519  IsSame<VET,MET>::value &&
2520  IsSame<VET,ST>::value &&
2521  IntrinsicTrait<VET>::addition &&
2522  IntrinsicTrait<VET>::multiplication };
2523 
// Compilation flag for SMP assignment: set only when neither operand needs to
// be evaluated and both operand types are themselves SMP assignable.
2525  enum { smpAssignable = !evaluateVector && VT::smpAssignable &&
2526  !evaluateMatrix && MT::smpAssignable };
2527  //**********************************************************************************************
2528 
2529  //**Constructor*********************************************************************************
// Constructor: stores the vector/matrix multiplication expression and the
// scalar it is scaled with.
//   vector : left-hand side multiplication expression
//   scalar : right-hand side scalar of the multiplication expression
2535  explicit inline DVecScalarMultExpr( const VMM& vector, ST scalar )
2536  : vector_( vector ) // Left-hand side dense vector of the multiplication expression
2537  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2538  {}
2539  //**********************************************************************************************
2540 
2541  //**Subscript operator**************************************************************************
// Subscript operator: returns element `index` of the wrapped multiplication
// expression multiplied by the scalar. The index is only checked by an
// internal assertion; use at() for a checked access.
2547  inline ReturnType operator[]( size_t index ) const {
2548  BLAZE_INTERNAL_ASSERT( index < vector_.size(), "Invalid vector access index" );
2549  return vector_[index] * scalar_;
2550  }
2551  //**********************************************************************************************
2552 
2553  //**At function*********************************************************************************
// Checked element access: invokes BLAZE_THROW_OUT_OF_RANGE when `index` is not
// smaller than the size of the wrapped expression, otherwise returns the same
// value as operator[].
2560  inline ReturnType at( size_t index ) const {
2561  if( index >= vector_.size() ) {
2562  BLAZE_THROW_OUT_OF_RANGE( "Invalid vector access index" );
2563  }
2564  return (*this)[index];
2565  }
2566  //**********************************************************************************************
2567 
2568  //**Size function*******************************************************************************
// Returns the size of the vector, i.e. the size of the wrapped multiplication expression.
2573  inline size_t size() const {
2574  return vector_.size();
2575  }
2576  //**********************************************************************************************
2577 
2578  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped vector/matrix multiplication expression.
2583  inline LeftOperand leftOperand() const {
2584  return vector_;
2585  }
2586  //**********************************************************************************************
2587 
2588  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
2593  inline RightOperand rightOperand() const {
2594  return scalar_;
2595  }
2596  //**********************************************************************************************
2597 
2598  //**********************************************************************************************
// Alias query: forwards the given address to canAlias() of the wrapped
// multiplication expression and returns its answer. The scalar is not consulted.
2604  template< typename T >
2605  inline bool canAlias( const T* alias ) const {
2606  return vector_.canAlias( alias );
2607  }
2608  //**********************************************************************************************
2609 
2610  //**********************************************************************************************
// Alias query: forwards the given address to isAliased() of the wrapped
// multiplication expression and returns its answer. The scalar is not consulted.
2616  template< typename T >
2617  inline bool isAliased( const T* alias ) const {
2618  return vector_.isAliased( alias );
2619  }
2620  //**********************************************************************************************
2621 
2622  //**********************************************************************************************
// Alignment query: returns whatever isAligned() of the wrapped multiplication expression reports.
2627  inline bool isAligned() const {
2628  return vector_.isAligned();
2629  }
2630  //**********************************************************************************************
2631 
2632  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// The result is true when the vector size exceeds SMP_TDVECDMATMULT_THRESHOLD
// and at least one of the following holds:
//   - BLAZE_BLAS_IS_PARALLEL is not set
//   - the matrix operand is a computation that is not evaluated up front
//   - the matrix has fewer than TDVECDMATMULT_THRESHOLD elements (rows * columns)
// The last two conditions are the same ones selectAssignKernel() uses to pick
// the small (non-BLAS) kernel.
2637  inline bool canSMPAssign() const {
2638  typename VMM::RightOperand A( vector_.rightOperand() );
2639  return ( !BLAZE_BLAS_IS_PARALLEL ||
2640  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2641  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) ) &&
2642  ( size() > SMP_TDVECDMATMULT_THRESHOLD );
2643  }
2644  //**********************************************************************************************
2645 
2646  private:
2647  //**Member variables****************************************************************************
// Left-hand side operand: the wrapped vector/matrix multiplication expression.
2648  LeftOperand vector_;
// Right-hand side operand: the scalar the multiplication result is scaled with.
2649  RightOperand scalar_;
2650  //**********************************************************************************************
2651 
2652  //**Assignment to dense vectors*****************************************************************
// Assignment of a scaled transpose dense vector-dense matrix multiplication
// (rhs) to a dense row vector (lhs).
// The vector and matrix operands are extracted from the wrapped expression,
// evaluated serially into locals of type LT / RT and passed together with the
// scalar to selectAssignKernel().
// NOTE(review): the listing numbering jumps after the opening brace
// (2666 -> 2668), so a statement may be missing from this view.
2664  template< typename VT1 > // Type of the target dense vector
2665  friend inline void assign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
2666  {
2668 
2669  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
2670 
2671  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
2672  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
2673 
      // A matrix without rows contributes nothing: the target is reset and we are done.
2674  if( right.rows() == 0UL ) {
2675  reset( ~lhs );
2676  return;
2677  }
      // A matrix without columns leaves the target untouched.
2678  else if( right.columns() == 0UL ) {
2679  return;
2680  }
2681 
2682  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
2683  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2684 
2685  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
2686  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
2687  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
2688  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
2689 
2690  DVecScalarMultExpr::selectAssignKernel( ~lhs, x, A, rhs.scalar_ );
2691  }
2692  //**********************************************************************************************
2693 
2694  //**Assignment to dense vectors (kernel selection)**********************************************
// Kernel selection for the assignment y = ( x * A ) * scalar.
// The small kernel is used when the matrix type is diagonal, when the original
// matrix operand MT is a computation that was not evaluated up front, or when
// the matrix has fewer than TDVECDMATMULT_THRESHOLD elements; in every other
// case the BLAS kernel selection is used.
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
2705  template< typename VT1 // Type of the left-hand side target vector
2706  , typename VT2 // Type of the left-hand side vector operand
2707  , typename MT1 // Type of the right-hand side matrix operand
2708  , typename ST2 > // Type of the scalar value
2709  static inline void selectAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2710  {
2711  if( ( IsDiagonal<MT1>::value ) ||
2712  ( IsComputation<MT>::value && !evaluateMatrix ) ||
2713  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
2714  selectSmallAssignKernel( y, x, A, scalar );
2715  else
2716  selectBlasAssignKernel( y, x, A, scalar );
2717  }
2718  //**********************************************************************************************
2719 
2720  //**Default assignment to dense vectors*********************************************************
// Default (non-vectorized) kernel for the assignment y = ( x * A ) * scalar.
// The matrix is traversed row by row; each row i adds x[i] * A(i,j) to the
// target elements y[j]. The column range visited per row is narrowed according
// to the compile-time structure of MT1 (lower / upper / strict / diagonal).
// Except for diagonal matrices, the scaling by `scalar` happens in one final
// pass over y.
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
// The callers visible in this chunk (assign() -> selectAssignKernel()) only get
// here after returning early for matrices without rows or columns; row 0 and
// y[0] / y[N-1] are accessed unconditionally below.
2734  template< typename VT1 // Type of the left-hand side target vector
2735  , typename VT2 // Type of the left-hand side vector operand
2736  , typename MT1 // Type of the right-hand side matrix operand
2737  , typename ST2 > // Type of the scalar value
2738  static inline void selectDefaultAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2739  {
2740  const size_t M( A.rows() );
2741  const size_t N( A.columns() );
2742 
      // Strictly upper: the first target element is reset here; the row-0 pass
      // and the final scaling pass below both skip index 0.
2743  if( IsStrictlyUpper<MT1>::value ) {
2744  reset( y[0] );
2745  }
2746 
      // Non-lower matrices: row 0 initializes y by plain assignment, so the
      // later rows can accumulate with +=.
2747  if( !IsLower<MT1>::value )
2748  {
2749  for( size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<N; ++j ) {
2750  y[j] = x[0UL] * A(0UL,j);
2751  }
2752  }
2753 
      // Remaining rows. Lower (non-strict) matrices start at row 0 because the
      // initialization pass above was skipped for them.
2754  for( size_t i=( IsLower<MT1>::value && !IsStrictlyLower<MT1>::value ? 0UL : 1UL ); i<M; ++i )
2755  {
2756  if( IsDiagonal<MT1>::value )
2757  {
      // Diagonal: a single product per element, scaled right away.
2758  y[i] = x[i] * A(i,i) * scalar;
2759  }
2760  else
2761  {
      // Column range [jbegin,jend) of row i that is accumulated with +=.
2762  const size_t jbegin( ( IsUpper<MT1>::value )
2763  ?( IsStrictlyUpper<MT1>::value ? i+1UL : i )
2764  :( 0UL ) );
2765  const size_t jend( ( IsLower<MT1>::value )
2766  ?( IsStrictlyLower<MT1>::value ? i-1UL : i )
2767  :( N ) );
2768  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2769 
      // jpos marks the end of the part that is processed two columns at a time
      // (jnum with its lowest bit cleared, i.e. rounded down to an even count).
2770  const size_t jnum( jend - jbegin );
2771  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2772 
2773  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2774  y[j ] += x[i] * A(i,j );
2775  y[j+1UL] += x[i] * A(i,j+1UL);
2776  }
      // Single leftover column for an odd count.
2777  if( jpos < jend ) {
2778  y[jpos] += x[i] * A(i,jpos);
2779  }
      // Lower matrices: y[jend] is assigned rather than accumulated -- no
      // earlier statement in this kernel has written that element.
2780  if( IsLower<MT1>::value ) {
2781  y[jend] = x[i] * A(i,jend);
2782  }
2783  }
2784  }
2785 
      // Strictly lower: the last target element is reset; the scaling pass
      // below stops before it.
2786  if( IsStrictlyLower<MT1>::value ) {
2787  reset( y[N-1UL] );
2788  }
2789 
      // Final scaling pass (diagonal matrices were already scaled above). The
      // elements that were reset for strictly triangular matrices are skipped.
2790  if( !IsDiagonal<MT1>::value )
2791  {
2792  const size_t iend( IsStrictlyLower<MT1>::value ? N-1UL : N );
2793  for( size_t j=( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ); j<iend; ++j ) {
2794  y[j] *= scalar;
2795  }
2796  }
2797  }
2798  //**********************************************************************************************
2799 
2800  //**Default assignment to dense vectors (small matrices)****************************************
// Fallback overload of the small-matrix assignment kernel. It participates in
// overload resolution through DisableIf on
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> and simply forwards all arguments
// to selectDefaultAssignKernel().
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
2814  template< typename VT1 // Type of the left-hand side target vector
2815  , typename VT2 // Type of the left-hand side vector operand
2816  , typename MT1 // Type of the right-hand side matrix operand
2817  , typename ST2 > // Type of the scalar value
2818  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2819  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2820  {
2821  selectDefaultAssignKernel( y, x, A, scalar );
2822  }
2823  //**********************************************************************************************
2824 
2825  //**Default assignment to dense vectors (small matrices)****************************************
// Vectorized small-matrix kernel for the assignment y = ( x * A ) * scalar,
// enabled through EnableIf on UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2>.
// The columns of A are processed in chunks of 8, 4, 3, 2 and finally 1 SIMD
// vectors (IT::size elements each). For every chunk the products x[i] * A(i,.)
// are accumulated over the relevant rows, and the accumulators are multiplied
// by the broadcast scalar when they are stored to y. Columns that do not fill
// a whole SIMD vector are handled by a scalar loop at the end.
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
// NOTE(review): the xmm accumulators are declared without an initializer; the
// kernel relies on IntrinsicType's default construction yielding zero --
// confirm in blaze/math/Intrinsics.h.
2839  template< typename VT1 // Type of the left-hand side target vector
2840  , typename VT2 // Type of the left-hand side vector operand
2841  , typename MT1 // Type of the right-hand side matrix operand
2842  , typename ST2 > // Type of the scalar value
2843  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
2844  selectSmallAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
2845  {
2846  typedef IntrinsicTrait<ElementType> IT;
2847 
2848  const size_t M( A.rows() );
2849  const size_t N( A.columns() );
2850 
      // A scalar tail loop is required unless both the target vector and the
      // matrix are padded.
2851  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
2852 
      // End of the SIMD-processed column range: N rounded down to a multiple of
      // IT::size when a tail loop is needed (see the assertion), otherwise N.
2853  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
2854  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
2855 
      // The scalar broadcast into a SIMD vector.
2856  const IntrinsicType factor( set( scalar ) );
2857 
2858  size_t j( 0UL );
2859 
      // Chunks of 8 SIMD vectors.
2860  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
2861  {
      // Row range restricted by the triangular structure of MT1: lower matrices
      // start at row j (j+1 if strict), upper matrices end at the last row that
      // can touch this chunk.
2862  const size_t ibegin( ( IsLower<MT1>::value )
2863  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2864  :( 0UL ) );
2865  const size_t iend( ( IsUpper<MT1>::value )
2866  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2867  :( M ) );
2868  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2869 
2870  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2871 
2872  for( size_t i=ibegin; i<iend; ++i ) {
2873  const IntrinsicType x1( set( x[i] ) );
2874  xmm1 = xmm1 + x1 * A.load(i,j );
2875  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2876  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2877  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2878  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
2879  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
2880  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
2881  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
2882  }
2883 
2884  y.store( j , xmm1*factor );
2885  y.store( j+IT::size , xmm2*factor );
2886  y.store( j+IT::size*2UL, xmm3*factor );
2887  y.store( j+IT::size*3UL, xmm4*factor );
2888  y.store( j+IT::size*4UL, xmm5*factor );
2889  y.store( j+IT::size*5UL, xmm6*factor );
2890  y.store( j+IT::size*6UL, xmm7*factor );
2891  y.store( j+IT::size*7UL, xmm8*factor );
2892  }
2893 
      // Chunks of 4 SIMD vectors.
2894  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
2895  {
2896  const size_t ibegin( ( IsLower<MT1>::value )
2897  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2898  :( 0UL ) );
2899  const size_t iend( ( IsUpper<MT1>::value )
2900  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2901  :( M ) );
2902  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2903 
2904  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2905 
2906  for( size_t i=ibegin; i<iend; ++i ) {
2907  const IntrinsicType x1( set( x[i] ) );
2908  xmm1 = xmm1 + x1 * A.load(i,j );
2909  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2910  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2911  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
2912  }
2913 
2914  y.store( j , xmm1*factor );
2915  y.store( j+IT::size , xmm2*factor );
2916  y.store( j+IT::size*2UL, xmm3*factor );
2917  y.store( j+IT::size*3UL, xmm4*factor );
2918  }
2919 
      // Chunks of 3 SIMD vectors.
2920  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
2921  {
2922  const size_t ibegin( ( IsLower<MT1>::value )
2923  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2924  :( 0UL ) );
2925  const size_t iend( ( IsUpper<MT1>::value )
2926  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2927  :( M ) );
2928  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2929 
2930  IntrinsicType xmm1, xmm2, xmm3;
2931 
2932  for( size_t i=ibegin; i<iend; ++i ) {
2933  const IntrinsicType x1( set( x[i] ) );
2934  xmm1 = xmm1 + x1 * A.load(i,j );
2935  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
2936  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
2937  }
2938 
2939  y.store( j , xmm1*factor );
2940  y.store( j+IT::size , xmm2*factor );
2941  y.store( j+IT::size*2UL, xmm3*factor );
2942  }
2943 
      // Chunks of 2 SIMD vectors.
2944  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
2945  {
2946  const size_t ibegin( ( IsLower<MT1>::value )
2947  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2948  :( 0UL ) );
2949  const size_t iend( ( IsUpper<MT1>::value )
2950  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2951  :( M ) );
2952  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2953 
2954  IntrinsicType xmm1, xmm2;
2955 
2956  for( size_t i=ibegin; i<iend; ++i ) {
2957  const IntrinsicType x1( set( x[i] ) );
2958  xmm1 = xmm1 + x1 * A.load(i,j );
2959  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
2960  }
2961 
2962  y.store( j , xmm1*factor );
2963  y.store( j+IT::size, xmm2*factor );
2964  }
2965 
      // Single SIMD vectors.
2966  for( ; j<jpos; j+=IT::size )
2967  {
2968  const size_t ibegin( ( IsLower<MT1>::value )
2969  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2970  :( 0UL ) );
2971  const size_t iend( ( IsUpper<MT1>::value )
2972  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2973  :( M ) );
2974  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2975 
2976  IntrinsicType xmm1;
2977 
2978  for( size_t i=ibegin; i<iend; ++i ) {
2979  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
2980  }
2981 
2982  y.store( j, xmm1*factor );
2983  }
2984 
      // Scalar tail: remaining columns, one element at a time.
2985  for( ; remainder && j<N; ++j )
2986  {
2987  const size_t ibegin( ( IsLower<MT1>::value )
2988  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
2989  :( 0UL ) );
2990  const size_t iend( ( IsUpper<MT1>::value )
2991  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
2992  :( M ) );
2993  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2994 
2995  ElementType value = ElementType();
2996 
2997  for( size_t i=ibegin; i<iend; ++i ) {
2998  value += x[i] * A(i,j);
2999  }
3000 
3001  y[j] = value * scalar;
3002  }
3003  }
3004  //**********************************************************************************************
3005 
3006  //**Default assignment to dense vectors (large matrices)****************************************
// Fallback overload of the large-matrix assignment kernel. It participates in
// overload resolution through DisableIf on
// UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> and simply forwards all arguments
// to selectDefaultAssignKernel().
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
3020  template< typename VT1 // Type of the left-hand side target vector
3021  , typename VT2 // Type of the left-hand side vector operand
3022  , typename MT1 // Type of the right-hand side matrix operand
3023  , typename ST2 > // Type of the scalar value
3024  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3025  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3026  {
3027  selectDefaultAssignKernel( y, x, A, scalar );
3028  }
3029  //**********************************************************************************************
3030 
3031  //**Default assignment to dense vectors (large matrices)****************************************
// Vectorized large-matrix kernel for the assignment y = ( x * A ) * scalar,
// enabled through EnableIf on UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2>.
// The target is reset first. The matrix is then traversed in blocks of
// `jblock` columns by `iblock` rows; within a block the columns are processed
// in chunks of 8, 4, 3, 2 and 1 SIMD vectors. Each chunk accumulates
// x[i] * A(i,.) over the rows of the block and adds the scaled partial sums to
// the values already stored in y.
//   y : target vector, x : vector operand, A : matrix operand, scalar : scaling factor
// NOTE(review): the xmm accumulators are declared without an initializer; the
// kernel relies on IntrinsicType's default construction yielding zero --
// confirm in blaze/math/Intrinsics.h.
3045  template< typename VT1 // Type of the left-hand side target vector
3046  , typename VT2 // Type of the left-hand side vector operand
3047  , typename MT1 // Type of the right-hand side matrix operand
3048  , typename ST2 > // Type of the scalar value
3049  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3050  selectLargeAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3051  {
3052  typedef IntrinsicTrait<ElementType> IT;
3053 
3054  const size_t M( A.rows() );
3055  const size_t N( A.columns() );
3056 
      // A scalar tail loop is required unless both the target vector and the
      // matrix are padded.
3057  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3058 
      // Block sizes: a column block spans 32768 bytes worth of elements; a row
      // block has 8 rows when all columns fit into one column block, else 4.
3059  const size_t jblock( 32768UL / sizeof( ElementType ) );
3060  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3061 
      // The scalar broadcast into a SIMD vector.
3062  const IntrinsicType factor( set( scalar ) );
3063 
3064  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
3065 
      // Every block below adds to y, so y has to start out cleared.
3066  reset( y );
3067 
3068  for( size_t jj=0U; jj<N; jj+=jblock ) {
3069  for( size_t ii=0UL; ii<M; ii+=iblock )
3070  {
      // Bounds of the current block. For lower matrices the column end is
      // additionally limited by the row end of the block (minus one if strict).
3071  const size_t iend( min( ii+iblock, M ) );
3072  const size_t jtmp( min( jj+jblock, N ) );
3073  const size_t jend( ( IsLower<MT1>::value )
3074  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3075  :( jtmp ) );
3076 
      // End of the SIMD-processed column range: jend rounded down to a multiple
      // of IT::size when a tail loop is needed (see the assertion), else jend.
3077  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
3078  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
3079 
      // First column of the block. For upper matrices it is raised to the first
      // row of the block (plus one if strict), rounded down to a multiple of
      // IT::size, but never below the start of the column block.
3080  size_t j( ( IsUpper<MT1>::value )
3081  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
3082  :( jj ) );
3083 
      // Chunks of 8 SIMD vectors.
3084  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
3085  {
3086  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3087 
3088  for( size_t i=ii; i<iend; ++i ) {
3089  const IntrinsicType x1( set( x[i] ) );
3090  xmm1 = xmm1 + x1 * A.load(i,j );
3091  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3092  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3093  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3094  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3095  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3096  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3097  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3098  }
3099 
3100  y.store( j , y.load(j ) + xmm1*factor );
3101  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3102  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3103  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3104  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3105  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3106  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3107  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3108  }
3109 
      // Chunks of 4 SIMD vectors.
3110  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
3111  {
3112  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3113 
3114  for( size_t i=ii; i<iend; ++i ) {
3115  const IntrinsicType x1( set( x[i] ) );
3116  xmm1 = xmm1 + x1 * A.load(i,j );
3117  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3118  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3119  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3120  }
3121 
3122  y.store( j , y.load(j ) + xmm1*factor );
3123  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3124  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3125  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3126  }
3127 
      // Chunks of 3 SIMD vectors.
3128  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
3129  {
3130  IntrinsicType xmm1, xmm2, xmm3;
3131 
3132  for( size_t i=ii; i<iend; ++i ) {
3133  const IntrinsicType x1( set( x[i] ) );
3134  xmm1 = xmm1 + x1 * A.load(i,j );
3135  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3136  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3137  }
3138 
3139  y.store( j , y.load(j ) + xmm1*factor );
3140  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3141  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3142  }
3143 
      // Chunks of 2 SIMD vectors.
3144  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
3145  {
3146  IntrinsicType xmm1, xmm2;
3147 
3148  for( size_t i=ii; i<iend; ++i ) {
3149  const IntrinsicType x1( set( x[i] ) );
3150  xmm1 = xmm1 + x1 * A.load(i,j );
3151  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3152  }
3153 
3154  y.store( j , y.load(j ) + xmm1*factor );
3155  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
3156  }
3157 
      // Single SIMD vectors.
3158  for( ; j<jpos; j+=IT::size )
3159  {
3160  IntrinsicType xmm1;
3161 
3162  for( size_t i=ii; i<iend; ++i ) {
3163  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3164  }
3165 
3166  y.store( j, y.load(j) + xmm1*factor );
3167  }
3168 
      // Scalar tail: remaining columns of the block, one element at a time.
3169  for( ; remainder && j<jend; ++j )
3170  {
3171  ElementType value = ElementType();
3172 
3173  for( size_t i=ii; i<iend; ++i ) {
3174  value += x[i] * A(i,j);
3175  }
3176 
3177  y[j] += value * scalar;
3178  }
3179  }
3180  }
3181  }
3182  //**********************************************************************************************
3183 
3184  //**BLAS-based assignment to dense vectors (default)********************************************
// Fallback overload of the "BLAS" assignment kernel. It is guarded by
// DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> > and does no work of its own: all four
// arguments are forwarded unchanged to selectLargeAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3197  template< typename VT1 // Type of the left-hand side target vector
3198  , typename VT2 // Type of the left-hand side vector operand
3199  , typename MT1 // Type of the right-hand side matrix operand
3200  , typename ST2 > // Type of the scalar value
3201  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3202  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3203  {
3204  selectLargeAssignKernel( y, x, A, scalar );
3205  }
3206  //**********************************************************************************************
3207 
3208  //**BLAS-based assignment to dense vectors******************************************************
3209 #if BLAZE_BLAS_MODE
3210 
3223  template< typename VT1 // Type of the left-hand side target vector
3224  , typename VT2 // Type of the left-hand side vector operand
3225  , typename MT1 // Type of the right-hand side matrix operand
3226  , typename ST2 > // Type of the scalar value
3227  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3228  selectBlasAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3229  {
3230  typedef typename VT1::ElementType ET;
3231 
3232  if( IsTriangular<MT1>::value ) {
3233  assign( y, scalar * x );
3234  trmv( y, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3235  }
3236  else {
3237  gemv( y, x, A, ET(scalar), ET(0) );
3238  }
3239  }
3240 #endif
3241  //**********************************************************************************************
3242 
3243  //**Assignment to sparse vectors****************************************************************
3255  template< typename VT1 > // Type of the target sparse vector
3256  friend inline void assign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3257  {
3259 
3263 
3264  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3265 
3266  const ResultType tmp( serial( rhs ) );
3267  assign( ~lhs, tmp );
3268  }
3269  //**********************************************************************************************
3270 
3271  //**Addition assignment to dense vectors********************************************************
3283  template< typename VT1 > // Type of the target dense vector
3284  friend inline void addAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3285  {
3287 
3288  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3289 
3290  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3291  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3292 
3293  if( right.rows() == 0UL || right.columns() == 0UL ) {
3294  return;
3295  }
3296 
3297  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
3298  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3299 
3300  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3301  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3302  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3303  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3304 
3305  DVecScalarMultExpr::selectAddAssignKernel( ~lhs, x, A, rhs.scalar_ );
3306  }
3307  //**********************************************************************************************
3308 
3309  //**Addition assignment to dense vectors (kernel selection)*************************************
3320  template< typename VT1 // Type of the left-hand side target vector
3321  , typename VT2 // Type of the left-hand side vector operand
3322  , typename MT1 // Type of the right-hand side matrix operand
3323  , typename ST2 > // Type of the scalar value
3324  static inline void selectAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3325  {
3326  if( ( IsDiagonal<MT1>::value ) ||
3327  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3328  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3329  selectSmallAddAssignKernel( y, x, A, scalar );
3330  else
3331  selectBlasAddAssignKernel( y, x, A, scalar );
3332  }
3333  //**********************************************************************************************
3334 
3335  //**Default addition assignment to dense vectors************************************************
// Default (non-vectorized) kernel for the addition assignment. It builds the
// expression x * A * scalar and passes it to the addAssign() member of the target
// vector y; no explicit loops are written here.
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3349  template< typename VT1 // Type of the left-hand side target vector
3350  , typename VT2 // Type of the left-hand side vector operand
3351  , typename MT1 // Type of the right-hand side matrix operand
3352  , typename ST2 > // Type of the scalar value
3353  static inline void selectDefaultAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3354  {
3355  y.addAssign( x * A * scalar );
3356  }
3357  //**********************************************************************************************
3358 
3359  //**Default addition assignment to dense vectors (small matrices)*******************************
// Small-matrix addition assignment kernel, non-vectorized variant. This overload is
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> > and simply
// forwards all arguments to selectDefaultAddAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3373  template< typename VT1 // Type of the left-hand side target vector
3374  , typename VT2 // Type of the left-hand side vector operand
3375  , typename MT1 // Type of the right-hand side matrix operand
3376  , typename ST2 > // Type of the scalar value
3377  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3378  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3379  {
3380  selectDefaultAddAssignKernel( y, x, A, scalar );
3381  }
3382  //**********************************************************************************************
3383 
3384  //**Vectorized default addition assignment to dense vectors (small matrices)********************
// Vectorized small-matrix kernel for the addition assignment y += ( x * A ) * scalar.
// This overload is guarded by EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >.
//
// The columns of A are traversed in strips of 8, 4, 3, 2 and finally 1 group(s) of
// IT::size columns. For every strip the products x[i] * A(i,j..) are summed over the
// relevant rows in IntrinsicType accumulators; the sums are then multiplied by 'factor'
// and added to the corresponding elements of y. Columns in [jpos,N) are handled by a
// scalar loop that only runs when 'remainder' is true.
//
// For lower/upper (strictly) triangular MT1 the row range [ibegin,iend) is narrowed for
// each strip; otherwise all rows [0,M) are visited.
//
// NOTE(review): the xmm accumulators are declared without an initializer, so the kernel
// relies on the default constructor of IntrinsicType producing zero - confirm.
//
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3399  template< typename VT1 // Type of the left-hand side target vector
3400  , typename VT2 // Type of the left-hand side vector operand
3401  , typename MT1 // Type of the right-hand side matrix operand
3402  , typename ST2 > // Type of the scalar value
3403  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3404  selectSmallAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3405  {
3406  typedef IntrinsicTrait<ElementType> IT;
3407 
3408  const size_t M( A.rows() );
3409  const size_t N( A.columns() );
3410 
      // A scalar tail loop is required unless both the target vector and the matrix are padded.
3411  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3412 
      // End of the vectorized column range: with a tail, N masked with size_t(-IT::size)
      // (the assertion below checks this equals N rounded down to a multiple of IT::size);
      // without a tail, N itself.
3413  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
3414  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
3415 
      // The scalar converted to an IntrinsicType via set() (presumably a broadcast to all lanes - confirm).
3416  const IntrinsicType factor( set( scalar ) );
3417 
3418  size_t j( 0UL );
3419 
      // Strips of 8*IT::size columns.
3420  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
3421  {
3422  const size_t ibegin( ( IsLower<MT1>::value )
3423  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3424  :( 0UL ) );
3425  const size_t iend( ( IsUpper<MT1>::value )
3426  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3427  :( M ) );
3428  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3429 
3430  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3431 
3432  for( size_t i=ibegin; i<iend; ++i ) {
3433  const IntrinsicType x1( set( x[i] ) );
3434  xmm1 = xmm1 + x1 * A.load(i,j );
3435  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3436  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3437  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3438  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3439  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3440  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3441  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3442  }
3443 
3444  y.store( j , y.load(j ) + xmm1*factor );
3445  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3446  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3447  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3448  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3449  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3450  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3451  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3452  }
3453 
      // Strips of 4*IT::size columns.
3454  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
3455  {
3456  const size_t ibegin( ( IsLower<MT1>::value )
3457  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3458  :( 0UL ) );
3459  const size_t iend( ( IsUpper<MT1>::value )
3460  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3461  :( M ) );
3462  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3463 
3464  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3465 
3466  for( size_t i=ibegin; i<iend; ++i ) {
3467  const IntrinsicType x1( set( x[i] ) );
3468  xmm1 = xmm1 + x1 * A.load(i,j );
3469  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3470  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3471  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3472  }
3473 
3474  y.store( j , y.load(j ) + xmm1*factor );
3475  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3476  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3477  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3478  }
3479 
      // Strips of 3*IT::size columns.
3480  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
3481  {
3482  const size_t ibegin( ( IsLower<MT1>::value )
3483  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3484  :( 0UL ) );
3485  const size_t iend( ( IsUpper<MT1>::value )
3486  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3487  :( M ) );
3488  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3489 
3490  IntrinsicType xmm1, xmm2, xmm3;
3491 
3492  for( size_t i=ibegin; i<iend; ++i ) {
3493  const IntrinsicType x1( set( x[i] ) );
3494  xmm1 = xmm1 + x1 * A.load(i,j );
3495  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3496  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3497  }
3498 
3499  y.store( j , y.load(j ) + xmm1*factor );
3500  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3501  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3502  }
3503 
      // Strips of 2*IT::size columns.
3504  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
3505  {
3506  const size_t ibegin( ( IsLower<MT1>::value )
3507  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3508  :( 0UL ) );
3509  const size_t iend( ( IsUpper<MT1>::value )
3510  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3511  :( M ) );
3512  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3513 
3514  IntrinsicType xmm1, xmm2;
3515 
3516  for( size_t i=ibegin; i<iend; ++i ) {
3517  const IntrinsicType x1( set( x[i] ) );
3518  xmm1 = xmm1 + x1 * A.load(i,j );
3519  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3520  }
3521 
3522  y.store( j , y.load(j ) + xmm1*factor );
3523  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
3524  }
3525 
      // Remaining single groups of IT::size columns below jpos.
3526  for( ; j<jpos; j+=IT::size )
3527  {
3528  const size_t ibegin( ( IsLower<MT1>::value )
3529  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3530  :( 0UL ) );
3531  const size_t iend( ( IsUpper<MT1>::value )
3532  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3533  :( M ) );
3534  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3535 
3536  IntrinsicType xmm1;
3537 
3538  for( size_t i=ibegin; i<iend; ++i ) {
3539  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3540  }
3541 
3542  y.store( j, y.load(j) + xmm1*factor );
3543  }
3544 
      // Scalar tail: one column at a time for the columns in [jpos,N); skipped entirely
      // when 'remainder' is false.
3545  for( ; remainder && j<N; ++j )
3546  {
3547  const size_t ibegin( ( IsLower<MT1>::value )
3548  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3549  :( 0UL ) );
3550  const size_t iend( ( IsUpper<MT1>::value )
3551  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3552  :( M ) );
3553  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3554 
3555  ElementType value = ElementType();
3556 
3557  for( size_t i=ibegin; i<iend; ++i ) {
3558  value += x[i] * A(i,j);
3559  }
3560 
3561  y[j] += value * scalar;
3562  }
3563  }
3564  //**********************************************************************************************
3565 
3566  //**Default addition assignment to dense vectors (large matrices)*******************************
// Large-matrix addition assignment kernel, non-vectorized variant. This overload is
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> > and simply
// forwards all arguments to selectDefaultAddAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3580  template< typename VT1 // Type of the left-hand side target vector
3581  , typename VT2 // Type of the left-hand side vector operand
3582  , typename MT1 // Type of the right-hand side matrix operand
3583  , typename ST2 > // Type of the scalar value
3584  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3585  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3586  {
3587  selectDefaultAddAssignKernel( y, x, A, scalar );
3588  }
3589  //**********************************************************************************************
3590 
3591  //**Vectorized default addition assignment to dense vectors (large matrices)********************
// Vectorized large-matrix kernel for the addition assignment y += ( x * A ) * scalar.
// This overload is guarded by EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >.
//
// A is processed in tiles of 'iblock' rows by 'jblock' columns. Inside a tile the
// columns are traversed in strips of 8, 4, 3, 2 and 1 group(s) of IT::size columns;
// for each strip the products x[i] * A(i,j..) are summed over the rows [ii,iend) of
// the tile, multiplied by 'factor' and added to y immediately. y therefore accumulates
// the contributions of successive row tiles.
//
// For lower/upper (strictly) triangular MT1 the column range of each tile is clipped
// (see 'jend' and the start value of 'j').
//
// NOTE(review): the xmm accumulators are declared without an initializer, so the kernel
// relies on the default constructor of IntrinsicType producing zero - confirm.
//
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3606  template< typename VT1 // Type of the left-hand side target vector
3607  , typename VT2 // Type of the left-hand side vector operand
3608  , typename MT1 // Type of the right-hand side matrix operand
3609  , typename ST2 > // Type of the scalar value
3610  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3611  selectLargeAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3612  {
3613  typedef IntrinsicTrait<ElementType> IT;
3614 
3615  const size_t M( A.rows() );
3616  const size_t N( A.columns() );
3617 
      // A scalar tail loop is required unless both the target vector and the matrix are padded.
3618  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3619 
      // Column tile width: the number of elements that occupy 32768 bytes
      // (presumably matched to a 32 KiB cache - TODO confirm).
3620  const size_t jblock( 32768UL / sizeof( ElementType ) );
      // Row tile height: 8 rows if all N columns fit into a single column tile, 4 otherwise.
3621  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
3622 
3623  const IntrinsicType factor( set( scalar ) );
3624 
3625  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
3626 
3627  for( size_t jj=0U; jj<N; jj+=jblock ) {
3628  for( size_t ii=0UL; ii<M; ii+=iblock )
3629  {
3630  const size_t iend( min( ii+iblock, M ) );
3631  const size_t jtmp( min( jj+jblock, N ) );
      // For lower triangular MT1 the tile's column range is clipped to iend
      // (iend-1 if strictly lower).
3632  const size_t jend( ( IsLower<MT1>::value )
3633  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
3634  :( jtmp ) );
3635 
      // End of the vectorized column range of this tile (jend rounded down to a
      // multiple of IT::size when a tail is needed, as checked by the assertion).
3636  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
3637  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
3638 
      // For upper triangular MT1 the first column is ii (ii+1 if strictly upper)
      // masked with size_t(-IT::size), but never smaller than jj.
3639  size_t j( ( IsUpper<MT1>::value )
3640  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
3641  :( jj ) );
3642 
3643  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
3644  {
3645  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3646 
3647  for( size_t i=ii; i<iend; ++i ) {
3648  const IntrinsicType x1( set( x[i] ) );
3649  xmm1 = xmm1 + x1 * A.load(i,j );
3650  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3651  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3652  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3653  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3654  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3655  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3656  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3657  }
3658 
3659  y.store( j , y.load(j ) + xmm1*factor );
3660  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3661  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3662  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3663  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) + xmm5*factor );
3664  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) + xmm6*factor );
3665  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) + xmm7*factor );
3666  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) + xmm8*factor );
3667  }
3668 
3669  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
3670  {
3671  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3672 
3673  for( size_t i=ii; i<iend; ++i ) {
3674  const IntrinsicType x1( set( x[i] ) );
3675  xmm1 = xmm1 + x1 * A.load(i,j );
3676  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3677  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3678  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3679  }
3680 
3681  y.store( j , y.load(j ) + xmm1*factor );
3682  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3683  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3684  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) + xmm4*factor );
3685  }
3686 
3687  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
3688  {
3689  IntrinsicType xmm1, xmm2, xmm3;
3690 
3691  for( size_t i=ii; i<iend; ++i ) {
3692  const IntrinsicType x1( set( x[i] ) );
3693  xmm1 = xmm1 + x1 * A.load(i,j );
3694  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3695  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3696  }
3697 
3698  y.store( j , y.load(j ) + xmm1*factor );
3699  y.store( j+IT::size , y.load(j+IT::size ) + xmm2*factor );
3700  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) + xmm3*factor );
3701  }
3702 
3703  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
3704  {
3705  IntrinsicType xmm1, xmm2;
3706 
3707  for( size_t i=ii; i<iend; ++i ) {
3708  const IntrinsicType x1( set( x[i] ) );
3709  xmm1 = xmm1 + x1 * A.load(i,j );
3710  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
3711  }
3712 
3713  y.store( j , y.load(j ) + xmm1*factor );
3714  y.store( j+IT::size, y.load(j+IT::size) + xmm2*factor );
3715  }
3716 
3717  for( ; j<jpos; j+=IT::size )
3718  {
3719  IntrinsicType xmm1;
3720 
3721  for( size_t i=ii; i<iend; ++i ) {
3722  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
3723  }
3724 
3725  y.store( j, y.load(j) + xmm1*factor );
3726  }
3727 
      // Scalar tail for the columns of this tile in [jpos,jend); skipped when
      // 'remainder' is false.
3728  for( ; remainder && j<jend; ++j )
3729  {
3730  ElementType value = ElementType();
3731 
3732  for( size_t i=ii; i<iend; ++i ) {
3733  value += x[i] * A(i,j);
3734  }
3735 
3736  y[j] += value * scalar;
3737  }
3738  }
3739  }
3740  }
3741  //**********************************************************************************************
3742 
3743  //**BLAS-based addition assignment to dense vectors (default)***********************************
// Fallback overload of the "BLAS" addition assignment kernel. It is guarded by
// DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> > and forwards all four arguments
// unchanged to selectLargeAddAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3757  template< typename VT1 // Type of the left-hand side target vector
3758  , typename VT2 // Type of the left-hand side vector operand
3759  , typename MT1 // Type of the right-hand side matrix operand
3760  , typename ST2 > // Type of the scalar value
3761  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3762  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3763  {
3764  selectLargeAddAssignKernel( y, x, A, scalar );
3765  }
3766  //**********************************************************************************************
3767 
3768  //**BLAS-based addition assignment to dense vectors*********************************************
3769 #if BLAZE_BLAS_MODE
3770 
3783  template< typename VT1 // Type of the left-hand side target vector
3784  , typename VT2 // Type of the left-hand side vector operand
3785  , typename MT1 // Type of the right-hand side matrix operand
3786  , typename ST2 > // Type of the scalar value
3787  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
3788  selectBlasAddAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3789  {
3790  typedef typename VT1::ElementType ET;
3791 
3792  if( IsTriangular<MT1>::value ) {
3793  typename VT1::ResultType tmp( serial( scalar * x ) );
3794  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
3795  addAssign( y, tmp );
3796  }
3797  else {
3798  gemv( y, x, A, ET(scalar), ET(1) );
3799  }
3800  }
3801 #endif
3802  //**********************************************************************************************
3803 
3804  //**Addition assignment to sparse vectors*******************************************************
3805  // No special implementation for the addition assignment to sparse vectors.
3806  //**********************************************************************************************
3807 
3808  //**Subtraction assignment to dense vectors*****************************************************
3820  template< typename VT1 > // Type of the target dense vector
3821  friend inline void subAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
3822  {
3824 
3825  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
3826 
3827  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
3828  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
3829 
3830  if( right.rows() == 0UL || right.columns() == 0UL ) {
3831  return;
3832  }
3833 
3834  LT x( serial( left ) ); // Evaluation of the left-hand side dense vector operand
3835  RT A( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3836 
3837  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
3838  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
3839  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
3840  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
3841 
3842  DVecScalarMultExpr::selectSubAssignKernel( ~lhs, x, A, rhs.scalar_ );
3843  }
3844  //**********************************************************************************************
3845 
3846  //**Subtraction assignment to dense vectors (kernel selection)**********************************
3857  template< typename VT1 // Type of the left-hand side target vector
3858  , typename VT2 // Type of the left-hand side vector operand
3859  , typename MT1 // Type of the right-hand side matrix operand
3860  , typename ST2 > // Type of the scalar value
3861  static inline void selectSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3862  {
3863  if( ( IsDiagonal<MT1>::value ) ||
3864  ( IsComputation<MT>::value && !evaluateMatrix ) ||
3865  ( A.rows() * A.columns() < TDVECDMATMULT_THRESHOLD ) )
3866  selectSmallSubAssignKernel( y, x, A, scalar );
3867  else
3868  selectBlasSubAssignKernel( y, x, A, scalar );
3869  }
3870  //**********************************************************************************************
3871 
3872  //**Default subtraction assignment to dense vectors*********************************************
// Default (non-vectorized) kernel for the subtraction assignment. It builds the
// expression x * A * scalar and passes it to the subAssign() member of the target
// vector y; no explicit loops are written here.
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3886  template< typename VT1 // Type of the left-hand side target vector
3887  , typename VT2 // Type of the left-hand side vector operand
3888  , typename MT1 // Type of the right-hand side matrix operand
3889  , typename ST2 > // Type of the scalar value
3890  static inline void selectDefaultSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3891  {
3892  y.subAssign( x * A * scalar );
3893  }
3894  //**********************************************************************************************
3895 
3896  //**Default subtraction assignment to dense vectors (small matrices)****************************
// Small-matrix subtraction assignment kernel, non-vectorized variant. This overload is
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> > and simply
// forwards all arguments to selectDefaultSubAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3910  template< typename VT1 // Type of the left-hand side target vector
3911  , typename VT2 // Type of the left-hand side vector operand
3912  , typename MT1 // Type of the right-hand side matrix operand
3913  , typename ST2 > // Type of the scalar value
3914  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3915  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3916  {
3917  selectDefaultSubAssignKernel( y, x, A, scalar );
3918  }
3919  //**********************************************************************************************
3920 
3921  //**Vectorized default subtraction assignment to dense vectors (small matrices)*****************
// Vectorized small-matrix kernel for the subtraction assignment y -= ( x * A ) * scalar.
// This overload is guarded by EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >.
//
// The columns of A are traversed in strips of 8, 4, 3, 2 and finally 1 group(s) of
// IT::size columns. For every strip the products x[i] * A(i,j..) are summed over the
// relevant rows in IntrinsicType accumulators; the sums are then multiplied by 'factor'
// and subtracted from the corresponding elements of y. Columns in [jpos,N) are handled
// by a scalar loop that only runs when 'remainder' is true.
//
// For lower/upper (strictly) triangular MT1 the row range [ibegin,iend) is narrowed for
// each strip; otherwise all rows [0,M) are visited.
//
// NOTE(review): the xmm accumulators are declared without an initializer, so the kernel
// relies on the default constructor of IntrinsicType producing zero - confirm.
//
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
3936  template< typename VT1 // Type of the left-hand side target vector
3937  , typename VT2 // Type of the left-hand side vector operand
3938  , typename MT1 // Type of the right-hand side matrix operand
3939  , typename ST2 > // Type of the scalar value
3940  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
3941  selectSmallSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
3942  {
3943  typedef IntrinsicTrait<ElementType> IT;
3944 
3945  const size_t M( A.rows() );
3946  const size_t N( A.columns() );
3947 
      // A scalar tail loop is required unless both the target vector and the matrix are padded.
3948  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
3949 
      // End of the vectorized column range: with a tail, N masked with size_t(-IT::size)
      // (the assertion below checks this equals N rounded down to a multiple of IT::size);
      // without a tail, N itself.
3950  const size_t jpos( remainder ? ( N & size_t(-IT::size) ) : N );
3951  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % IT::size ) ) == jpos, "Invalid end calculation" );
3952 
      // The scalar converted to an IntrinsicType via set() (presumably a broadcast to all lanes - confirm).
3953  const IntrinsicType factor( set( scalar ) );
3954 
3955  size_t j( 0UL );
3956 
      // Strips of 8*IT::size columns.
3957  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
3958  {
3959  const size_t ibegin( ( IsLower<MT1>::value )
3960  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3961  :( 0UL ) );
3962  const size_t iend( ( IsUpper<MT1>::value )
3963  ?( min( j+IT::size*8UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3964  :( M ) );
3965  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3966 
3967  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3968 
3969  for( size_t i=ibegin; i<iend; ++i ) {
3970  const IntrinsicType x1( set( x[i] ) );
3971  xmm1 = xmm1 + x1 * A.load(i,j );
3972  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
3973  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
3974  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
3975  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
3976  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
3977  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
3978  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
3979  }
3980 
3981  y.store( j , y.load(j ) - xmm1*factor );
3982  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
3983  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
3984  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
3985  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
3986  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
3987  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
3988  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
3989  }
3990 
      // Strips of 4*IT::size columns.
3991  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
3992  {
3993  const size_t ibegin( ( IsLower<MT1>::value )
3994  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
3995  :( 0UL ) );
3996  const size_t iend( ( IsUpper<MT1>::value )
3997  ?( min( j+IT::size*4UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
3998  :( M ) );
3999  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4000 
4001  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4002 
4003  for( size_t i=ibegin; i<iend; ++i ) {
4004  const IntrinsicType x1( set( x[i] ) );
4005  xmm1 = xmm1 + x1 * A.load(i,j );
4006  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4007  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4008  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4009  }
4010 
4011  y.store( j , y.load(j ) - xmm1*factor );
4012  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4013  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4014  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4015  }
4016 
      // Strips of 3*IT::size columns.
4017  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
4018  {
4019  const size_t ibegin( ( IsLower<MT1>::value )
4020  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4021  :( 0UL ) );
4022  const size_t iend( ( IsUpper<MT1>::value )
4023  ?( min( j+IT::size*3UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4024  :( M ) );
4025  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4026 
4027  IntrinsicType xmm1, xmm2, xmm3;
4028 
4029  for( size_t i=ibegin; i<iend; ++i ) {
4030  const IntrinsicType x1( set( x[i] ) );
4031  xmm1 = xmm1 + x1 * A.load(i,j );
4032  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4033  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4034  }
4035 
4036  y.store( j , y.load(j ) - xmm1*factor );
4037  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4038  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4039  }
4040 
      // Strips of 2*IT::size columns.
4041  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
4042  {
4043  const size_t ibegin( ( IsLower<MT1>::value )
4044  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4045  :( 0UL ) );
4046  const size_t iend( ( IsUpper<MT1>::value )
4047  ?( min( j+IT::size*2UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4048  :( M ) );
4049  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4050 
4051  IntrinsicType xmm1, xmm2;
4052 
4053  for( size_t i=ibegin; i<iend; ++i ) {
4054  const IntrinsicType x1( set( x[i] ) );
4055  xmm1 = xmm1 + x1 * A.load(i,j );
4056  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
4057  }
4058 
4059  y.store( j , y.load(j ) - xmm1*factor );
4060  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
4061  }
4062 
      // Remaining single groups of IT::size columns below jpos.
4063  for( ; j<jpos; j+=IT::size )
4064  {
4065  const size_t ibegin( ( IsLower<MT1>::value )
4066  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4067  :( 0UL ) );
4068  const size_t iend( ( IsUpper<MT1>::value )
4069  ?( min( j+IT::size, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4070  :( M ) );
4071  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4072 
4073  IntrinsicType xmm1;
4074 
4075  for( size_t i=ibegin; i<iend; ++i ) {
4076  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
4077  }
4078 
4079  y.store( j, y.load(j) - xmm1*factor );
4080  }
4081 
      // Scalar tail: one column at a time for the columns in [jpos,N); skipped entirely
      // when 'remainder' is false.
4082  for( ; remainder && j<N; ++j )
4083  {
4084  const size_t ibegin( ( IsLower<MT1>::value )
4085  ?( IsStrictlyLower<MT1>::value ? j+1UL : j )
4086  :( 0UL ) );
4087  const size_t iend( ( IsUpper<MT1>::value )
4088  ?( min( j+1UL, M ) - ( IsStrictlyUpper<MT1>::value ? 1UL : 0UL ) )
4089  :( M ) );
4090  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4091 
4092  ElementType value = ElementType();
4093 
4094  for( size_t i=ibegin; i<iend; ++i ) {
4095  value += x[i] * A(i,j);
4096  }
4097 
4098  y[j] -= value * scalar;
4099  }
4100  }
4101  //**********************************************************************************************
4102 
4103  //**Default subtraction assignment to dense vectors (large matrices)****************************
// Large-matrix subtraction assignment kernel, non-vectorized variant. This overload is
// guarded by DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> > and simply
// forwards all arguments to selectDefaultSubAssignKernel().
//   y      - target vector
//   x      - left-hand side vector operand
//   A      - right-hand side matrix operand
//   scalar - scaling factor
4117  template< typename VT1 // Type of the left-hand side target vector
4118  , typename VT2 // Type of the left-hand side vector operand
4119  , typename MT1 // Type of the right-hand side matrix operand
4120  , typename ST2 > // Type of the scalar value
4121  static inline typename DisableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4122  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4123  {
4124  selectDefaultSubAssignKernel( y, x, A, scalar );
4125  }
4126  //**********************************************************************************************
4127 
4128  //**Vectorized default subtraction assignment to dense vectors (large matrices)*****************
4143  template< typename VT1 // Type of the left-hand side target vector
4144  , typename VT2 // Type of the left-hand side vector operand
4145  , typename MT1 // Type of the right-hand side matrix operand
4146  , typename ST2 > // Type of the scalar value
4147  static inline typename EnableIf< UseVectorizedDefaultKernel<VT1,VT2,MT1,ST2> >::Type
4148  selectLargeSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4149  {
4150  typedef IntrinsicTrait<ElementType> IT;
4151 
4152  const size_t M( A.rows() );
4153  const size_t N( A.columns() );
4154 
4155  const bool remainder( !IsPadded<VT1>::value || !IsPadded<MT1>::value );
4156 
4157  const size_t jblock( 32768UL / sizeof( ElementType ) );
4158  const size_t iblock( ( N < jblock )?( 8UL ):( 4UL ) );
4159 
4160  const IntrinsicType factor( set( scalar ) );
4161 
4162  BLAZE_INTERNAL_ASSERT( ( jblock % IT::size ) == 0UL, "Invalid block size detected" );
4163 
4164  for( size_t jj=0U; jj<N; jj+=jblock ) {
4165  for( size_t ii=0UL; ii<M; ii+=iblock )
4166  {
4167  const size_t iend( min( ii+iblock, M ) );
4168  const size_t jtmp( min( jj+jblock, N ) );
4169  const size_t jend( ( IsLower<MT1>::value )
4170  ?( min( jtmp, ( IsStrictlyLower<MT1>::value ? iend-1UL : iend ) ) )
4171  :( jtmp ) );
4172 
4173  const size_t jpos( remainder ? ( jend & size_t(-IT::size) ) : jend );
4174  BLAZE_INTERNAL_ASSERT( !remainder || ( jend - ( jend % IT::size ) ) == jpos, "Invalid end calculation" );
4175 
4176  size_t j( ( IsUpper<MT1>::value )
4177  ?( max( jj, ( IsStrictlyUpper<MT1>::value ? ii+1UL : ii ) & size_t(-IT::size) ) )
4178  :( jj ) );
4179 
4180  for( ; (j+IT::size*7UL) < jpos; j+=IT::size*8UL )
4181  {
4182  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4183 
4184  for( size_t i=ii; i<iend; ++i ) {
4185  const IntrinsicType x1( set( x[i] ) );
4186  xmm1 = xmm1 + x1 * A.load(i,j );
4187  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4188  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4189  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4190  xmm5 = xmm5 + x1 * A.load(i,j+IT::size*4UL);
4191  xmm6 = xmm6 + x1 * A.load(i,j+IT::size*5UL);
4192  xmm7 = xmm7 + x1 * A.load(i,j+IT::size*6UL);
4193  xmm8 = xmm8 + x1 * A.load(i,j+IT::size*7UL);
4194  }
4195 
4196  y.store( j , y.load(j ) - xmm1*factor );
4197  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4198  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4199  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4200  y.store( j+IT::size*4UL, y.load(j+IT::size*4UL) - xmm5*factor );
4201  y.store( j+IT::size*5UL, y.load(j+IT::size*5UL) - xmm6*factor );
4202  y.store( j+IT::size*6UL, y.load(j+IT::size*6UL) - xmm7*factor );
4203  y.store( j+IT::size*7UL, y.load(j+IT::size*7UL) - xmm8*factor );
4204  }
4205 
4206  for( ; (j+IT::size*3UL) < jpos; j+=IT::size*4UL )
4207  {
4208  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4209 
4210  for( size_t i=ii; i<iend; ++i ) {
4211  const IntrinsicType x1( set( x[i] ) );
4212  xmm1 = xmm1 + x1 * A.load(i,j );
4213  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4214  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4215  xmm4 = xmm4 + x1 * A.load(i,j+IT::size*3UL);
4216  }
4217 
4218  y.store( j , y.load(j ) - xmm1*factor );
4219  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4220  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4221  y.store( j+IT::size*3UL, y.load(j+IT::size*3UL) - xmm4*factor );
4222  }
4223 
4224  for( ; (j+IT::size*2UL) < jpos; j+=IT::size*3UL )
4225  {
4226  IntrinsicType xmm1, xmm2, xmm3;
4227 
4228  for( size_t i=ii; i<iend; ++i ) {
4229  const IntrinsicType x1( set( x[i] ) );
4230  xmm1 = xmm1 + x1 * A.load(i,j );
4231  xmm2 = xmm2 + x1 * A.load(i,j+IT::size );
4232  xmm3 = xmm3 + x1 * A.load(i,j+IT::size*2UL);
4233  }
4234 
4235  y.store( j , y.load(j ) - xmm1*factor );
4236  y.store( j+IT::size , y.load(j+IT::size ) - xmm2*factor );
4237  y.store( j+IT::size*2UL, y.load(j+IT::size*2UL) - xmm3*factor );
4238  }
4239 
4240  for( ; (j+IT::size) < jpos; j+=IT::size*2UL )
4241  {
4242  IntrinsicType xmm1, xmm2;
4243 
4244  for( size_t i=ii; i<iend; ++i ) {
4245  const IntrinsicType x1( set( x[i] ) );
4246  xmm1 = xmm1 + x1 * A.load(i,j );
4247  xmm2 = xmm2 + x1 * A.load(i,j+IT::size);
4248  }
4249 
4250  y.store( j , y.load(j ) - xmm1*factor );
4251  y.store( j+IT::size, y.load(j+IT::size) - xmm2*factor );
4252  }
4253 
4254  for( ; j<jpos; j+=IT::size )
4255  {
4256  IntrinsicType xmm1;
4257 
4258  for( size_t i=ii; i<iend; ++i ) {
4259  xmm1 = xmm1 + set( x[i] ) * A.load(i,j);
4260  }
4261 
4262  y.store( j, y.load(j) - xmm1*factor );
4263  }
4264 
4265  for( ; remainder && j<jend; ++j )
4266  {
4267  ElementType value = ElementType();
4268 
4269  for( size_t i=ii; i<iend; ++i ) {
4270  value += x[i] * A(i,j);
4271  }
4272 
4273  y[j] -= value * scalar;
4274  }
4275  }
4276  }
4277  }
4278  //**********************************************************************************************
4279 
4280  //**BLAS-based subtraction assignment to dense vectors (default)********************************
4294  template< typename VT1 // Type of the left-hand side target vector
4295  , typename VT2 // Type of the left-hand side vector operand
4296  , typename MT1 // Type of the right-hand side matrix operand
4297  , typename ST2 > // Type of the scalar value
4298  static inline typename DisableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
4299  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4300  {
4301  selectLargeSubAssignKernel( y, x, A, scalar );
4302  }
4303  //**********************************************************************************************
4304 
4305  //**BLAS-based subtraction assignment to dense vectors******************************************
4306 #if BLAZE_BLAS_MODE
4307 
4320  template< typename VT1 // Type of the left-hand side target vector
4321  , typename VT2 // Type of the left-hand side vector operand
4322  , typename MT1 // Type of the right-hand side matrix operand
4323  , typename ST2 > // Type of the scalar value
4324  static inline typename EnableIf< UseBlasKernel<VT1,VT2,MT1,ST2> >::Type
4325  selectBlasSubAssignKernel( VT1& y, const VT2& x, const MT1& A, ST2 scalar )
4326  {
4327  typedef typename VT1::ElementType ET;
4328 
4329  if( IsTriangular<MT1>::value ) {
4330  typename VT1::ResultType tmp( serial( scalar * x ) );
4331  trmv( tmp, A, ( IsLower<MT1>::value )?( CblasLower ):( CblasUpper ) );
4332  subAssign( y, tmp );
4333  }
4334  else {
4335  gemv( y, x, A, ET(-scalar), ET(1) );
4336  }
4337  }
4338 #endif
4339  //**********************************************************************************************
4340 
4341  //**Subtraction assignment to sparse vectors****************************************************
4342  // No special implementation for the subtraction assignment to sparse vectors.
4343  //**********************************************************************************************
4344 
4345  //**Multiplication assignment to dense vectors**************************************************
4357  template< typename VT1 > // Type of the target dense vector
4358  friend inline void multAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4359  {
4361 
4365 
4366  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4367 
4368  const ResultType tmp( serial( rhs ) );
4369  multAssign( ~lhs, tmp );
4370  }
4371  //**********************************************************************************************
4372 
4373  //**Multiplication assignment to sparse vectors*************************************************
4374  // No special implementation for the multiplication assignment to sparse vectors.
4375  //**********************************************************************************************
4376 
4377  //**SMP assignment to dense vectors*************************************************************
4391  template< typename VT1 > // Type of the target dense vector
4392  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4393  smpAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4394  {
4396 
4397  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4398 
4399  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4400  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4401 
4402  if( right.rows() == 0UL ) {
4403  reset( ~lhs );
4404  return;
4405  }
4406  else if( right.columns() == 0UL ) {
4407  return;
4408  }
4409 
4410  LT x( left ); // Evaluation of the left-hand side dense vector operand
4411  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4412 
4413  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4414  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4415  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4416  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4417 
4418  smpAssign( ~lhs, x * A * rhs.scalar_ );
4419  }
4420  //**********************************************************************************************
4421 
4422  //**SMP assignment to sparse vectors************************************************************
4436  template< typename VT1 > // Type of the target sparse vector
4437  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4438  smpAssign( SparseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4439  {
4441 
4445 
4446  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4447 
4448  const ResultType tmp( rhs );
4449  smpAssign( ~lhs, tmp );
4450  }
4451  //**********************************************************************************************
4452 
4453  //**SMP addition assignment to dense vectors****************************************************
4467  template< typename VT1 > // Type of the target dense vector
4468  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4469  smpAddAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4470  {
4472 
4473  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4474 
4475  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4476  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4477 
4478  if( right.rows() == 0UL || right.columns() == 0UL ) {
4479  return;
4480  }
4481 
4482  LT x( left ); // Evaluation of the left-hand side dense vector operand
4483  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4484 
4485  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4486  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4487  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4488  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4489 
4490  smpAddAssign( ~lhs, x * A * rhs.scalar_ );
4491  }
4492  //**********************************************************************************************
4493 
4494  //**SMP addition assignment to sparse vectors***************************************************
4495  // No special implementation for the SMP addition assignment to sparse vectors.
4496  //**********************************************************************************************
4497 
4498  //**SMP subtraction assignment to dense vectors*************************************************
4512  template< typename VT1 > // Type of the target dense vector
4513  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4514  smpSubAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4515  {
4517 
4518  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4519 
4520  typename VMM::LeftOperand left ( rhs.vector_.leftOperand() );
4521  typename VMM::RightOperand right( rhs.vector_.rightOperand() );
4522 
4523  if( right.rows() == 0UL || right.columns() == 0UL ) {
4524  return;
4525  }
4526 
4527  LT x( left ); // Evaluation of the left-hand side dense vector operand
4528  RT A( right ); // Evaluation of the right-hand side dense matrix operand
4529 
4530  BLAZE_INTERNAL_ASSERT( x.size() == left.size() , "Invalid vector size" );
4531  BLAZE_INTERNAL_ASSERT( A.rows() == right.rows() , "Invalid number of rows" );
4532  BLAZE_INTERNAL_ASSERT( A.columns() == right.columns(), "Invalid number of columns" );
4533  BLAZE_INTERNAL_ASSERT( A.columns() == (~lhs).size() , "Invalid vector size" );
4534 
4535  smpSubAssign( ~lhs, x * A * rhs.scalar_ );
4536  }
4537  //**********************************************************************************************
4538 
4539  //**SMP subtraction assignment to sparse vectors************************************************
4540  // No special implementation for the SMP subtraction assignment to sparse vectors.
4541  //**********************************************************************************************
4542 
4543  //**SMP multiplication assignment to dense vectors**********************************************
4558  template< typename VT1 > // Type of the target dense vector
4559  friend inline typename EnableIf< UseSMPAssign<VT1> >::Type
4560  smpMultAssign( DenseVector<VT1,true>& lhs, const DVecScalarMultExpr& rhs )
4561  {
4563 
4567 
4568  BLAZE_INTERNAL_ASSERT( (~lhs).size() == rhs.size(), "Invalid vector sizes" );
4569 
4570  const ResultType tmp( rhs );
4571  smpMultAssign( ~lhs, tmp );
4572  }
4573  //**********************************************************************************************
4574 
4575  //**SMP multiplication assignment to sparse vectors*********************************************
4576  // No special implementation for the SMP multiplication assignment to sparse vectors.
4577  //**********************************************************************************************
4578 
4579  //**Compile time checks*************************************************************************
4587  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
4588  //**********************************************************************************************
4589 };
4591 //*************************************************************************************************
4592 
4593 
4594 
4595 
4596 //=================================================================================================
4597 //
4598 // GLOBAL BINARY ARITHMETIC OPERATORS
4599 //
4600 //=================================================================================================
4601 
4602 //*************************************************************************************************
4633 template< typename T1 // Type of the left-hand side dense vector
4634  , typename T2 > // Type of the right-hand side dense matrix
4635 inline const typename DisableIf< IsMatMatMultExpr<T2>, TDVecDMatMultExpr<T1,T2> >::Type
4637 {
4639 
4640  if( (~vec).size() != (~mat).rows() ) {
4641  BLAZE_THROW_INVALID_ARGUMENT( "Vector and matrix sizes do not match" );
4642  }
4643 
4644  return TDVecDMatMultExpr<T1,T2>( ~vec, ~mat );
4645 }
4646 //*************************************************************************************************
4647 
4648 
4649 
4650 
4651 //=================================================================================================
4652 //
4653 // GLOBAL RESTRUCTURING BINARY ARITHMETIC OPERATORS
4654 //
4655 //=================================================================================================
4656 
4657 //*************************************************************************************************
4670 template< typename T1 // Type of the left-hand side dense vector
4671  , typename T2 // Type of the right-hand side dense matrix
4672  , bool SO > // Storage order of the right-hand side dense matrix
4673 inline const typename EnableIf< IsMatMatMultExpr<T2>, typename MultExprTrait<T1,T2>::Type >::Type
4675 {
4677 
4679 
4680  return ( vec * (~mat).leftOperand() ) * (~mat).rightOperand();
4681 }
4682 //*************************************************************************************************
4683 
4684 
4685 
4686 
4687 //=================================================================================================
4688 //
4689 // SIZE SPECIALIZATIONS
4690 //
4691 //=================================================================================================
4692 
4693 //*************************************************************************************************
4695 template< typename VT, typename MT >
4696 struct Size< TDVecDMatMultExpr<VT,MT> > : public Columns<MT>
4697 {};
4699 //*************************************************************************************************
4700 
4701 
4702 
4703 
4704 //=================================================================================================
4705 //
4706 // ISALIGNED SPECIALIZATIONS
4707 //
4708 //=================================================================================================
4709 
4710 //*************************************************************************************************
4712 template< typename VT, typename MT >
4713 struct IsAligned< TDVecDMatMultExpr<VT,MT> >
4714  : public IsTrue< And< IsAligned<VT>, IsAligned<MT> >::value >
4715 {};
4717 //*************************************************************************************************
4718 
4719 
4720 
4721 
4722 //=================================================================================================
4723 //
4724 // EXPRESSION TRAIT SPECIALIZATIONS
4725 //
4726 //=================================================================================================
4727 
4728 //*************************************************************************************************
4730 template< typename VT, typename MT, bool AF >
4731 struct SubvectorExprTrait< TDVecDMatMultExpr<VT,MT>, AF >
4732 {
4733  public:
4734  //**********************************************************************************************
4735  typedef typename MultExprTrait< typename SubvectorExprTrait<const VT,AF>::Type
4736  , typename SubmatrixExprTrait<const MT,AF>::Type >::Type Type;
4737  //**********************************************************************************************
4738 };
4740 //*************************************************************************************************
4741 
4742 } // namespace blaze
4743 
4744 #endif
VT::ResultType VRT
Result type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:126
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way of...
Definition: Exception.h:187
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1729
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, simd_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:206
Header file for mathematical functions.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( ).
Definition: DMatDMatMultExpr.h:7820
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:252
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:207
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the IsSame and IsStrictlySame type traits.
LeftOperand leftOperand() const
Returns the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:320
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:507
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2588
MRT::ElementType MET
Element type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:129
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:259
BLAZE_ALWAYS_INLINE size_t rows(const Matrix< MT, SO > &matrix)
Returns the current number of rows of the matrix.
Definition: Matrix.h:308
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
LeftOperand vec_
Left-hand side dense vector of the multiplication expression.
Definition: TDVecDMatMultExpr.h:384
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Header file for the Computation base class.
SelectType< evaluateMatrix, const MRT, MCT >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:219
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Header file for the VecScalarMultExpr base class.
Header file for the IsFloat type trait.
size_t size() const
Returns the current size/dimension of the vector.
Definition: TDVecDMatMultExpr.h:310
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDVecDMatMultExpr.h:207
ResultType::ElementType ElementType
Resulting element type.
Definition: TDVecDMatMultExpr.h:204
Header file for the IsComplexDouble type trait.
MultTrait< VRT, MRT >::Type ResultType
Result type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:202
Constraint on the data type.
Header file for the MultExprTrait class template.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDVecDMatMultExpr.h:374
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDVecDMatMultExpr.h:330
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MT::CompositeType MCT
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:131
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:261
ReturnType operator[](size_t index) const
Subscript operator for the direct access to the vector elements.
Definition: TDVecDMatMultExpr.h:255
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
MT::ResultType MRT
Result type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:127
TDVecDMatMultExpr(const VT &vec, const MT &mat)
Constructor for the TDVecDMatMultExpr class.
Definition: TDVecDMatMultExpr.h:241
SelectType< evaluateVector, const VRT, VCT >::Type LT
Type for the assignment of the left-hand side dense vector operand.
Definition: TDVecDMatMultExpr.h:216
Header file for the IsMatMatMultExpr type trait class.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bla...
Definition: Exception.h:331
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1682
Header file for the Columns type trait.
SelectType< IsExpression< MT >::value, const MT, const MT & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDVecDMatMultExpr.h:213
Header file for the IsBlasCompatible type trait.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDVecDMatMultExpr.h:203
Base class for N-dimensional dense vectors.The DenseVector class is a base class for all arbitrarily ...
Definition: DenseVector.h:70
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDVecDMatMultExpr.h:354
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDVecDMatMultExpr.h:205
Constraint on the data type.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2586
RightOperand mat_
Right-hand side dense matrix of the multiplication expression.
Definition: TDVecDMatMultExpr.h:385
Header file for the SelectType class template.
Header file for all forward declarations for expression class templates.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDVecDMatMultExpr.h:364
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
TDVecDMatMultExpr< VT, MT > This
Type of this TDVecDMatMultExpr instance.
Definition: TDVecDMatMultExpr.h:201
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
VT::CompositeType VCT
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:130
System settings for the BLAS mode.
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:79
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:1232
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Base template for the MultTrait class.
Definition: MultTrait.h:138
Header file for BLAS triangular matrix/vector multiplication functions (trmv)
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
const bool useOptimizedKernels
Configuration switch for optimized kernels.This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Header file for the reset shim.
Constraint on the data type.
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBlasCompatible.h:99
Header file for the TVecMatMultExpr base class.
Constraint on the data type.
Expression object for transpose dense vector-dense matrix multiplications.The TDVecDMatMultExpr class...
Definition: Forward.h:146
Header file for the HasMutableDataAccess type trait.
Header file for all intrinsic functionality.
Header file for BLAS general matrix/vector multiplication functions (gemv)
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
ReturnType at(size_t index) const
Checked access to the vector elements.
Definition: TDVecDMatMultExpr.h:297
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional vector type...
Definition: DenseVector.h:79
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_TVECMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid vector/matrix ...
Definition: TVecMatMultExpr.h:166
VRT::ElementType VET
Element type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:128
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:258
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDVecDMatMultExpr.h:342
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the IsComplexFloat type trait.
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2583
#define BLAZE_CONSTRAINT_MUST_BE_ROW_VECTOR_TYPE(T)
Constraint on the data type.In case the given data type T is not a row dense or sparse vector type (i...
Definition: RowVector.h:79
Header file for the IsComplex type trait.
Header file for the SubvectorExprTrait class template.
Header file for the complex data type.
Header file for the IsUpper type trait.
Header file for exception macros.
EnableIf< IsDenseVector< VT1 > >::Type smpMultAssign(Vector< VT1, TF1 > &lhs, const Vector< VT2, TF2 > &rhs)
Default implementation of the SMP multiplication assignment of a vector to a dense vector...
Definition: DenseVector.h:189
Header file for the Size type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
SelectType< IsExpression< VT >::value, const VT, const VT & >::Type LeftOperand
Composite type of the left-hand side dense vector expression.
Definition: TDVecDMatMultExpr.h:210
Constraint on the transpose flag of vector types.
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.