All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
53 #include <blaze/math/Intrinsics.h>
54 #include <blaze/math/shims/Reset.h>
82 #include <blaze/system/BLAS.h>
84 #include <blaze/util/Assert.h>
85 #include <blaze/util/Complex.h>
91 #include <blaze/util/DisableIf.h>
92 #include <blaze/util/EnableIf.h>
93 #include <blaze/util/InvalidType.h>
95 #include <blaze/util/SelectType.h>
96 #include <blaze/util/Types.h>
103 
104 
105 namespace blaze {
106 
107 //=================================================================================================
108 //
109 // CLASS TDMATDMATMULTEXPR
110 //
111 //=================================================================================================
112 
113 //*************************************************************************************************
120 template< typename MT1 // Type of the left-hand side dense matrix
121  , typename MT2 > // Type of the right-hand side dense matrix
122 class TDMatDMatMultExpr : public DenseMatrix< TDMatDMatMultExpr<MT1,MT2>, true >
123  , private MatMatMultExpr
124  , private Computation
125 {
126  private:
127  //**Type definitions****************************************************************************
// Local shorthands for nested types of the two operand types MT1 and MT2.
128  typedef typename MT1::ResultType RT1; // ResultType of the left-hand side operand type
129  typedef typename MT2::ResultType RT2; // ResultType of the right-hand side operand type
130  typedef typename RT1::ElementType ET1; // ElementType of RT1
131  typedef typename RT2::ElementType ET2; // ElementType of RT2
132  typedef typename MT1::CompositeType CT1; // CompositeType of the left-hand side operand type
133  typedef typename MT2::CompositeType CT2; // CompositeType of the right-hand side operand type
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
139  //**********************************************************************************************
140 
141  //**********************************************************************************************
// Compile-time flag for the right-hand side operand: nonzero if MT2 is flagged by either
// IsComputation or RequiresEvaluation.
// NOTE(review): the matching 'evaluateLeft' flag is used further down in this class, but its
// definition is not part of this extract.
143  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
144  //**********************************************************************************************
145 
146  //**********************************************************************************************
148 
// Helper trait whose 'value' is nonzero if at least one of the operand flags evaluateLeft or
// evaluateRight is set. The template parameters T1, T2 and T3 are not used by the body; the
// result depends only on the flags of the enclosing class.
152  template< typename T1, typename T2, typename T3 >
153  struct IsEvaluationRequired {
154  enum { value = ( evaluateLeft || evaluateRight ) };
155  };
157  //**********************************************************************************************
158 
159  //**********************************************************************************************
161 
// Helper trait selecting the single precision BLAS kernel. 'value' is nonzero only if
// BLAZE_BLAS_MODE is enabled, all three types report 'vectorizable', and IsFloat holds for the
// element type of each of them. Within this class it is instantiated with the target matrix
// type and the two operand types (see selectBlasAssignKernel).
164  template< typename T1, typename T2, typename T3 >
165  struct UseSinglePrecisionKernel {
166  enum { value = BLAZE_BLAS_MODE &&
167  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
168  IsFloat<typename T1::ElementType>::value &&
169  IsFloat<typename T2::ElementType>::value &&
170  IsFloat<typename T3::ElementType>::value };
171  };
173  //**********************************************************************************************
174 
175  //**********************************************************************************************
177 
// Helper trait selecting the double precision BLAS kernel. 'value' is nonzero only if
// BLAZE_BLAS_MODE is enabled, all three types report 'vectorizable', and IsDouble holds for the
// element type of each of them.
180  template< typename T1, typename T2, typename T3 >
181  struct UseDoublePrecisionKernel {
182  enum { value = BLAZE_BLAS_MODE &&
183  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184  IsDouble<typename T1::ElementType>::value &&
185  IsDouble<typename T2::ElementType>::value &&
186  IsDouble<typename T3::ElementType>::value };
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
// Helper trait selecting the single precision complex BLAS kernel. 'value' is nonzero only if
// BLAZE_BLAS_MODE is enabled, all three types report 'vectorizable', and the element type of
// each of them is exactly complex<float>.
197  template< typename T1, typename T2, typename T3 >
198  struct UseSinglePrecisionComplexKernel {
199  typedef complex<float> Type; // Element type required of all three matrices
200  enum { value = BLAZE_BLAS_MODE &&
201  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202  IsSame<typename T1::ElementType,Type>::value &&
203  IsSame<typename T2::ElementType,Type>::value &&
204  IsSame<typename T3::ElementType,Type>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
// Helper trait selecting the double precision complex BLAS kernel. 'value' is nonzero only if
// BLAZE_BLAS_MODE is enabled, all three types report 'vectorizable', and the element type of
// each of them is exactly complex<double>.
215  template< typename T1, typename T2, typename T3 >
216  struct UseDoublePrecisionComplexKernel {
217  typedef complex<double> Type; // Element type required of all three matrices
218  enum { value = BLAZE_BLAS_MODE &&
219  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
220  IsSame<typename T1::ElementType,Type>::value &&
221  IsSame<typename T2::ElementType,Type>::value &&
222  IsSame<typename T3::ElementType,Type>::value };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
// Helper trait selecting the non-BLAS fallback. 'value' is nonzero if BLAZE_BLAS_MODE is
// disabled, or if none of the four BLAS kernel traits above applies to the given type triple.
232  template< typename T1, typename T2, typename T3 >
233  struct UseDefaultKernel {
234  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
235  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
236  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
237  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
238  };
240  //**********************************************************************************************
241 
242  //**********************************************************************************************
244 
// Helper trait selecting the vectorized variant of the default kernel. 'value' is nonzero only
// if all three types report 'vectorizable', T2 and T3 have the same element type as T1, and
// IntrinsicTrait of that element type reports addition, subtraction and multiplication.
// It is used as the EnableIf/DisableIf condition of the selectDefaultAssignKernel overloads.
247  template< typename T1, typename T2, typename T3 >
248  struct UseVectorizedDefaultKernel {
249  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
250  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
251  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
252  IntrinsicTrait<typename T1::ElementType>::addition &&
253  IntrinsicTrait<typename T1::ElementType>::subtraction &&
254  IntrinsicTrait<typename T1::ElementType>::multiplication };
255  };
257  //**********************************************************************************************
258 
259  public:
260  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are used here but their definitions (listing lines
// before 267) are not part of this extract.
// ReturnType is the type returned by operator(); CompositeType is a const ResultType.
267  typedef const ElementType ReturnType;
268  typedef const ResultType CompositeType;
269 
// Member type used to hold the left-hand side operand: chosen between 'const MT1' and
// 'const MT1&' depending on IsExpression<MT1> (presumably by value for expressions and by
// reference otherwise -- confirm against SelectType).
271  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
272 
// Member type used to hold the right-hand side operand, chosen the same way for MT2.
274  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
275 
278 
281  //**********************************************************************************************
282 
283  //**Compilation flags***************************************************************************
// Compilation flag for vectorization.
// NOTE(review): the initializer below is cut off in this extract (listing lines 286-288 are
// missing); only the first two conditions, MT1::vectorizable and MT2::vectorizable, are visible.
285  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
289 
// Compilation flag for SMP assignment: nonzero only if neither operand is flagged by
// evaluateLeft/evaluateRight and both operand types report 'smpAssignable'.
291  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
292  !evaluateRight && MT2::smpAssignable };
293  //**********************************************************************************************
294 
295  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//
// lhs  left-hand side operand, stored in lhs_.
// rhs  right-hand side operand, stored in rhs_.
//
// The inner dimensions must match (lhs.columns() == rhs.rows()); this is only checked through
// BLAZE_INTERNAL_ASSERT.
301  explicit inline TDMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
302  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
303  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
304  {
305  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
306  }
307  //**********************************************************************************************
308 
309  //**Access operator*****************************************************************************
// Computes a single element of the product: the sum over k of lhs_(i,k) * rhs_(k,j).
//
// i  row index, expected to be less than lhs_.rows() (checked by internal assert only).
// j  column index, expected to be less than rhs_.columns() (checked by internal assert only).
//
// Returns the accumulated value; if the inner dimension is zero the result is whatever
// reset() leaves in a default-constructed ElementType.
316  inline ReturnType operator()( size_t i, size_t j ) const {
317  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
318  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
319 
320  ElementType tmp;
321 
322  if( lhs_.columns() != 0UL ) {
// 'end' is the largest odd value not exceeding lhs_.columns(): size_t(-2) clears the lowest
// bit of (columns-1). The range [1,end) therefore has even length and suits the 2x unrolling.
323  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
// The k == 0 term initializes the accumulator.
324  tmp = lhs_(i,0UL) * rhs_(0UL,j);
325  for( size_t k=1UL; k<end; k+=2UL ) {
326  tmp += lhs_(i,k ) * rhs_(k ,j);
327  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
328  }
// For an even inner dimension exactly one term (index 'end') is left over.
329  if( end < lhs_.columns() ) {
330  tmp += lhs_(i,end) * rhs_(end,j);
331  }
332  }
333  else {
334  reset( tmp );
335  }
336 
337  return tmp;
338  }
339  //**********************************************************************************************
340 
341  //**Rows function*******************************************************************************
// Returns the number of rows of the expression, which is the row count of the left-hand side
// operand.
346  inline size_t rows() const {
347  return lhs_.rows();
348  }
349  //**********************************************************************************************
350 
351  //**Columns function****************************************************************************
// Returns the number of columns of the expression, which is the column count of the right-hand
// side operand.
356  inline size_t columns() const {
357  return rhs_.columns();
358  }
359  //**********************************************************************************************
360 
361  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (as LeftOperand).
366  inline LeftOperand leftOperand() const {
367  return lhs_;
368  }
369  //**********************************************************************************************
370 
371  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (as RightOperand).
376  inline RightOperand rightOperand() const {
377  return rhs_;
378  }
379  //**********************************************************************************************
380 
381  //**********************************************************************************************
// Reports whether the expression can alias with the object at the given address.
//
// alias  address to check.
//
// Returns true if either operand reports isAliased( alias ).
// NOTE(review): this queries isAliased() of the operands (not canAlias()), which makes it
// identical to isAliased() below -- confirm this is intended.
387  template< typename T >
388  inline bool canAlias( const T* alias ) const {
389  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
390  }
391  //**********************************************************************************************
392 
393  //**********************************************************************************************
// Reports whether the expression is aliased with the object at the given address.
//
// alias  address to check.
//
// Returns true if either operand reports isAliased( alias ).
399  template< typename T >
400  inline bool isAliased( const T* alias ) const {
401  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
402  }
403  //**********************************************************************************************
404 
405  //**********************************************************************************************
// Returns true only if both operands report isAligned().
410  inline bool isAligned() const {
411  return lhs_.isAligned() && rhs_.isAligned();
412  }
413  //**********************************************************************************************
414 
415  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments.
// The visible part of the condition holds if BLAS is not parallel (BLAZE_BLAS_IS_PARALLEL is
// false) or the number of result elements is below TDMATDMATMULT_THRESHOLD.
// NOTE(review): the return expression is truncated in this extract -- listing line 423, which
// held the operand following the trailing '&&', is missing. Restore it from the original
// header before using this code.
420  inline bool canSMPAssign() const {
421  return ( !BLAZE_BLAS_IS_PARALLEL ||
422  ( rows() * columns() < TDMATDMATMULT_THRESHOLD ) ) &&
424  }
425  //**********************************************************************************************
426 
427  private:
428  //**Member variables****************************************************************************
431  //**********************************************************************************************
432 
433  //**BLAS kernel (single precision)**************************************************************
434 #if BLAZE_BLAS_MODE
435 
// BLAS kernel for single precision operands: forwards the matrix product of A and B to CBLAS,
// which writes the result into C.
//
// C      target matrix; its storage order (IsRowMajorMatrix / IsColumnMajorMatrix of MT3)
//        selects the CBLAS layout argument.
// A      left-hand side operand of the multiplication.
// B      right-hand side operand of the multiplication.
// alpha  scaling factor passed to CBLAS for the product term.
// beta   scaling factor passed to CBLAS for the previous contents of C.
//
// Dispatch (identical to the dgemm/cgemm/zgemm kernels of this class):
//   - A symmetric and C row-major     -> cblas_ssymm with the symmetric matrix on the left,
//   - B symmetric and C column-major  -> cblas_ssymm with the symmetric matrix on the right,
//   - otherwise                       -> cblas_sgemm with the transposition flags derived from
//                                        the storage order of C.
450  template< typename MT3 // Type of the left-hand side target matrix
451  , typename MT4 // Type of the left-hand side matrix operand
452  , typename MT5 > // Type of the right-hand side matrix operand
453  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
454  {
455  using boost::numeric_cast;
456 
460 
// CBLAS takes 'int' sizes; numeric_cast performs the size_t -> int conversion.
461  const int M ( numeric_cast<int>( A.rows() ) );
462  const int N ( numeric_cast<int>( B.columns() ) );
463  const int K ( numeric_cast<int>( A.columns() ) );
464  const int lda( numeric_cast<int>( A.spacing() ) );
465  const int ldb( numeric_cast<int>( B.spacing() ) );
466  const int ldc( numeric_cast<int>( C.spacing() ) );
467 
468  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
469  cblas_ssymm( CblasRowMajor, CblasLeft, CblasUpper,
470  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
471  }
472  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
// With CblasRight the symmetric matrix is passed first, hence B precedes A.
473  cblas_ssymm( CblasColMajor, CblasRight, CblasLower,
474  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
475  }
476  else {
477  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
478  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
479  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
480  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
481  }
482  }
484 #endif
485  //**********************************************************************************************
486 
487  //**BLAS kernel (double precision)**************************************************************
488 #if BLAZE_BLAS_MODE
489 
// BLAS kernel for double precision operands: forwards the matrix product of A and B to CBLAS,
// which writes the result into C.
//
// C      target matrix; its storage order (IsRowMajorMatrix / IsColumnMajorMatrix of MT3)
//        selects the CBLAS layout argument.
// A      left-hand side operand of the multiplication.
// B      right-hand side operand of the multiplication.
// alpha  scaling factor passed to CBLAS for the product term.
// beta   scaling factor passed to CBLAS for the previous contents of C.
//
// Dispatch (identical to the cgemm/zgemm kernels of this class):
//   - A symmetric and C row-major     -> cblas_dsymm with the symmetric matrix on the left,
//   - B symmetric and C column-major  -> cblas_dsymm with the symmetric matrix on the right,
//   - otherwise                       -> cblas_dgemm with the transposition flags derived from
//                                        the storage order of C.
504  template< typename MT3 // Type of the left-hand side target matrix
505  , typename MT4 // Type of the left-hand side matrix operand
506  , typename MT5 > // Type of the right-hand side matrix operand
507  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
508  {
509  using boost::numeric_cast;
510 
514 
// CBLAS takes 'int' sizes; numeric_cast performs the size_t -> int conversion.
515  const int M ( numeric_cast<int>( A.rows() ) );
516  const int N ( numeric_cast<int>( B.columns() ) );
517  const int K ( numeric_cast<int>( A.columns() ) );
518  const int lda( numeric_cast<int>( A.spacing() ) );
519  const int ldb( numeric_cast<int>( B.spacing() ) );
520  const int ldc( numeric_cast<int>( C.spacing() ) );
521 
522  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
523  cblas_dsymm( CblasRowMajor, CblasLeft, CblasUpper,
524  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
525  }
526  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
// With CblasRight the symmetric matrix is passed first, hence B precedes A.
527  cblas_dsymm( CblasColMajor, CblasRight, CblasLower,
528  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
529  }
530  else {
531  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
532  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
533  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
534  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
535  }
536  }
538 #endif
539  //**********************************************************************************************
540 
541  //**BLAS kernel (single precision complex)******************************************************
542 #if BLAZE_BLAS_MODE
543 
// BLAS kernel for single precision complex operands: forwards the matrix product of A and B to
// CBLAS, which writes the result into C.
//
// C      target matrix; its storage order selects the CBLAS layout argument.
// A      left-hand side operand of the multiplication.
// B      right-hand side operand of the multiplication.
// alpha  scaling factor passed to CBLAS for the product term.
// beta   scaling factor passed to CBLAS for the previous contents of C.
558  template< typename MT3 // Type of the left-hand side target matrix
559  , typename MT4 // Type of the left-hand side matrix operand
560  , typename MT5 > // Type of the right-hand side matrix operand
561  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
562  complex<float> alpha, complex<float> beta )
563  {
564  using boost::numeric_cast;
565 
// The value_type of every element type must be float.
569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
572 
// CBLAS takes 'int' sizes; numeric_cast performs the size_t -> int conversion.
573  const int M ( numeric_cast<int>( A.rows() ) );
574  const int N ( numeric_cast<int>( B.columns() ) );
575  const int K ( numeric_cast<int>( A.columns() ) );
576  const int lda( numeric_cast<int>( A.spacing() ) );
577  const int ldb( numeric_cast<int>( B.spacing() ) );
578  const int ldc( numeric_cast<int>( C.spacing() ) );
579 
// Symmetric left operand with a row-major target: symm with the symmetric matrix on the left.
// The complex scaling factors are passed by address.
580  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
581  cblas_csymm( CblasRowMajor, CblasLeft, CblasUpper,
582  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
583  }
// Symmetric right operand with a column-major target: symm with the symmetric matrix on the
// right; the symmetric matrix is passed first, hence B precedes A.
584  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
585  cblas_csymm( CblasColMajor, CblasRight, CblasLower,
586  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
587  }
// General case: gemm, with layout and transposition flags derived from the storage order of C.
588  else {
589  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
590  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
591  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
592  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
593  }
594  }
596 #endif
597  //**********************************************************************************************
598 
599  //**BLAS kernel (double precision complex)******************************************************
600 #if BLAZE_BLAS_MODE
601 
// BLAS kernel for double precision complex operands: forwards the matrix product of A and B to
// CBLAS, which writes the result into C.
//
// C      target matrix; its storage order selects the CBLAS layout argument.
// A      left-hand side operand of the multiplication.
// B      right-hand side operand of the multiplication.
// alpha  scaling factor passed to CBLAS for the product term.
// beta   scaling factor passed to CBLAS for the previous contents of C.
616  template< typename MT3 // Type of the left-hand side target matrix
617  , typename MT4 // Type of the left-hand side matrix operand
618  , typename MT5 > // Type of the right-hand side matrix operand
619  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
620  complex<double> alpha, complex<double> beta )
621  {
622  using boost::numeric_cast;
623 
// The value_type of every element type must be double.
627  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
628  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
629  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
630 
// CBLAS takes 'int' sizes; numeric_cast performs the size_t -> int conversion.
631  const int M ( numeric_cast<int>( A.rows() ) );
632  const int N ( numeric_cast<int>( B.columns() ) );
633  const int K ( numeric_cast<int>( A.columns() ) );
634  const int lda( numeric_cast<int>( A.spacing() ) );
635  const int ldb( numeric_cast<int>( B.spacing() ) );
636  const int ldc( numeric_cast<int>( C.spacing() ) );
637 
// Symmetric left operand with a row-major target: symm with the symmetric matrix on the left.
// The complex scaling factors are passed by address.
638  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
639  cblas_zsymm( CblasRowMajor, CblasLeft, CblasUpper,
640  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
641  }
// Symmetric right operand with a column-major target: symm with the symmetric matrix on the
// right; the symmetric matrix is passed first, hence B precedes A.
642  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
643  cblas_zsymm( CblasColMajor, CblasRight, CblasLower,
644  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
645  }
// General case: gemm, with layout and transposition flags derived from the storage order of C.
646  else {
647  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
648  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
649  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
650  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
651  }
652  }
654 #endif
655  //**********************************************************************************************
656 
657  //**Assignment to dense matrices****************************************************************
// Assigns the multiplication expression rhs to the dense matrix lhs.
//
// lhs  target dense matrix; its dimensions must already match those of rhs (checked by
//      internal asserts only).
// rhs  multiplication expression to evaluate.
//
// NOTE(review): the types LT and RT used below are not defined in this extract; presumably
// they are class-level typedefs for the evaluated operands -- confirm in the original header.
670  template< typename MT // Type of the target dense matrix
671  , bool SO > // Storage order of the target dense matrix
672  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
673  {
675 
676  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
677  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
678 
// Empty target: nothing to do. Zero inner dimension: the target is reset instead of running a
// kernel, so the kernels are never entered with an inner dimension of zero.
679  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
680  return;
681  }
682  else if( rhs.lhs_.columns() == 0UL ) {
683  reset( ~lhs );
684  return;
685  }
686 
687  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
688  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
689 
690  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
691  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
692  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
693  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
694  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
695  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
696 
// Hand over to the size-based kernel selection.
697  TDMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
698  }
700  //**********************************************************************************************
701 
702  //**Assignment to dense matrices (kernel selection)*********************************************
// Chooses the assignment kernel by problem size: targets with fewer than
// TDMATDMATMULT_THRESHOLD elements (rows * columns of C) use the default kernel, all others
// use the BLAS-based selection.
//
// C  target matrix.
// A  left-hand side operand.
// B  right-hand side operand.
713  template< typename MT3 // Type of the left-hand side target matrix
714  , typename MT4 // Type of the left-hand side matrix operand
715  , typename MT5 > // Type of the right-hand side matrix operand
716  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
717  {
718  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
719  TDMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
720  else
721  TDMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
722  }
724  //**********************************************************************************************
725 
726  //**Default assignment to dense matrices********************************************************
// Scalar default kernel for C = A * B. This overload is enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not apply.
//
// C  target matrix; every element C(i,j) is overwritten.
// A  left-hand side operand.
// B  right-hand side operand.
//
// The k == 0 term is evaluated unconditionally, so the inner dimension must be at least one;
// assign() returns before selecting a kernel when it is zero.
740  template< typename MT3 // Type of the left-hand side target matrix
741  , typename MT4 // Type of the left-hand side matrix operand
742  , typename MT5 > // Type of the right-hand side matrix operand
743  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
744  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
745  {
746  const size_t M( A.rows() );
747  const size_t N( B.columns() );
748  const size_t K( A.columns() );
749 
750  for( size_t i=0UL; i<M; ++i ) {
// Initialize row i of C with the k == 0 products ...
751  for( size_t j=0UL; j<N; ++j ) {
752  C(i,j) = A(i,0UL) * B(0UL,j);
753  }
// ... then accumulate the remaining terms, one k at a time across the whole row.
754  for( size_t k=1UL; k<K; ++k ) {
755  for( size_t j=0UL; j<N; ++j ) {
756  C(i,j) += A(i,k) * B(k,j);
757  }
758  }
759  }
760  }
762  //**********************************************************************************************
763 
764  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for C = A * B with a row-major target (DenseMatrix<MT3,false>).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
//
// C  target matrix, written through (~C).store().
// A  left-hand side operand, read element-wise.
// B  right-hand side operand, read through B.load().
//
// The columns of C are processed in panels of 8, 4, 2 and finally 1 intrinsic vector(s); each
// panel step is IT::size columns wide (presumably the number of elements per IntrinsicType --
// confirm against IntrinsicTrait).
// NOTE(review): the accumulators are declared without an initializer and then used on the
// right-hand side of '+'; this presumably relies on IntrinsicType default-constructing to
// zero -- confirm.
// NOTE(review): the panel loops only require the first column of the last vector to be below
// N, so loads/stores may reach past column N-1; presumably the matrices are padded -- confirm.
778  template< typename MT3 // Type of the left-hand side target matrix
779  , typename MT4 // Type of the left-hand side matrix operand
780  , typename MT5 > // Type of the right-hand side matrix operand
781  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
782  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
783  {
784  typedef IntrinsicTrait<ElementType> IT;
785 
786  const size_t M( A.rows() );
787  const size_t N( B.columns() );
788  const size_t K( A.columns() );
789 
790  size_t j( 0UL );
791 
// Panels of eight vectors: one row of C at a time, eight accumulators.
792  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
793  for( size_t i=0UL; i<M; ++i ) {
794  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
795  for( size_t k=0UL; k<K; ++k ) {
796  const IntrinsicType a1( set( A(i,k) ) );
797  xmm1 = xmm1 + a1 * B.load(k,j );
798  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
799  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
800  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
801  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
802  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
803  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
804  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
805  }
806  (~C).store( i, j , xmm1 );
807  (~C).store( i, j+IT::size , xmm2 );
808  (~C).store( i, j+IT::size*2UL, xmm3 );
809  (~C).store( i, j+IT::size*3UL, xmm4 );
810  (~C).store( i, j+IT::size*4UL, xmm5 );
811  (~C).store( i, j+IT::size*5UL, xmm6 );
812  (~C).store( i, j+IT::size*6UL, xmm7 );
813  (~C).store( i, j+IT::size*7UL, xmm8 );
814  }
815  }
// Panels of four vectors: two rows of C at a time (xmm1-4 for row i, xmm5-8 for row i+1),
// so every loaded vector of B is used twice.
816  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
817  size_t i( 0UL );
818  for( ; (i+2UL) <= M; i+=2UL ) {
819  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
820  for( size_t k=0UL; k<K; ++k ) {
821  const IntrinsicType a1( set( A(i ,k) ) );
822  const IntrinsicType a2( set( A(i+1UL,k) ) );
823  const IntrinsicType b1( B.load(k,j ) );
824  const IntrinsicType b2( B.load(k,j+IT::size ) );
825  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
826  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
827  xmm1 = xmm1 + a1 * b1;
828  xmm2 = xmm2 + a1 * b2;
829  xmm3 = xmm3 + a1 * b3;
830  xmm4 = xmm4 + a1 * b4;
831  xmm5 = xmm5 + a2 * b1;
832  xmm6 = xmm6 + a2 * b2;
833  xmm7 = xmm7 + a2 * b3;
834  xmm8 = xmm8 + a2 * b4;
835  }
836  (~C).store( i , j , xmm1 );
837  (~C).store( i , j+IT::size , xmm2 );
838  (~C).store( i , j+IT::size*2UL, xmm3 );
839  (~C).store( i , j+IT::size*3UL, xmm4 );
840  (~C).store( i+1UL, j , xmm5 );
841  (~C).store( i+1UL, j+IT::size , xmm6 );
842  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
843  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
844  }
// Odd number of rows: handle the last row on its own.
845  if( i < M ) {
846  IntrinsicType xmm1, xmm2, xmm3, xmm4;
847  for( size_t k=0UL; k<K; ++k ) {
848  const IntrinsicType a1( set( A(i,k) ) );
849  xmm1 = xmm1 + a1 * B.load(k,j );
850  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
851  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
852  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
853  }
854  (~C).store( i, j , xmm1 );
855  (~C).store( i, j+IT::size , xmm2 );
856  (~C).store( i, j+IT::size*2UL, xmm3 );
857  (~C).store( i, j+IT::size*3UL, xmm4 );
858  }
859  }
// Panels of two vectors: two rows of C at a time.
860  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
861  size_t i( 0UL );
862  for( ; (i+2UL) <= M; i+=2UL ) {
863  IntrinsicType xmm1, xmm2, xmm3, xmm4;
864  for( size_t k=0UL; k<K; ++k ) {
865  const IntrinsicType a1( set( A(i ,k) ) );
866  const IntrinsicType a2( set( A(i+1UL,k) ) );
867  const IntrinsicType b1( B.load(k,j ) );
868  const IntrinsicType b2( B.load(k,j+IT::size) );
869  xmm1 = xmm1 + a1 * b1;
870  xmm2 = xmm2 + a1 * b2;
871  xmm3 = xmm3 + a2 * b1;
872  xmm4 = xmm4 + a2 * b2;
873  }
874  (~C).store( i , j , xmm1 );
875  (~C).store( i , j+IT::size, xmm2 );
876  (~C).store( i+1UL, j , xmm3 );
877  (~C).store( i+1UL, j+IT::size, xmm4 );
878  }
// Odd number of rows: handle the last row on its own.
879  if( i < M ) {
880  IntrinsicType xmm1, xmm2;
881  for( size_t k=0UL; k<K; ++k ) {
882  const IntrinsicType a1( set( A(i,k) ) );
883  xmm1 = xmm1 + a1 * B.load(k,j );
884  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
885  }
886  (~C).store( i, j , xmm1 );
887  (~C).store( i, j+IT::size, xmm2 );
888  }
889  }
// At most one vector of columns is left after the loops above.
890  if( j < N ) {
891  size_t i( 0UL );
892  for( ; (i+2UL) <= M; i+=2UL ) {
893  IntrinsicType xmm1, xmm2;
894  for( size_t k=0UL; k<K; ++k ) {
895  const IntrinsicType b1( B.load(k,j) );
896  xmm1 = xmm1 + set( A(i ,k) ) * b1;
897  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
898  }
899  (~C).store( i , j, xmm1 );
900  (~C).store( i+1UL, j, xmm2 );
901  }
// Odd number of rows: handle the last row on its own.
902  if( i < M ) {
903  IntrinsicType xmm1;
904  for( size_t k=0UL; k<K; ++k ) {
905  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
906  }
907  (~C).store( i, j, xmm1 );
908  }
909  }
910  }
912  //**********************************************************************************************
913 
914  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for C = A * B with a column-major target (DenseMatrix<MT3,true>).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
//
// C  target matrix, written through (~C).store().
// A  left-hand side operand, read through A.load().
// B  right-hand side operand, read element-wise.
//
// This mirrors the row-major kernel with the roles of rows and columns exchanged: the rows of
// C are processed in panels of 8, 4, 2 and finally 1 intrinsic vector(s); each panel step is
// IT::size rows tall (presumably the number of elements per IntrinsicType -- confirm against
// IntrinsicTrait).
// NOTE(review): the accumulators are declared without an initializer and then used on the
// right-hand side of '+'; this presumably relies on IntrinsicType default-constructing to
// zero -- confirm.
// NOTE(review): the panel loops only require the first row of the last vector to be below M,
// so loads/stores may reach past row M-1; presumably the matrices are padded -- confirm.
928  template< typename MT3 // Type of the left-hand side target matrix
929  , typename MT4 // Type of the left-hand side matrix operand
930  , typename MT5 > // Type of the right-hand side matrix operand
931  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
932  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
933  {
934  typedef IntrinsicTrait<ElementType> IT;
935 
936  const size_t M( A.rows() );
937  const size_t N( B.columns() );
938  const size_t K( A.columns() );
939 
940  size_t i( 0UL );
941 
// Panels of eight vectors: one column of C at a time, eight accumulators.
942  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
943  for( size_t j=0UL; j<N; ++j ) {
944  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
945  for( size_t k=0UL; k<K; ++k ) {
946  const IntrinsicType b1( set( B(k,j) ) );
947  xmm1 = xmm1 + A.load(i ,k) * b1;
948  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
949  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
950  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
951  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
952  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
953  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
954  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
955  }
956  (~C).store( i , j, xmm1 );
957  (~C).store( i+IT::size , j, xmm2 );
958  (~C).store( i+IT::size*2UL, j, xmm3 );
959  (~C).store( i+IT::size*3UL, j, xmm4 );
960  (~C).store( i+IT::size*4UL, j, xmm5 );
961  (~C).store( i+IT::size*5UL, j, xmm6 );
962  (~C).store( i+IT::size*6UL, j, xmm7 );
963  (~C).store( i+IT::size*7UL, j, xmm8 );
964  }
965  }
// Panels of four vectors: two columns of C at a time (xmm1-4 for column j, xmm5-8 for column
// j+1), so every loaded vector of A is used twice.
966  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
967  size_t j( 0UL );
968  for( ; (j+2UL) <= N; j+=2UL ) {
969  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
970  for( size_t k=0UL; k<K; ++k ) {
971  const IntrinsicType a1( A.load(i ,k) );
972  const IntrinsicType a2( A.load(i+IT::size ,k) );
973  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
974  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
975  const IntrinsicType b1( set( B(k,j ) ) );
976  const IntrinsicType b2( set( B(k,j+1UL) ) );
977  xmm1 = xmm1 + a1 * b1;
978  xmm2 = xmm2 + a2 * b1;
979  xmm3 = xmm3 + a3 * b1;
980  xmm4 = xmm4 + a4 * b1;
981  xmm5 = xmm5 + a1 * b2;
982  xmm6 = xmm6 + a2 * b2;
983  xmm7 = xmm7 + a3 * b2;
984  xmm8 = xmm8 + a4 * b2;
985  }
986  (~C).store( i , j , xmm1 );
987  (~C).store( i+IT::size , j , xmm2 );
988  (~C).store( i+IT::size*2UL, j , xmm3 );
989  (~C).store( i+IT::size*3UL, j , xmm4 );
990  (~C).store( i , j+1UL, xmm5 );
991  (~C).store( i+IT::size , j+1UL, xmm6 );
992  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
993  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
994  }
// Odd number of columns: handle the last column on its own.
995  if( j < N ) {
996  IntrinsicType xmm1, xmm2, xmm3, xmm4;
997  for( size_t k=0UL; k<K; ++k ) {
998  const IntrinsicType b1( set( B(k,j) ) );
999  xmm1 = xmm1 + A.load(i ,k) * b1;
1000  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1001  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1002  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1003  }
1004  (~C).store( i , j, xmm1 );
1005  (~C).store( i+IT::size , j, xmm2 );
1006  (~C).store( i+IT::size*2UL, j, xmm3 );
1007  (~C).store( i+IT::size*3UL, j, xmm4 );
1008  }
1009  }
// Panels of two vectors: two columns of C at a time.
1010  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1011  size_t j( 0UL );
1012  for( ; (j+2UL) <= N; j+=2UL ) {
1013  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1014  for( size_t k=0UL; k<K; ++k ) {
1015  const IntrinsicType a1( A.load(i ,k) );
1016  const IntrinsicType a2( A.load(i+IT::size,k) );
1017  const IntrinsicType b1( set( B(k,j ) ) );
1018  const IntrinsicType b2( set( B(k,j+1UL) ) );
1019  xmm1 = xmm1 + a1 * b1;
1020  xmm2 = xmm2 + a2 * b1;
1021  xmm3 = xmm3 + a1 * b2;
1022  xmm4 = xmm4 + a2 * b2;
1023  }
1024  (~C).store( i , j , xmm1 );
1025  (~C).store( i+IT::size, j , xmm2 );
1026  (~C).store( i , j+1UL, xmm3 );
1027  (~C).store( i+IT::size, j+1UL, xmm4 );
1028  }
// Odd number of columns: handle the last column on its own.
1029  if( j < N ) {
1030  IntrinsicType xmm1, xmm2;
1031  for( size_t k=0UL; k<K; ++k ) {
1032  const IntrinsicType b1( set( B(k,j) ) );
1033  xmm1 = xmm1 + A.load(i ,k) * b1;
1034  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1035  }
1036  (~C).store( i , j, xmm1 );
1037  (~C).store( i+IT::size, j, xmm2 );
1038  }
1039  }
// At most one vector of rows is left after the loops above.
1040  if( i < M ) {
1041  size_t j( 0UL );
1042  for( ; (j+2UL) <= N; j+=2UL ) {
1043  IntrinsicType xmm1, xmm2;
1044  for( size_t k=0UL; k<K; ++k ) {
1045  const IntrinsicType a1( A.load(i,k) );
1046  xmm1 = xmm1 + a1 * set( B(k,j ) );
1047  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1048  }
1049  (~C).store( i, j , xmm1 );
1050  (~C).store( i, j+1UL, xmm2 );
1051  }
// Odd number of columns: handle the last column on its own.
1052  if( j < N ) {
1053  IntrinsicType xmm1;
1054  for( size_t k=0UL; k<K; ++k ) {
1055  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1056  }
1057  (~C).store( i, j, xmm1 );
1058  }
1059  }
1060  }
1062  //**********************************************************************************************
1063 
1064  //**BLAS-based assignment to dense matrices (default)*******************************************
1078  template< typename MT3 // Type of the left-hand side target matrix
1079  , typename MT4 // Type of the left-hand side matrix operand
1080  , typename MT5 > // Type of the right-hand side matrix operand
1081  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1082  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1083  {
1084  selectDefaultAssignKernel( C, A, B );
1085  }
1087  //**********************************************************************************************
1088 
1089  //**BLAS-based assignment to dense matrices (single precision)**********************************
1090 #if BLAZE_BLAS_MODE
1091 
1104  template< typename MT3 // Type of the left-hand side target matrix
1105  , typename MT4 // Type of the left-hand side matrix operand
1106  , typename MT5 > // Type of the right-hand side matrix operand
1107  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1108  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1109  {
1110  sgemm( C, A, B, 1.0F, 0.0F );
1111  }
1113 #endif
1114  //**********************************************************************************************
1115 
1116  //**BLAS-based assignment to dense matrices (double precision)**********************************
1117 #if BLAZE_BLAS_MODE
1118 
1131  template< typename MT3 // Type of the left-hand side target matrix
1132  , typename MT4 // Type of the left-hand side matrix operand
1133  , typename MT5 > // Type of the right-hand side matrix operand
1134  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1135  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1136  {
1137  dgemm( C, A, B, 1.0, 0.0 );
1138  }
1140 #endif
1141  //**********************************************************************************************
1142 
1143  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1144 #if BLAZE_BLAS_MODE
1145 
1158  template< typename MT3 // Type of the left-hand side target matrix
1159  , typename MT4 // Type of the left-hand side matrix operand
1160  , typename MT5 > // Type of the right-hand side matrix operand
1161  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1162  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1163  {
1164  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1165  }
1167 #endif
1168  //**********************************************************************************************
1169 
1170  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1171 #if BLAZE_BLAS_MODE
1172 
1185  template< typename MT3 // Type of the left-hand side target matrix
1186  , typename MT4 // Type of the left-hand side matrix operand
1187  , typename MT5 > // Type of the right-hand side matrix operand
1188  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1189  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1190  {
1191  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1192  }
1194 #endif
1195  //**********************************************************************************************
1196 
1197  //**Assignment to sparse matrices***************************************************************
1210  template< typename MT // Type of the target sparse matrix
1211  , bool SO > // Storage order of the target sparse matrix
1212  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
1213  {
1215 
1216  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
1217 
1224 
1225  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1226  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1227 
1228  const TmpType tmp( serial( rhs ) );
1229  assign( ~lhs, tmp );
1230  }
1232  //**********************************************************************************************
1233 
1234  //**Addition assignment to dense matrices*******************************************************
1247  template< typename MT // Type of the target dense matrix
1248  , bool SO > // Storage order of the target dense matrix
1249  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
1250  {
1252 
1253  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1254  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1255 
1256  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1257  return;
1258  }
1259 
1260  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1261  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1262 
1263  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1264  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1265  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1266  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1267  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1268  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1269 
1270  TDMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1271  }
1273  //**********************************************************************************************
1274 
1275  //**Addition assignment to dense matrices (kernel selection)************************************
1286  template< typename MT3 // Type of the left-hand side target matrix
1287  , typename MT4 // Type of the left-hand side matrix operand
1288  , typename MT5 > // Type of the right-hand side matrix operand
1289  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1290  {
1291  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
1292  TDMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1293  else
1294  TDMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1295  }
1297  //**********************************************************************************************
1298 
1299  //**Default addition assignment to dense matrices***********************************************
1313  template< typename MT3 // Type of the left-hand side target matrix
1314  , typename MT4 // Type of the left-hand side matrix operand
1315  , typename MT5 > // Type of the right-hand side matrix operand
1316  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1317  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1318  {
1319  const size_t M( A.rows() );
1320  const size_t N( B.columns() );
1321  const size_t K( A.columns() );
1322 
1323  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1324  const size_t end( N & size_t(-2) );
1325 
1326  for( size_t i=0UL; i<M; ++i ) {
1327  for( size_t k=0UL; k<K; ++k ) {
1328  for( size_t j=0UL; j<end; j+=2UL ) {
1329  C(i,j ) += A(i,k) * B(k,j );
1330  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1331  }
1332  if( end < N ) {
1333  C(i,end) += A(i,k) * B(k,end);
1334  }
1335  }
1336  }
1337  }
1339  //**********************************************************************************************
1340 
1341  //**Vectorized default addition assignment to row-major dense matrices**************************
/*!\brief Vectorized default addition assignment kernel for a row-major target matrix.
//
// \param C The row-major target dense matrix; its current values are loaded, updated and
//          stored back in place.
// \param A The left-hand side multiplication operand (read element-wise via A(i,k)).
// \param B The right-hand side multiplication operand (read via B.load(k,j)).
// \return void
//
// Enabled via UseVectorizedDefaultKernel<MT3,MT4,MT5>. For every processed position the
// kernel loads the current contents of C into IntrinsicType accumulators, adds
// set( A(i,k) ) * B.load(k,j) for all k in [0,K) and stores the accumulators back to C.
// The columns are traversed in blocks of 8, 4, 2 and finally 1 vector(s) of IT::size
// elements.
// NOTE(review): set() presumably broadcasts a scalar to all lanes of an IntrinsicType --
// confirm. The loop conditions only require the start of the last vector of a block to lie
// below N, so that vector may extend beyond N; this presumably relies on padded storage of
// B and C -- confirm.
*/
1355  template< typename MT3 // Type of the left-hand side target matrix
1356  , typename MT4 // Type of the left-hand side matrix operand
1357  , typename MT5 > // Type of the right-hand side matrix operand
1358  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1359  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1360  {
1361  typedef IntrinsicTrait<ElementType> IT;
1362 
1363  const size_t M( A.rows() );
1364  const size_t N( B.columns() );
1365  const size_t K( A.columns() );
1366 
      // Column index shared by all block loops below; each loop continues where the previous
      // one stopped.
1367  size_t j( 0UL );
1368 
      // Column blocks of 8 vectors: one row of C per iteration, eight accumulators.
1369  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1370  for( size_t i=0UL; i<M; ++i ) {
1371  IntrinsicType xmm1( (~C).load(i,j ) );
1372  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1373  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1374  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1375  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1376  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1377  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1378  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1379  for( size_t k=0UL; k<K; ++k ) {
1380  const IntrinsicType a1( set( A(i,k) ) );
1381  xmm1 = xmm1 + a1 * B.load(k,j );
1382  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1383  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1384  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1385  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1386  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1387  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1388  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1389  }
1390  (~C).store( i, j , xmm1 );
1391  (~C).store( i, j+IT::size , xmm2 );
1392  (~C).store( i, j+IT::size*2UL, xmm3 );
1393  (~C).store( i, j+IT::size*3UL, xmm4 );
1394  (~C).store( i, j+IT::size*4UL, xmm5 );
1395  (~C).store( i, j+IT::size*5UL, xmm6 );
1396  (~C).store( i, j+IT::size*6UL, xmm7 );
1397  (~C).store( i, j+IT::size*7UL, xmm8 );
1398  }
1399  }
      // Column blocks of 4 vectors: two rows of C per iteration (xmm1-4 for row i, xmm5-8 for
      // row i+1), followed by a single leftover row if M is odd.
1400  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1401  size_t i( 0UL );
1402  for( ; (i+2UL) <= M; i+=2UL ) {
1403  IntrinsicType xmm1( (~C).load(i ,j ) );
1404  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1405  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1406  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1407  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1408  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1409  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1410  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1411  for( size_t k=0UL; k<K; ++k ) {
1412  const IntrinsicType a1( set( A(i ,k) ) );
1413  const IntrinsicType a2( set( A(i+1UL,k) ) );
1414  const IntrinsicType b1( B.load(k,j ) );
1415  const IntrinsicType b2( B.load(k,j+IT::size ) );
1416  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1417  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1418  xmm1 = xmm1 + a1 * b1;
1419  xmm2 = xmm2 + a1 * b2;
1420  xmm3 = xmm3 + a1 * b3;
1421  xmm4 = xmm4 + a1 * b4;
1422  xmm5 = xmm5 + a2 * b1;
1423  xmm6 = xmm6 + a2 * b2;
1424  xmm7 = xmm7 + a2 * b3;
1425  xmm8 = xmm8 + a2 * b4;
1426  }
1427  (~C).store( i , j , xmm1 );
1428  (~C).store( i , j+IT::size , xmm2 );
1429  (~C).store( i , j+IT::size*2UL, xmm3 );
1430  (~C).store( i , j+IT::size*3UL, xmm4 );
1431  (~C).store( i+1UL, j , xmm5 );
1432  (~C).store( i+1UL, j+IT::size , xmm6 );
1433  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1434  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1435  }
      // Leftover row (odd M) for the current block of 4 vectors.
1436  if( i < M ) {
1437  IntrinsicType xmm1( (~C).load(i,j ) );
1438  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1439  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1440  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1441  for( size_t k=0UL; k<K; ++k ) {
1442  const IntrinsicType a1( set( A(i,k) ) );
1443  xmm1 = xmm1 + a1 * B.load(k,j );
1444  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1445  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1446  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1447  }
1448  (~C).store( i, j , xmm1 );
1449  (~C).store( i, j+IT::size , xmm2 );
1450  (~C).store( i, j+IT::size*2UL, xmm3 );
1451  (~C).store( i, j+IT::size*3UL, xmm4 );
1452  }
1453  }
      // Column blocks of 2 vectors: two rows of C per iteration, then a leftover row.
1454  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1455  size_t i( 0UL );
1456  for( ; (i+2UL) <= M; i+=2UL ) {
1457  IntrinsicType xmm1( (~C).load(i ,j ) );
1458  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1459  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1460  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1461  for( size_t k=0UL; k<K; ++k ) {
1462  const IntrinsicType a1( set( A(i ,k) ) );
1463  const IntrinsicType a2( set( A(i+1UL,k) ) );
1464  const IntrinsicType b1( B.load(k,j ) );
1465  const IntrinsicType b2( B.load(k,j+IT::size) );
1466  xmm1 = xmm1 + a1 * b1;
1467  xmm2 = xmm2 + a1 * b2;
1468  xmm3 = xmm3 + a2 * b1;
1469  xmm4 = xmm4 + a2 * b2;
1470  }
1471  (~C).store( i , j , xmm1 );
1472  (~C).store( i , j+IT::size, xmm2 );
1473  (~C).store( i+1UL, j , xmm3 );
1474  (~C).store( i+1UL, j+IT::size, xmm4 );
1475  }
      // Leftover row (odd M) for the current block of 2 vectors.
1476  if( i < M ) {
1477  IntrinsicType xmm1( (~C).load(i,j ) );
1478  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1479  for( size_t k=0UL; k<K; ++k ) {
1480  const IntrinsicType a1( set( A(i,k) ) );
1481  xmm1 = xmm1 + a1 * B.load(k,j );
1482  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1483  }
1484  (~C).store( i, j , xmm1 );
1485  (~C).store( i, j+IT::size, xmm2 );
1486  }
1487  }
      // At most one vector of columns remains after the loops above: two rows per iteration,
      // then a leftover row.
1488  if( j < N ) {
1489  size_t i( 0UL );
1490  for( ; (i+2UL) <= M; i+=2UL ) {
1491  IntrinsicType xmm1( (~C).load(i ,j) );
1492  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1493  for( size_t k=0UL; k<K; ++k ) {
1494  const IntrinsicType b1( B.load(k,j) );
1495  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1496  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1497  }
1498  (~C).store( i , j, xmm1 );
1499  (~C).store( i+1UL, j, xmm2 );
1500  }
1501  if( i < M ) {
1502  IntrinsicType xmm1( (~C).load(i,j) );
1503  for( size_t k=0UL; k<K; ++k ) {
1504  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1505  }
1506  (~C).store( i, j, xmm1 );
1507  }
1508  }
1509  }
1511  //**********************************************************************************************
1512 
1513  //**Vectorized default addition assignment to column-major dense matrices***********************
/*!\brief Vectorized default addition assignment kernel for a column-major target matrix.
//
// \param C The column-major target dense matrix; its current values are loaded, updated and
//          stored back in place.
// \param A The left-hand side multiplication operand (read via A.load(i,k)).
// \param B The right-hand side multiplication operand (read element-wise via B(k,j)).
// \return void
//
// Enabled via UseVectorizedDefaultKernel<MT3,MT4,MT5>. For every processed position the
// kernel loads the current contents of C into IntrinsicType accumulators, adds
// A.load(i,k) * set( B(k,j) ) for all k in [0,K) and stores the accumulators back to C.
// The rows are traversed in blocks of 8, 4, 2 and finally 1 vector(s) of IT::size elements.
// NOTE(review): set() presumably broadcasts a scalar to all lanes of an IntrinsicType --
// confirm. The loop conditions only require the start of the last vector of a block to lie
// below M, so that vector may extend beyond M; this presumably relies on padded storage of
// A and C -- confirm.
*/
1527  template< typename MT3 // Type of the left-hand side target matrix
1528  , typename MT4 // Type of the left-hand side matrix operand
1529  , typename MT5 > // Type of the right-hand side matrix operand
1530  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1531  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1532  {
1533  typedef IntrinsicTrait<ElementType> IT;
1534 
1535  const size_t M( A.rows() );
1536  const size_t N( B.columns() );
1537  const size_t K( A.columns() );
1538 
      // Row index shared by all block loops below; each loop continues where the previous one
      // stopped.
1539  size_t i( 0UL );
1540 
      // Row blocks of 8 vectors: one column of C per iteration, eight accumulators.
1541  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1542  for( size_t j=0UL; j<N; ++j ) {
1543  IntrinsicType xmm1( (~C).load(i ,j) );
1544  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1545  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1546  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1547  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1548  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1549  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1550  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1551  for( size_t k=0UL; k<K; ++k ) {
1552  const IntrinsicType b1( set( B(k,j) ) );
1553  xmm1 = xmm1 + A.load(i ,k) * b1;
1554  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1555  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1556  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1557  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1558  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1559  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1560  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1561  }
1562  (~C).store( i , j, xmm1 );
1563  (~C).store( i+IT::size , j, xmm2 );
1564  (~C).store( i+IT::size*2UL, j, xmm3 );
1565  (~C).store( i+IT::size*3UL, j, xmm4 );
1566  (~C).store( i+IT::size*4UL, j, xmm5 );
1567  (~C).store( i+IT::size*5UL, j, xmm6 );
1568  (~C).store( i+IT::size*6UL, j, xmm7 );
1569  (~C).store( i+IT::size*7UL, j, xmm8 );
1570  }
1571  }
      // Row blocks of 4 vectors: two columns of C per iteration (xmm1-4 for column j, xmm5-8
      // for column j+1), followed by a single leftover column if N is odd.
1572  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1573  size_t j( 0UL );
1574  for( ; (j+2UL) <= N; j+=2UL ) {
1575  IntrinsicType xmm1( (~C).load(i ,j ) );
1576  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1577  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1578  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1579  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1580  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1581  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1582  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1583  for( size_t k=0UL; k<K; ++k ) {
1584  const IntrinsicType a1( A.load(i ,k) );
1585  const IntrinsicType a2( A.load(i+IT::size ,k) );
1586  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1587  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1588  const IntrinsicType b1( set( B(k,j ) ) );
1589  const IntrinsicType b2( set( B(k,j+1UL) ) );
1590  xmm1 = xmm1 + a1 * b1;
1591  xmm2 = xmm2 + a2 * b1;
1592  xmm3 = xmm3 + a3 * b1;
1593  xmm4 = xmm4 + a4 * b1;
1594  xmm5 = xmm5 + a1 * b2;
1595  xmm6 = xmm6 + a2 * b2;
1596  xmm7 = xmm7 + a3 * b2;
1597  xmm8 = xmm8 + a4 * b2;
1598  }
1599  (~C).store( i , j , xmm1 );
1600  (~C).store( i+IT::size , j , xmm2 );
1601  (~C).store( i+IT::size*2UL, j , xmm3 );
1602  (~C).store( i+IT::size*3UL, j , xmm4 );
1603  (~C).store( i , j+1UL, xmm5 );
1604  (~C).store( i+IT::size , j+1UL, xmm6 );
1605  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1606  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1607  }
      // Leftover column (odd N) for the current block of 4 vectors.
1608  if( j < N ) {
1609  IntrinsicType xmm1( (~C).load(i ,j) );
1610  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1611  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1612  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1613  for( size_t k=0UL; k<K; ++k ) {
1614  const IntrinsicType b1( set( B(k,j) ) );
1615  xmm1 = xmm1 + A.load(i ,k) * b1;
1616  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1617  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1618  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1619  }
1620  (~C).store( i , j, xmm1 );
1621  (~C).store( i+IT::size , j, xmm2 );
1622  (~C).store( i+IT::size*2UL, j, xmm3 );
1623  (~C).store( i+IT::size*3UL, j, xmm4 );
1624  }
1625  }
      // Row blocks of 2 vectors: two columns of C per iteration, then a leftover column.
1626  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1627  size_t j( 0UL );
1628  for( ; (j+2UL) <= N; j+=2UL ) {
1629  IntrinsicType xmm1( (~C).load(i ,j ) );
1630  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1631  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1632  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1633  for( size_t k=0UL; k<K; ++k ) {
1634  const IntrinsicType a1( A.load(i ,k) );
1635  const IntrinsicType a2( A.load(i+IT::size,k) );
1636  const IntrinsicType b1( set( B(k,j ) ) );
1637  const IntrinsicType b2( set( B(k,j+1UL) ) );
1638  xmm1 = xmm1 + a1 * b1;
1639  xmm2 = xmm2 + a2 * b1;
1640  xmm3 = xmm3 + a1 * b2;
1641  xmm4 = xmm4 + a2 * b2;
1642  }
1643  (~C).store( i , j , xmm1 );
1644  (~C).store( i+IT::size, j , xmm2 );
1645  (~C).store( i , j+1UL, xmm3 );
1646  (~C).store( i+IT::size, j+1UL, xmm4 );
1647  }
      // Leftover column (odd N) for the current block of 2 vectors.
1648  if( j < N ) {
1649  IntrinsicType xmm1( (~C).load(i ,j) );
1650  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1651  for( size_t k=0UL; k<K; ++k ) {
1652  const IntrinsicType b1( set( B(k,j) ) );
1653  xmm1 = xmm1 + A.load(i ,k) * b1;
1654  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1655  }
1656  (~C).store( i , j, xmm1 );
1657  (~C).store( i+IT::size, j, xmm2 );
1658  }
1659  }
      // At most one vector of rows remains after the loops above: two columns per iteration,
      // then a leftover column.
1660  if( i < M ) {
1661  size_t j( 0UL );
1662  for( ; (j+2UL) <= N; j+=2UL ) {
1663  IntrinsicType xmm1( (~C).load(i,j ) );
1664  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1665  for( size_t k=0UL; k<K; ++k ) {
1666  const IntrinsicType a1( A.load(i,k) );
1667  xmm1 = xmm1 + a1 * set( B(k,j ) );
1668  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1669  }
1670  (~C).store( i, j , xmm1 );
1671  (~C).store( i, j+1UL, xmm2 );
1672  }
1673  if( j < N ) {
1674  IntrinsicType xmm1( (~C).load(i,j) );
1675  for( size_t k=0UL; k<K; ++k ) {
1676  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1677  }
1678  (~C).store( i, j, xmm1 );
1679  }
1680  }
1681  }
1683  //**********************************************************************************************
1684 
1685  //**BLAS-based addition assignment to dense matrices (default)**********************************
1699  template< typename MT3 // Type of the left-hand side target matrix
1700  , typename MT4 // Type of the left-hand side matrix operand
1701  , typename MT5 > // Type of the right-hand side matrix operand
1702  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1703  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1704  {
1705  selectDefaultAddAssignKernel( C, A, B );
1706  }
1708  //**********************************************************************************************
1709 
1710  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1711 #if BLAZE_BLAS_MODE
1712 
1725  template< typename MT3 // Type of the left-hand side target matrix
1726  , typename MT4 // Type of the left-hand side matrix operand
1727  , typename MT5 > // Type of the right-hand side matrix operand
1728  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1729  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1730  {
1731  sgemm( C, A, B, 1.0F, 1.0F );
1732  }
1734 #endif
1735  //**********************************************************************************************
1736 
1737  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1738 #if BLAZE_BLAS_MODE
1739 
1752  template< typename MT3 // Type of the left-hand side target matrix
1753  , typename MT4 // Type of the left-hand side matrix operand
1754  , typename MT5 > // Type of the right-hand side matrix operand
1755  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1756  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1757  {
1758  dgemm( C, A, B, 1.0, 1.0 );
1759  }
1761 #endif
1762  //**********************************************************************************************
1763 
1764  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1765 #if BLAZE_BLAS_MODE
1766 
1779  template< typename MT3 // Type of the left-hand side target matrix
1780  , typename MT4 // Type of the left-hand side matrix operand
1781  , typename MT5 > // Type of the right-hand side matrix operand
1782  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1783  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1784  {
1785  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1786  }
1788 #endif
1789  //**********************************************************************************************
1790 
1791  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1792 #if BLAZE_BLAS_MODE
1793 
1806  template< typename MT3 // Type of the left-hand side target matrix
1807  , typename MT4 // Type of the left-hand side matrix operand
1808  , typename MT5 > // Type of the right-hand side matrix operand
1809  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1810  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1811  {
1812  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1813  }
1815 #endif
1816  //**********************************************************************************************
1817 
1818  //**Addition assignment to sparse matrices******************************************************
1819  // No special implementation for the addition assignment to sparse matrices.
1820  //**********************************************************************************************
1821 
1822  //**Subtraction assignment to dense matrices****************************************************
1835  template< typename MT // Type of the target dense matrix
1836  , bool SO > // Storage order of the target dense matrix
1837  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
1838  {
1840 
1841  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1842  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1843 
1844  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1845  return;
1846  }
1847 
1848  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1849  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1850 
1851  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1852  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1853  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1854  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1855  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1856  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1857 
1858  TDMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1859  }
1861  //**********************************************************************************************
1862 
1863  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1874  template< typename MT3 // Type of the left-hand side target matrix
1875  , typename MT4 // Type of the left-hand side matrix operand
1876  , typename MT5 > // Type of the right-hand side matrix operand
1877  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1878  {
1879  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
1880  TDMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1881  else
1882  TDMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1883  }
1885  //**********************************************************************************************
1886 
1887  //**Default subtraction assignment to dense matrices********************************************
1901  template< typename MT3 // Type of the left-hand side target matrix
1902  , typename MT4 // Type of the left-hand side matrix operand
1903  , typename MT5 > // Type of the right-hand side matrix operand
1904  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1905  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1906  {
1907  const size_t M( A.rows() );
1908  const size_t N( B.columns() );
1909  const size_t K( A.columns() );
1910 
1911  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1912  const size_t end( N & size_t(-2) );
1913 
1914  for( size_t i=0UL; i<M; ++i ) {
1915  for( size_t k=0UL; k<K; ++k ) {
1916  for( size_t j=0UL; j<end; j+=2UL ) {
1917  C(i,j ) -= A(i,k) * B(k,j );
1918  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1919  }
1920  if( end < N ) {
1921  C(i,end) -= A(i,k) * B(k,end);
1922  }
1923  }
1924  }
1925  }
1927  //**********************************************************************************************
1928 
1929  //**Vectorized default subtraction assignment to row-major dense matrices***********************
/*!\brief Vectorized default subtraction assignment kernel for a row-major target matrix.
//
// \param C The row-major target dense matrix; its current values are loaded, updated and
//          stored back in place.
// \param A The left-hand side multiplication operand (read element-wise via A(i,k)).
// \param B The right-hand side multiplication operand (read via B.load(k,j)).
// \return void
//
// Enabled via UseVectorizedDefaultKernel<MT3,MT4,MT5>. For every processed position the
// kernel loads the current contents of C into IntrinsicType accumulators, subtracts
// set( A(i,k) ) * B.load(k,j) for all k in [0,K) and stores the accumulators back to C.
// The columns are traversed in blocks of 8, 4, 2 and finally 1 vector(s) of IT::size
// elements.
// NOTE(review): set() presumably broadcasts a scalar to all lanes of an IntrinsicType --
// confirm. The loop conditions only require the start of the last vector of a block to lie
// below N, so that vector may extend beyond N; this presumably relies on padded storage of
// B and C -- confirm.
*/
1943  template< typename MT3 // Type of the left-hand side target matrix
1944  , typename MT4 // Type of the left-hand side matrix operand
1945  , typename MT5 > // Type of the right-hand side matrix operand
1946  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1947  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1948  {
1949  typedef IntrinsicTrait<ElementType> IT;
1950 
1951  const size_t M( A.rows() );
1952  const size_t N( B.columns() );
1953  const size_t K( A.columns() );
1954 
      // Column index shared by all block loops below; each loop continues where the previous
      // one stopped.
1955  size_t j( 0UL );
1956 
      // Column blocks of 8 vectors: one row of C per iteration, eight accumulators.
1957  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1958  for( size_t i=0UL; i<M; ++i ) {
1959  IntrinsicType xmm1( (~C).load(i,j ) );
1960  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1961  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1962  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1963  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1964  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1965  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1966  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1967  for( size_t k=0UL; k<K; ++k ) {
1968  const IntrinsicType a1( set( A(i,k) ) );
1969  xmm1 = xmm1 - a1 * B.load(k,j );
1970  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1971  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1972  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1973  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1974  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1975  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1976  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1977  }
1978  (~C).store( i, j , xmm1 );
1979  (~C).store( i, j+IT::size , xmm2 );
1980  (~C).store( i, j+IT::size*2UL, xmm3 );
1981  (~C).store( i, j+IT::size*3UL, xmm4 );
1982  (~C).store( i, j+IT::size*4UL, xmm5 );
1983  (~C).store( i, j+IT::size*5UL, xmm6 );
1984  (~C).store( i, j+IT::size*6UL, xmm7 );
1985  (~C).store( i, j+IT::size*7UL, xmm8 );
1986  }
1987  }
      // Column blocks of 4 vectors: two rows of C per iteration (xmm1-4 for row i, xmm5-8 for
      // row i+1), followed by a single leftover row if M is odd.
1988  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1989  size_t i( 0UL );
1990  for( ; (i+2UL) <= M; i+=2UL ) {
1991  IntrinsicType xmm1( (~C).load(i ,j ) );
1992  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1993  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1994  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1995  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1996  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1997  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1998  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1999  for( size_t k=0UL; k<K; ++k ) {
2000  const IntrinsicType a1( set( A(i ,k) ) );
2001  const IntrinsicType a2( set( A(i+1UL,k) ) );
2002  const IntrinsicType b1( B.load(k,j ) );
2003  const IntrinsicType b2( B.load(k,j+IT::size ) );
2004  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2005  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2006  xmm1 = xmm1 - a1 * b1;
2007  xmm2 = xmm2 - a1 * b2;
2008  xmm3 = xmm3 - a1 * b3;
2009  xmm4 = xmm4 - a1 * b4;
2010  xmm5 = xmm5 - a2 * b1;
2011  xmm6 = xmm6 - a2 * b2;
2012  xmm7 = xmm7 - a2 * b3;
2013  xmm8 = xmm8 - a2 * b4;
2014  }
2015  (~C).store( i , j , xmm1 );
2016  (~C).store( i , j+IT::size , xmm2 );
2017  (~C).store( i , j+IT::size*2UL, xmm3 );
2018  (~C).store( i , j+IT::size*3UL, xmm4 );
2019  (~C).store( i+1UL, j , xmm5 );
2020  (~C).store( i+1UL, j+IT::size , xmm6 );
2021  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
2022  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
2023  }
      // Leftover row (odd M) for the current block of 4 vectors.
2024  if( i < M ) {
2025  IntrinsicType xmm1( (~C).load(i,j ) );
2026  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
2027  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
2028  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
2029  for( size_t k=0UL; k<K; ++k ) {
2030  const IntrinsicType a1( set( A(i,k) ) );
2031  xmm1 = xmm1 - a1 * B.load(k,j );
2032  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
2033  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
2034  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
2035  }
2036  (~C).store( i, j , xmm1 );
2037  (~C).store( i, j+IT::size , xmm2 );
2038  (~C).store( i, j+IT::size*2UL, xmm3 );
2039  (~C).store( i, j+IT::size*3UL, xmm4 );
2040  }
2041  }
      // Column blocks of 2 vectors: two rows of C per iteration, then a leftover row.
2042  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2043  size_t i( 0UL );
2044  for( ; (i+2UL) <= M; i+=2UL ) {
2045  IntrinsicType xmm1( (~C).load(i ,j ) );
2046  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
2047  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
2048  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
2049  for( size_t k=0UL; k<K; ++k ) {
2050  const IntrinsicType a1( set( A(i ,k) ) );
2051  const IntrinsicType a2( set( A(i+1UL,k) ) );
2052  const IntrinsicType b1( B.load(k,j ) );
2053  const IntrinsicType b2( B.load(k,j+IT::size) );
2054  xmm1 = xmm1 - a1 * b1;
2055  xmm2 = xmm2 - a1 * b2;
2056  xmm3 = xmm3 - a2 * b1;
2057  xmm4 = xmm4 - a2 * b2;
2058  }
2059  (~C).store( i , j , xmm1 );
2060  (~C).store( i , j+IT::size, xmm2 );
2061  (~C).store( i+1UL, j , xmm3 );
2062  (~C).store( i+1UL, j+IT::size, xmm4 );
2063  }
      // Leftover row (odd M) for the current block of 2 vectors.
2064  if( i < M ) {
2065  IntrinsicType xmm1( (~C).load(i,j ) );
2066  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
2067  for( size_t k=0UL; k<K; ++k ) {
2068  const IntrinsicType a1( set( A(i,k) ) );
2069  xmm1 = xmm1 - a1 * B.load(k,j );
2070  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
2071  }
2072  (~C).store( i, j , xmm1 );
2073  (~C).store( i, j+IT::size, xmm2 );
2074  }
2075  }
      // At most one vector of columns remains after the loops above: two rows per iteration,
      // then a leftover row.
2076  if( j < N ) {
2077  size_t i( 0UL );
2078  for( ; (i+2UL) <= M; i+=2UL ) {
2079  IntrinsicType xmm1( (~C).load(i ,j) );
2080  IntrinsicType xmm2( (~C).load(i+1UL,j) );
2081  for( size_t k=0UL; k<K; ++k ) {
2082  const IntrinsicType b1( B.load(k,j) );
2083  xmm1 = xmm1 - set( A(i ,k) ) * b1;
2084  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
2085  }
2086  (~C).store( i , j, xmm1 );
2087  (~C).store( i+1UL, j, xmm2 );
2088  }
2089  if( i < M ) {
2090  IntrinsicType xmm1( (~C).load(i,j) );
2091  for( size_t k=0UL; k<K; ++k ) {
2092  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
2093  }
2094  (~C).store( i, j, xmm1 );
2095  }
2096  }
2097  }
2099  //**********************************************************************************************
2100 
2101  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for the subtraction assignment C -= A * B to a column-major
// dense target matrix. The overload is enabled through EnableIf when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C : target dense matrix (accessed through ~C); each tile is loaded, updated and stored back
// A : left-hand side operand, read with A.load(i,k)
// B : right-hand side operand, read element-wise with B(k,j) and passed through set()
//
// The rows are processed in blocks of 8, 4, 2 and finally 1 IntrinsicType(s) per step, each
// IntrinsicType covering IT::size rows. Every accumulator starts from the current content of C
// and has the products subtracted for all k in [0,K).
// NOTE(review): set() presumably broadcasts a scalar to all lanes and load()/store() presumably
// move IT::size consecutive rows of one column -- confirm against the intrinsics layer.
// NOTE(review): the final block runs whenever i < M and always touches a full IntrinsicType, so
// the kernel presumably relies on the row dimension being padded to a multiple of IT::size.
2115  template< typename MT3 // Type of the left-hand side target matrix
2116  , typename MT4 // Type of the left-hand side matrix operand
2117  , typename MT5 > // Type of the right-hand side matrix operand
2118  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2119  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2120  {
2121  typedef IntrinsicTrait<ElementType> IT;
2122 
2123  const size_t M( A.rows() );
2124  const size_t N( B.columns() );
2125  const size_t K( A.columns() );
2126 
      // The row index is shared by all block loops below; each loop continues where the
      // previous, wider one stopped.
2127  size_t i( 0UL );
2128 
      // Blocks of 8 IntrinsicTypes (8*IT::size rows) times a single column.
2129  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2130  for( size_t j=0UL; j<N; ++j ) {
2131  IntrinsicType xmm1( (~C).load(i ,j) );
2132  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
2133  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
2134  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
2135  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
2136  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
2137  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
2138  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
2139  for( size_t k=0UL; k<K; ++k ) {
2140  const IntrinsicType b1( set( B(k,j) ) );
2141  xmm1 = xmm1 - A.load(i ,k) * b1;
2142  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
2143  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
2144  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
2145  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
2146  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
2147  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
2148  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
2149  }
2150  (~C).store( i , j, xmm1 );
2151  (~C).store( i+IT::size , j, xmm2 );
2152  (~C).store( i+IT::size*2UL, j, xmm3 );
2153  (~C).store( i+IT::size*3UL, j, xmm4 );
2154  (~C).store( i+IT::size*4UL, j, xmm5 );
2155  (~C).store( i+IT::size*5UL, j, xmm6 );
2156  (~C).store( i+IT::size*6UL, j, xmm7 );
2157  (~C).store( i+IT::size*7UL, j, xmm8 );
2158  }
2159  }
      // Blocks of 4 IntrinsicTypes times two columns (xmm1-4: column j, xmm5-8: column j+1),
      // followed by a single-column pass if N is odd.
2160  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2161  size_t j( 0UL );
2162  for( ; (j+2UL) <= N; j+=2UL ) {
2163  IntrinsicType xmm1( (~C).load(i ,j ) );
2164  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
2165  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
2166  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
2167  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
2168  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
2169  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
2170  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
2171  for( size_t k=0UL; k<K; ++k ) {
2172  const IntrinsicType a1( A.load(i ,k) );
2173  const IntrinsicType a2( A.load(i+IT::size ,k) );
2174  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2175  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2176  const IntrinsicType b1( set( B(k,j ) ) );
2177  const IntrinsicType b2( set( B(k,j+1UL) ) );
2178  xmm1 = xmm1 - a1 * b1;
2179  xmm2 = xmm2 - a2 * b1;
2180  xmm3 = xmm3 - a3 * b1;
2181  xmm4 = xmm4 - a4 * b1;
2182  xmm5 = xmm5 - a1 * b2;
2183  xmm6 = xmm6 - a2 * b2;
2184  xmm7 = xmm7 - a3 * b2;
2185  xmm8 = xmm8 - a4 * b2;
2186  }
2187  (~C).store( i , j , xmm1 );
2188  (~C).store( i+IT::size , j , xmm2 );
2189  (~C).store( i+IT::size*2UL, j , xmm3 );
2190  (~C).store( i+IT::size*3UL, j , xmm4 );
2191  (~C).store( i , j+1UL, xmm5 );
2192  (~C).store( i+IT::size , j+1UL, xmm6 );
2193  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
2194  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
2195  }
2196  if( j < N ) {
2197  IntrinsicType xmm1( (~C).load(i ,j) );
2198  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
2199  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
2200  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
2201  for( size_t k=0UL; k<K; ++k ) {
2202  const IntrinsicType b1( set( B(k,j) ) );
2203  xmm1 = xmm1 - A.load(i ,k) * b1;
2204  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
2205  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
2206  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
2207  }
2208  (~C).store( i , j, xmm1 );
2209  (~C).store( i+IT::size , j, xmm2 );
2210  (~C).store( i+IT::size*2UL, j, xmm3 );
2211  (~C).store( i+IT::size*3UL, j, xmm4 );
2212  }
2213  }
      // Blocks of 2 IntrinsicTypes times two columns, followed by a single-column pass
      // if N is odd.
2214  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2215  size_t j( 0UL );
2216  for( ; (j+2UL) <= N; j+=2UL ) {
2217  IntrinsicType xmm1( (~C).load(i ,j ) );
2218  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
2219  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
2220  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
2221  for( size_t k=0UL; k<K; ++k ) {
2222  const IntrinsicType a1( A.load(i ,k) );
2223  const IntrinsicType a2( A.load(i+IT::size,k) );
2224  const IntrinsicType b1( set( B(k,j ) ) );
2225  const IntrinsicType b2( set( B(k,j+1UL) ) );
2226  xmm1 = xmm1 - a1 * b1;
2227  xmm2 = xmm2 - a2 * b1;
2228  xmm3 = xmm3 - a1 * b2;
2229  xmm4 = xmm4 - a2 * b2;
2230  }
2231  (~C).store( i , j , xmm1 );
2232  (~C).store( i+IT::size, j , xmm2 );
2233  (~C).store( i , j+1UL, xmm3 );
2234  (~C).store( i+IT::size, j+1UL, xmm4 );
2235  }
2236  if( j < N ) {
2237  IntrinsicType xmm1( (~C).load(i ,j) );
2238  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
2239  for( size_t k=0UL; k<K; ++k ) {
2240  const IntrinsicType b1( set( B(k,j) ) );
2241  xmm1 = xmm1 - A.load(i ,k) * b1;
2242  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
2243  }
2244  (~C).store( i , j, xmm1 );
2245  (~C).store( i+IT::size, j, xmm2 );
2246  }
2247  }
      // Remaining rows: at most one further IntrinsicType, again two columns at a time plus
      // a single-column pass if N is odd.
2248  if( i < M ) {
2249  size_t j( 0UL );
2250  for( ; (j+2UL) <= N; j+=2UL ) {
2251  IntrinsicType xmm1( (~C).load(i,j ) );
2252  IntrinsicType xmm2( (~C).load(i,j+1UL) );
2253  for( size_t k=0UL; k<K; ++k ) {
2254  const IntrinsicType a1( A.load(i,k) );
2255  xmm1 = xmm1 - a1 * set( B(k,j ) );
2256  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
2257  }
2258  (~C).store( i, j , xmm1 );
2259  (~C).store( i, j+1UL, xmm2 );
2260  }
2261  if( j < N ) {
2262  IntrinsicType xmm1( (~C).load(i,j) );
2263  for( size_t k=0UL; k<K; ++k ) {
2264  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
2265  }
2266  (~C).store( i, j, xmm1 );
2267  }
2268  }
2269  }
2271  //**********************************************************************************************
2272 
2273  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS-based subtraction assignment kernel. It is enabled through
// EnableIf when UseDefaultKernel<MT3,MT4,MT5> holds and simply forwards all three arguments to
// selectDefaultSubAssignKernel().
//
// C : target matrix of the subtraction assignment
// A : left-hand side matrix operand
// B : right-hand side matrix operand
2287  template< typename MT3 // Type of the left-hand side target matrix
2288  , typename MT4 // Type of the left-hand side matrix operand
2289  , typename MT5 > // Type of the right-hand side matrix operand
2290  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2291  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2292  {
2293  selectDefaultSubAssignKernel( C, A, B );
2294  }
2296  //**********************************************************************************************
2297 
2298  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
2299 #if BLAZE_BLAS_MODE
2300 
// Single precision overload of the BLAS-based subtraction assignment kernel (only compiled
// when BLAZE_BLAS_MODE is set). Enabled through EnableIf when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds; calls sgemm() with the factors -1.0F and 1.0F.
// NOTE(review): presumably sgemm() computes C = alpha*A*B + beta*C, which makes this
// C -= A*B -- confirm against the sgemm() helper of this class.
//
// C : target matrix of the subtraction assignment
// A : left-hand side matrix operand
// B : right-hand side matrix operand
2313  template< typename MT3 // Type of the left-hand side target matrix
2314  , typename MT4 // Type of the left-hand side matrix operand
2315  , typename MT5 > // Type of the right-hand side matrix operand
2316  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2317  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2318  {
2319  sgemm( C, A, B, -1.0F, 1.0F );
2320  }
2322 #endif
2323  //**********************************************************************************************
2324 
2325  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2326 #if BLAZE_BLAS_MODE
2327 
// Double precision overload of the BLAS-based subtraction assignment kernel (only compiled
// when BLAZE_BLAS_MODE is set). Enabled through EnableIf when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds; calls dgemm() with the factors -1.0 and 1.0.
// NOTE(review): presumably dgemm() computes C = alpha*A*B + beta*C, which makes this
// C -= A*B -- confirm against the dgemm() helper of this class.
//
// C : target matrix of the subtraction assignment
// A : left-hand side matrix operand
// B : right-hand side matrix operand
2340  template< typename MT3 // Type of the left-hand side target matrix
2341  , typename MT4 // Type of the left-hand side matrix operand
2342  , typename MT5 > // Type of the right-hand side matrix operand
2343  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2344  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2345  {
2346  dgemm( C, A, B, -1.0, 1.0 );
2347  }
2349 #endif
2350  //**********************************************************************************************
2351 
2352  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2353 #if BLAZE_BLAS_MODE
2354 
// Single precision complex overload of the BLAS-based subtraction assignment kernel (only
// compiled when BLAZE_BLAS_MODE is set). Enabled through EnableIf when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cgemm() with the complex factors
// (-1,0) and (1,0).
// NOTE(review): presumably cgemm() computes C = alpha*A*B + beta*C, which makes this
// C -= A*B -- confirm against the cgemm() helper of this class.
//
// C : target matrix of the subtraction assignment
// A : left-hand side matrix operand
// B : right-hand side matrix operand
2367  template< typename MT3 // Type of the left-hand side target matrix
2368  , typename MT4 // Type of the left-hand side matrix operand
2369  , typename MT5 > // Type of the right-hand side matrix operand
2370  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2371  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2372  {
2373  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2374  }
2376 #endif
2377  //**********************************************************************************************
2378 
2379  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2380 #if BLAZE_BLAS_MODE
2381 
// Double precision complex overload of the BLAS-based subtraction assignment kernel (only
// compiled when BLAZE_BLAS_MODE is set). Enabled through EnableIf when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds; calls zgemm() with the complex factors
// (-1,0) and (1,0).
// NOTE(review): presumably zgemm() computes C = alpha*A*B + beta*C, which makes this
// C -= A*B -- confirm against the zgemm() helper of this class.
//
// C : target matrix of the subtraction assignment
// A : left-hand side matrix operand
// B : right-hand side matrix operand
2394  template< typename MT3 // Type of the left-hand side target matrix
2395  , typename MT4 // Type of the left-hand side matrix operand
2396  , typename MT5 > // Type of the right-hand side matrix operand
2397  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2398  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2399  {
2400  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2401  }
2403 #endif
2404  //**********************************************************************************************
2405 
2406  //**Subtraction assignment to sparse matrices***************************************************
2407  // No special implementation for the subtraction assignment to sparse matrices.
2408  //**********************************************************************************************
2409 
2410  //**Multiplication assignment to dense matrices*************************************************
2411  // No special implementation for the multiplication assignment to dense matrices.
2412  //**********************************************************************************************
2413 
2414  //**Multiplication assignment to sparse matrices************************************************
2415  // No special implementation for the multiplication assignment to sparse matrices.
2416  //**********************************************************************************************
2417 
2418  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a transpose dense matrix-dense matrix multiplication to a dense matrix.
// The overload is enabled through EnableIf when IsEvaluationRequired<MT,MT1,MT2> holds.
//
// lhs : target dense matrix (accessed through ~lhs)
// rhs : multiplication expression to be assigned
//
// Both operands are first evaluated into the local objects A and B; the assignment is then
// forwarded to smpAssign() with the product A * B of the evaluated operands.
// NOTE(review): the listing skips source line 2439 inside the body; check the original file
// for the statement that belongs there.
2434  template< typename MT // Type of the target dense matrix
2435  , bool SO > // Storage order of the target dense matrix
2436  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2437  smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
2438  {
2440 
2441  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2442  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2443 
      // Empty target: nothing to assign. Empty inner dimension: the product has no
      // contributions, so the target is only reset.
2444  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2445  return;
2446  }
2447  else if( rhs.lhs_.columns() == 0UL ) {
2448  reset( ~lhs );
2449  return;
2450  }
2451 
2452  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2453  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2454 
2455  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2456  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2457  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2458  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2459  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2460  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2461 
2462  smpAssign( ~lhs, A * B );
2463  }
2465  //**********************************************************************************************
2466 
2467  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a transpose dense matrix-dense matrix multiplication to a sparse matrix.
// The overload is enabled through EnableIf when IsEvaluationRequired<MT,MT1,MT2> holds.
//
// lhs : target sparse matrix (accessed through ~lhs)
// rhs : multiplication expression to be assigned
//
// The expression is evaluated into a temporary of type TmpType, which is then passed on to
// smpAssign(). TmpType is chosen by SelectType from ResultType and OppositeType depending on
// the storage order SO (presumably ResultType for SO == true -- confirm against SelectType).
// NOTE(review): the listing skips source lines 2488 and 2492-2497 inside the body; check the
// original file for the statements that belong there.
2483  template< typename MT // Type of the target sparse matrix
2484  , bool SO > // Storage order of the target sparse matrix
2485  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2486  smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
2487  {
2489 
2490  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2491 
2498 
2499  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2500  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2501 
2502  const TmpType tmp( rhs );
2503  smpAssign( ~lhs, tmp );
2504  }
2506  //**********************************************************************************************
2507 
2508  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a transpose dense matrix-dense matrix multiplication to a dense
// matrix. The overload is enabled through EnableIf when IsEvaluationRequired<MT,MT1,MT2> holds.
//
// lhs : target dense matrix (accessed through ~lhs)
// rhs : multiplication expression to be added
//
// Both operands are first evaluated into the local objects A and B; the operation is then
// forwarded to smpAddAssign() with the product A * B of the evaluated operands.
// NOTE(review): the listing skips source line 2529 inside the body; check the original file
// for the statement that belongs there.
2524  template< typename MT // Type of the target dense matrix
2525  , bool SO > // Storage order of the target dense matrix
2526  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2527  smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
2528  {
2530 
2531  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2532  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2533 
      // Nothing is added if the target is empty or the inner dimension is zero.
2534  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2535  return;
2536  }
2537 
2538  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2539  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2540 
2541  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2542  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2543  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2544  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2545  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2546  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2547 
2548  smpAddAssign( ~lhs, A * B );
2549  }
2551  //**********************************************************************************************
2552 
2553  //**SMP addition assignment to sparse matrices**************************************************
2554  // No special implementation for the SMP addition assignment to sparse matrices.
2555  //**********************************************************************************************
2556 
2557  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a transpose dense matrix-dense matrix multiplication to a
// dense matrix. The overload is enabled through EnableIf when
// IsEvaluationRequired<MT,MT1,MT2> holds.
//
// lhs : target dense matrix (accessed through ~lhs)
// rhs : multiplication expression to be subtracted
//
// Both operands are first evaluated into the local objects A and B; the operation is then
// forwarded to smpSubAssign() with the product A * B of the evaluated operands.
// NOTE(review): the listing skips source line 2578 inside the body; check the original file
// for the statement that belongs there.
2573  template< typename MT // Type of the target dense matrix
2574  , bool SO > // Storage order of the target dense matrix
2575  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2576  smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatDMatMultExpr& rhs )
2577  {
2579 
2580  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2581  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2582 
      // Nothing is subtracted if the target is empty or the inner dimension is zero.
2583  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2584  return;
2585  }
2586 
2587  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2588  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2589 
2590  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2591  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2592  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2593  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2594  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2595  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2596 
2597  smpSubAssign( ~lhs, A * B );
2598  }
2600  //**********************************************************************************************
2601 
2602  //**SMP subtraction assignment to sparse matrices***********************************************
2603  // No special implementation for the SMP subtraction assignment to sparse matrices.
2604  //**********************************************************************************************
2605 
2606  //**SMP multiplication assignment to dense matrices*********************************************
2607  // No special implementation for the SMP multiplication assignment to dense matrices.
2608  //**********************************************************************************************
2609 
2610  //**SMP multiplication assignment to sparse matrices********************************************
2611  // No special implementation for the SMP multiplication assignment to sparse matrices.
2612  //**********************************************************************************************
2613 
2614  //**Compile time checks*************************************************************************
2622  //**********************************************************************************************
2623 };
2624 //*************************************************************************************************
2625 
2626 
2627 
2628 
2629 //=================================================================================================
2630 //
2631 // DMATSCALARMULTEXPR SPECIALIZATION
2632 //
2633 //=================================================================================================
2634 
2635 //*************************************************************************************************
2643 template< typename MT1 // Type of the left-hand side dense matrix
2644  , typename MT2 // Type of the right-hand side dense matrix
2645  , typename ST > // Type of the right-hand side scalar value
2646 class DMatScalarMultExpr< TDMatDMatMultExpr<MT1,MT2>, ST, true >
2647  : public DenseMatrix< DMatScalarMultExpr< TDMatDMatMultExpr<MT1,MT2>, ST, true >, true >
2648  , private MatScalarMultExpr
2649  , private Computation
2650 {
2651  private:
2652  //**Type definitions****************************************************************************
// Private shorthand types for the wrapped multiplication expression and its two operands.
2653  typedef TDMatDMatMultExpr<MT1,MT2> MMM; // Type of the scaled matrix multiplication expression
2654  typedef typename MMM::ResultType RES; // Result type of the matrix multiplication expression
2655  typedef typename MT1::ResultType RT1; // Result type of the left-hand side dense matrix
2656  typedef typename MT2::ResultType RT2; // Result type of the right-hand side dense matrix
2657  typedef typename RT1::ElementType ET1; // Element type of the left-hand side result type
2658  typedef typename RT2::ElementType ET2; // Element type of the right-hand side result type
2659  typedef typename MT1::CompositeType CT1; // Composite type of the left-hand side dense matrix
2660  typedef typename MT2::CompositeType CT2; // Composite type of the right-hand side dense matrix
2661  //**********************************************************************************************
2662 
2663  //**********************************************************************************************
// Compile-time flag: set when the left-hand side operand MT1 is a computation or requires an
// evaluation. It selects the type LT and is part of IsEvaluationRequired and smpAssignable.
2665  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2666  //**********************************************************************************************
2667 
2668  //**********************************************************************************************
// Compile-time flag: set when the right-hand side operand MT2 is a computation or requires an
// evaluation. It selects the type RT and is part of IsEvaluationRequired and smpAssignable.
2670  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2671  //**********************************************************************************************
2672 
2673  //**********************************************************************************************
2675 
// Helper trait: its value is set when either of the two operands has to be evaluated
// (evaluateLeft or evaluateRight). The three template parameters are not used by the
// definition of the value.
2678  template< typename T1, typename T2, typename T3 >
2679  struct IsEvaluationRequired {
2680  enum { value = ( evaluateLeft || evaluateRight ) };
2681  };
2682  //**********************************************************************************************
2683 
2684  //**********************************************************************************************
2686 
// Helper trait selecting the single precision BLAS kernel. The value is set when
// BLAZE_BLAS_MODE is active, the three matrix types T1, T2 and T3 are vectorizable, IsFloat
// holds for all three element types, and the scalar type T4 is not complex.
2689  template< typename T1, typename T2, typename T3, typename T4 >
2690  struct UseSinglePrecisionKernel {
2691  enum { value = BLAZE_BLAS_MODE &&
2692  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2693  IsFloat<typename T1::ElementType>::value &&
2694  IsFloat<typename T2::ElementType>::value &&
2695  IsFloat<typename T3::ElementType>::value &&
2696  !IsComplex<T4>::value };
2697  };
2698  //**********************************************************************************************
2699 
2700  //**********************************************************************************************
2702 
// Helper trait selecting the double precision BLAS kernel. The value is set when
// BLAZE_BLAS_MODE is active, the three matrix types T1, T2 and T3 are vectorizable, IsDouble
// holds for all three element types, and the scalar type T4 is not complex.
2705  template< typename T1, typename T2, typename T3, typename T4 >
2706  struct UseDoublePrecisionKernel {
2707  enum { value = BLAZE_BLAS_MODE &&
2708  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2709  IsDouble<typename T1::ElementType>::value &&
2710  IsDouble<typename T2::ElementType>::value &&
2711  IsDouble<typename T3::ElementType>::value &&
2712  !IsComplex<T4>::value };
2713  };
2714  //**********************************************************************************************
2715 
2716  //**********************************************************************************************
2718 
// Helper trait selecting the single precision complex BLAS kernel. The value is set when
// BLAZE_BLAS_MODE is active, the three matrix types are vectorizable, and all three element
// types are exactly complex<float>.
2721  template< typename T1, typename T2, typename T3 >
2722  struct UseSinglePrecisionComplexKernel {
2723  typedef complex<float> Type;
2724  enum { value = BLAZE_BLAS_MODE &&
2725  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2726  IsSame<typename T1::ElementType,Type>::value &&
2727  IsSame<typename T2::ElementType,Type>::value &&
2728  IsSame<typename T3::ElementType,Type>::value };
2729  };
2730  //**********************************************************************************************
2731 
2732  //**********************************************************************************************
2734 
// Helper trait selecting the double precision complex BLAS kernel. The value is set when
// BLAZE_BLAS_MODE is active, the three matrix types are vectorizable, and all three element
// types are exactly complex<double>.
2737  template< typename T1, typename T2, typename T3 >
2738  struct UseDoublePrecisionComplexKernel {
2739  typedef complex<double> Type;
2740  enum { value = BLAZE_BLAS_MODE &&
2741  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2742  IsSame<typename T1::ElementType,Type>::value &&
2743  IsSame<typename T2::ElementType,Type>::value &&
2744  IsSame<typename T3::ElementType,Type>::value };
2745  };
2746  //**********************************************************************************************
2747 
2748  //**********************************************************************************************
2750 
// Helper trait selecting the default (non-BLAS) kernel. The value is set when BLAZE_BLAS_MODE
// is inactive or when none of the four BLAS kernel traits above applies to the given types.
2752  template< typename T1, typename T2, typename T3, typename T4 >
2753  struct UseDefaultKernel {
2754  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2755  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2756  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2757  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2758  };
2759  //**********************************************************************************************
2760 
2761  //**********************************************************************************************
2763 
// Helper trait selecting the vectorized default kernel. The value is set when the three
// matrix types are vectorizable, their element types and the scalar type T4 are all the same
// type, and IntrinsicTrait reports addition, subtraction and multiplication for that type.
2765  template< typename T1, typename T2, typename T3, typename T4 >
2766  struct UseVectorizedDefaultKernel {
2767  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2768  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2769  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2770  IsSame<typename T1::ElementType,T4>::value &&
2771  IntrinsicTrait<typename T1::ElementType>::addition &&
2772  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2773  IntrinsicTrait<typename T1::ElementType>::multiplication };
2774  };
2775  //**********************************************************************************************
2776 
2777  public:
2778  //**Type definitions****************************************************************************
// Public type definitions of the scaled matrix multiplication expression.
2779  typedef DMatScalarMultExpr<MMM,ST,true> This; // Type of this expression instance
2780  typedef typename MultTrait<RES,ST>::Type ResultType; // MultTrait result of the product type RES and the scalar type ST
2781  typedef typename ResultType::OppositeType OppositeType; // OppositeType as declared by ResultType
2782  typedef typename ResultType::TransposeType TransposeType; // TransposeType as declared by ResultType
2783  typedef typename ResultType::ElementType ElementType; // Element type of ResultType
2784  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type for ElementType
2785  typedef const ElementType ReturnType; // Constant element type
2786  typedef const ResultType CompositeType; // Constant result type
2787 
      // Type of the left-hand side operand: the wrapped matrix multiplication expression.
2789  typedef const TDMatDMatMultExpr<MT1,MT2> LeftOperand;
2790 
      // Type of the right-hand side operand: the scalar.
2792  typedef ST RightOperand;
2793 
      // Type used for the left-hand side matrix operand: const RT1 or CT1, selected by evaluateLeft.
2795  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2796 
      // Type used for the right-hand side matrix operand: const RT2 or CT2, selected by evaluateRight.
2798  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2799  //**********************************************************************************************
2800 
2801  //**Compilation flags***************************************************************************
// Compilation flag: set when both matrix operands are vectorizable, both element types and
// the scalar type are the same type, and IntrinsicTrait reports addition and multiplication
// for that element type.
2803  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2804  IsSame<ET1,ET2>::value &&
2805  IsSame<ET1,ST>::value &&
2806  IntrinsicTrait<ET1>::addition &&
2807  IntrinsicTrait<ET1>::multiplication };
2808 
      // Compilation flag: set when neither operand has to be evaluated and both operand types
      // are themselves SMP assignable.
2810  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2811  !evaluateRight && MT2::smpAssignable };
2812  //**********************************************************************************************
2813 
2814  //**Constructor*********************************************************************************
// Constructor: stores the given matrix multiplication expression and the scalar.
//
// matrix : the left-hand side matrix multiplication expression
// scalar : the right-hand side scalar
2820  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2821  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2822  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2823  {}
2824  //**********************************************************************************************
2825 
2826  //**Access operator*****************************************************************************
// 2D-access to the elements of the scaled matrix product.
//
// i : row access index; asserted to be smaller than matrix_.rows()
// j : column access index; asserted to be smaller than matrix_.columns()
//
// Returns the (i,j) element of the wrapped multiplication expression multiplied by the
// scalar. The return type is ReturnType (const ElementType), since the expression yields a
// single element and not a matrix of type ResultType.
2833  inline ReturnType operator()( size_t i, size_t j ) const {
2834  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2835  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2836  return matrix_(i,j) * scalar_;
2837  }
2838  //**********************************************************************************************
2839 
2840  //**Rows function*******************************************************************************
// Returns the number of rows, as reported by the wrapped multiplication expression.
2845  inline size_t rows() const {
2846  return matrix_.rows();
2847  }
2848  //**********************************************************************************************
2849 
2850  //**Columns function****************************************************************************
// Returns the number of columns, as reported by the wrapped multiplication expression.
2855  inline size_t columns() const {
2856  return matrix_.columns();
2857  }
2858  //**********************************************************************************************
2859 
2860  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix multiplication expression.
2865  inline LeftOperand leftOperand() const {
2866  return matrix_;
2867  }
2868  //**********************************************************************************************
2869 
2870  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar.
2875  inline RightOperand rightOperand() const {
2876  return scalar_;
2877  }
2878  //**********************************************************************************************
2879 
2880  //**********************************************************************************************
// Returns whether the expression can alias with the given address; the query is forwarded
// unchanged to the wrapped multiplication expression.
//
// alias : the address to be checked
2886  template< typename T >
2887  inline bool canAlias( const T* alias ) const {
2888  return matrix_.canAlias( alias );
2889  }
2890  //**********************************************************************************************
2891 
2892  //**********************************************************************************************
// Returns whether the expression is aliased with the given address; the query is forwarded
// unchanged to the wrapped multiplication expression.
//
// alias : the address to be checked
2898  template< typename T >
2899  inline bool isAliased( const T* alias ) const {
2900  return matrix_.isAliased( alias );
2901  }
2902  //**********************************************************************************************
2903 
2904  //**********************************************************************************************
// Returns the alignment flag reported by the wrapped multiplication expression.
2909  inline bool isAligned() const {
2910  return matrix_.isAligned();
2911  }
2912  //**********************************************************************************************
2913 
2914  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments. This is the case when
// (a) BLAZE_BLAS_IS_PARALLEL is not set or the number of result elements (rows * columns) is
//     below TDMATDMATMULT_THRESHOLD, and
// (b) the number of columns of the right-hand side operand of the wrapped multiplication
//     exceeds SMP_TDMATDMATMULT_THRESHOLD.
// NOTE(review): condition (a) presumably avoids combining SMP assignment with a parallel BLAS
// kernel for large products -- confirm against the kernel selection of the assign functions.
2919  inline bool canSMPAssign() const {
2920  typename MMM::RightOperand B( matrix_.rightOperand() );
2921  return ( !BLAZE_BLAS_IS_PARALLEL ||
2922  ( rows() * columns() < TDMATDMATMULT_THRESHOLD ) ) &&
2923  ( B.columns() > SMP_TDMATDMATMULT_THRESHOLD );
2924  }
2925  //**********************************************************************************************
2926 
2927  private:
2928  //**Member variables****************************************************************************
2929  LeftOperand matrix_; // Left-hand side operand: the wrapped matrix multiplication expression
2930  RightOperand scalar_; // Right-hand side operand: the scalar of the multiplication expression
2931  //**********************************************************************************************
2932 
2933  //**BLAS kernel (single precision)**************************************************************
2934 #if BLAZE_BLAS_MODE
2935 
// Single precision BLAS kernel (only compiled when BLAZE_BLAS_MODE is set). Passes the data
// pointers, dimensions and spacings of the three matrices together with alpha and beta to
// cblas_ssymm() or cblas_sgemm().
// NOTE(review): per the CBLAS convention these routines compute C = alpha*A*B + beta*C --
// confirm against the BLAS reference.
//
// C     : target matrix
// A     : left-hand side matrix operand
// B     : right-hand side matrix operand
// alpha : scaling factor passed to the BLAS routine for the product
// beta  : scaling factor passed to the BLAS routine for C
//
// NOTE(review): the listing skips source lines 2956-2958 inside the body; check the original
// file for the statements that belong there.
2949  template< typename MT3 // Type of the left-hand side target matrix
2950  , typename MT4 // Type of the left-hand side matrix operand
2951  , typename MT5 > // Type of the right-hand side matrix operand
2952  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
2953  {
2954  using boost::numeric_cast;
2955 
2959 
      // The CBLAS interface takes int arguments, hence the checked conversions from the
      // size values of the matrices.
2960  const int M ( numeric_cast<int>( A.rows() ) );
2961  const int N ( numeric_cast<int>( B.columns() ) );
2962  const int K ( numeric_cast<int>( A.columns() ) );
2963  const int lda( numeric_cast<int>( A.spacing() ) );
2964  const int ldb( numeric_cast<int>( B.spacing() ) );
2965  const int ldc( numeric_cast<int>( C.spacing() ) );
2966 
      // All three conditions depend on type traits only. A symmetric A with a row-major
      // target and a symmetric B with a column-major target are handled by cblas_ssymm();
      // in the second case B takes the place of the symmetric matrix argument.
2967  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2968  cblas_ssymm( CblasRowMajor, CblasLeft, CblasUpper,
2969  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2970  }
2971  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2972  cblas_ssymm( CblasColMajor, CblasRight, CblasLower,
2973  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2974  }
2975  else {
      // General case: layout and transposition flags follow the storage order of the
      // target: A is flagged as transposed for a row-major target, B for a column-major one.
2976  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2977  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2978  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2979  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2980  }
2981  }
2982 #endif
2983  //**********************************************************************************************
2984 
2985  //**BLAS kernel (double precision)**************************************************************
2986 #if BLAZE_BLAS_MODE
2987 
3001  template< typename MT3 // Type of the left-hand side target matrix
3002  , typename MT4 // Type of the left-hand side matrix operand
3003  , typename MT5 > // Type of the right-hand side matrix operand
3004  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
3005  {
3006  using boost::numeric_cast;
3007 
3011 
3012  const int M ( numeric_cast<int>( A.rows() ) );
3013  const int N ( numeric_cast<int>( B.columns() ) );
3014  const int K ( numeric_cast<int>( A.columns() ) );
3015  const int lda( numeric_cast<int>( A.spacing() ) );
3016  const int ldb( numeric_cast<int>( B.spacing() ) );
3017  const int ldc( numeric_cast<int>( C.spacing() ) );
3018 
3019  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
3020  cblas_dsymm( CblasRowMajor, CblasLeft, CblasUpper,
3021  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
3022  }
3023  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
3024  cblas_dsymm( CblasColMajor, CblasRight, CblasLower,
3025  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
3026  }
3027  else {
3028  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3029  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3030  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3031  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
3032  }
3033  }
3034 #endif
3035  //**********************************************************************************************
3036 
3037  //**BLAS kernel (single precision complex)******************************************************
3038 #if BLAZE_BLAS_MODE
3039 
3053  template< typename MT3 // Type of the left-hand side target matrix
3054  , typename MT4 // Type of the left-hand side matrix operand
3055  , typename MT5 > // Type of the right-hand side matrix operand
3056  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
3057  complex<float> alpha, complex<float> beta )
3058  {
3059  using boost::numeric_cast;
3060 
3064  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3065  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3066  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3067 
3068  const int M ( numeric_cast<int>( A.rows() ) );
3069  const int N ( numeric_cast<int>( B.columns() ) );
3070  const int K ( numeric_cast<int>( A.columns() ) );
3071  const int lda( numeric_cast<int>( A.spacing() ) );
3072  const int ldb( numeric_cast<int>( B.spacing() ) );
3073  const int ldc( numeric_cast<int>( C.spacing() ) );
3074 
3075  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
3076  cblas_csymm( CblasRowMajor, CblasLeft, CblasUpper,
3077  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3078  }
3079  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
3080  cblas_csymm( CblasColMajor, CblasRight, CblasLower,
3081  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
3082  }
3083  else {
3084  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3085  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3086  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3087  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3088  }
3089  }
3090 #endif
3091  //**********************************************************************************************
3092 
3093  //**BLAS kernel (double precision complex)******************************************************
3094 #if BLAZE_BLAS_MODE
3095 
3109  template< typename MT3 // Type of the left-hand side target matrix
3110  , typename MT4 // Type of the left-hand side matrix operand
3111  , typename MT5 > // Type of the right-hand side matrix operand
3112  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
3113  complex<double> alpha, complex<double> beta )
3114  {
3115  using boost::numeric_cast;
3116 
3120  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3121  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3122  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3123 
3124  const int M ( numeric_cast<int>( A.rows() ) );
3125  const int N ( numeric_cast<int>( B.columns() ) );
3126  const int K ( numeric_cast<int>( A.columns() ) );
3127  const int lda( numeric_cast<int>( A.spacing() ) );
3128  const int ldb( numeric_cast<int>( B.spacing() ) );
3129  const int ldc( numeric_cast<int>( C.spacing() ) );
3130 
3131  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
3132  cblas_zsymm( CblasRowMajor, CblasLeft, CblasUpper,
3133  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3134  }
3135  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
3136  cblas_zsymm( CblasColMajor, CblasRight, CblasLower,
3137  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
3138  }
3139  else {
3140  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3141  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3142  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3143  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3144  }
3145  }
3146 #endif
3147  //**********************************************************************************************
3148 
3149  //**Assignment to dense matrices****************************************************************
3161  template< typename MT // Type of the target dense matrix
3162  , bool SO > // Storage order of the target dense matrix
3163  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3164  {
3166 
3167  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3168  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3169 
3170  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3171  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3172 
3173  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3174  return;
3175  }
3176  else if( left.columns() == 0UL ) {
3177  reset( ~lhs );
3178  return;
3179  }
3180 
3181  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3182  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3183 
3184  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3185  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3186  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3187  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3188  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3189  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3190 
3191  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
3192  }
3193  //**********************************************************************************************
3194 
3195  //**Assignment to dense matrices (kernel selection)*********************************************
3206  template< typename MT3 // Type of the left-hand side target matrix
3207  , typename MT4 // Type of the left-hand side matrix operand
3208  , typename MT5 // Type of the right-hand side matrix operand
3209  , typename ST2 > // Type of the scalar value
3210  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3211  {
3212  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
3213  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
3214  else
3215  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
3216  }
3217  //**********************************************************************************************
3218 
3219  //**Default assignment to dense matrices********************************************************
3233  template< typename MT3 // Type of the left-hand side target matrix
3234  , typename MT4 // Type of the left-hand side matrix operand
3235  , typename MT5 // Type of the right-hand side matrix operand
3236  , typename ST2 > // Type of the scalar value
3237  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3238  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3239  {
3240  for( size_t i=0UL; i<A.rows(); ++i ) {
3241  for( size_t k=0UL; k<B.columns(); ++k ) {
3242  C(i,k) = A(i,0UL) * B(0UL,k);
3243  }
3244  for( size_t j=1UL; j<A.columns(); ++j ) {
3245  for( size_t k=0UL; k<B.columns(); ++k ) {
3246  C(i,k) += A(i,j) * B(j,k);
3247  }
3248  }
3249  for( size_t k=0UL; k<B.columns(); ++k ) {
3250  C(i,k) *= scalar;
3251  }
3252  }
3253  }
3254  //**********************************************************************************************
3255 
3256  //**Vectorized default assignment to row-major dense matrices***********************************
/*!\brief Vectorized default assignment of a scaled matrix-matrix multiplication to a row-major
//        dense matrix (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The kernel is selected when \c UseVectorizedDefaultKernel applies. For each target position
// it accumulates A(i,k) * B(k,j) over all k, multiplies the sum by \a scalar and stores it to
// \a C. The columns are traversed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements; \a B is read via load(), \a A via element access and \a C is written via
// store().
//
// NOTE(review): none of the xmm accumulators is explicitly initialized before its first use in
// "xmmN = xmmN + ...". This presumably relies on the default constructor of IntrinsicType
// producing a zero vector -- confirm.
//
// NOTE(review): the loop conditions only require the first element of the last vector of a
// block to be below N, so for N not being a multiple of IT::size the load()/store() calls reach
// past column N-1. Presumably the rows of B and C are padded accordingly -- confirm.
*/
3270  template< typename MT3 // Type of the left-hand side target matrix
3271  , typename MT4 // Type of the left-hand side matrix operand
3272  , typename MT5 // Type of the right-hand side matrix operand
3273  , typename ST2 > // Type of the scalar value
3274  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3275  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3276  {
3277  typedef IntrinsicTrait<ElementType> IT;
3278 
// M x N is the extent of the result, K the length of the accumulated dot products
3279  const size_t M( A.rows() );
3280  const size_t N( B.columns() );
3281  const size_t K( A.columns() );
3282 
// Intrinsic vector built from the scalar; every accumulated result is multiplied by it before the store
3283  const IntrinsicType factor( set( scalar ) );
3284 
// Column index shared by all block loops below; each loop continues where the previous one stopped
3285  size_t j( 0UL );
3286 
// Blocks of 8 intrinsic vectors per row, one row at a time
3287  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3288  for( size_t i=0UL; i<M; ++i ) {
3289  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3290  for( size_t k=0UL; k<K; ++k ) {
3291  const IntrinsicType a1( set( A(i,k) ) );
3292  xmm1 = xmm1 + a1 * B.load(k,j );
3293  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3294  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3295  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3296  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3297  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3298  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3299  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3300  }
3301  (~C).store( i, j , xmm1 * factor );
3302  (~C).store( i, j+IT::size , xmm2 * factor );
3303  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
3304  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
3305  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
3306  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
3307  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
3308  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
3309  }
3310  }
// Blocks of 4 intrinsic vectors, two rows per pass (xmm1-4: row i, xmm5-8: row i+1)
3311  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3312  size_t i( 0UL );
3313  for( ; (i+2UL) <= M; i+=2UL ) {
3314  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3315  for( size_t k=0UL; k<K; ++k ) {
3316  const IntrinsicType a1( set( A(i ,k) ) );
3317  const IntrinsicType a2( set( A(i+1UL,k) ) );
3318  const IntrinsicType b1( B.load(k,j ) );
3319  const IntrinsicType b2( B.load(k,j+IT::size ) );
3320  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3321  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3322  xmm1 = xmm1 + a1 * b1;
3323  xmm2 = xmm2 + a1 * b2;
3324  xmm3 = xmm3 + a1 * b3;
3325  xmm4 = xmm4 + a1 * b4;
3326  xmm5 = xmm5 + a2 * b1;
3327  xmm6 = xmm6 + a2 * b2;
3328  xmm7 = xmm7 + a2 * b3;
3329  xmm8 = xmm8 + a2 * b4;
3330  }
3331  (~C).store( i , j , xmm1 * factor );
3332  (~C).store( i , j+IT::size , xmm2 * factor );
3333  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
3334  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
3335  (~C).store( i+1UL, j , xmm5 * factor );
3336  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
3337  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
3338  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
3339  }
// Leftover single row for odd M
3340  if( i < M ) {
3341  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3342  for( size_t k=0UL; k<K; ++k ) {
3343  const IntrinsicType a1( set( A(i,k) ) );
3344  xmm1 = xmm1 + a1 * B.load(k,j );
3345  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3346  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3347  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3348  }
3349  (~C).store( i, j , xmm1 * factor );
3350  (~C).store( i, j+IT::size , xmm2 * factor );
3351  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
3352  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
3353  }
3354  }
// Blocks of 2 intrinsic vectors, two rows per pass (xmm1-2: row i, xmm3-4: row i+1)
3355  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3356  size_t i( 0UL );
3357  for( ; (i+2UL) <= M; i+=2UL ) {
3358  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3359  for( size_t k=0UL; k<K; ++k ) {
3360  const IntrinsicType a1( set( A(i ,k) ) );
3361  const IntrinsicType a2( set( A(i+1UL,k) ) );
3362  const IntrinsicType b1( B.load(k,j ) );
3363  const IntrinsicType b2( B.load(k,j+IT::size) );
3364  xmm1 = xmm1 + a1 * b1;
3365  xmm2 = xmm2 + a1 * b2;
3366  xmm3 = xmm3 + a2 * b1;
3367  xmm4 = xmm4 + a2 * b2;
3368  }
3369  (~C).store( i , j , xmm1 * factor );
3370  (~C).store( i , j+IT::size, xmm2 * factor );
3371  (~C).store( i+1UL, j , xmm3 * factor );
3372  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
3373  }
// Leftover single row for odd M
3374  if( i < M ) {
3375  IntrinsicType xmm1, xmm2;
3376  for( size_t k=0UL; k<K; ++k ) {
3377  const IntrinsicType a1( set( A(i,k) ) );
3378  xmm1 = xmm1 + a1 * B.load(k,j );
3379  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3380  }
3381  (~C).store( i, j , xmm1 * factor );
3382  (~C).store( i, j+IT::size, xmm2 * factor );
3383  }
3384  }
// Final block of a single intrinsic vector, again two rows per pass plus a leftover row
3385  if( j < N ) {
3386  size_t i( 0UL );
3387  for( ; (i+2UL) <= M; i+=2UL ) {
3388  IntrinsicType xmm1, xmm2;
3389  for( size_t k=0UL; k<K; ++k ) {
3390  const IntrinsicType b1( B.load(k,j) );
3391  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3392  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3393  }
3394  (~C).store( i , j, xmm1 * factor );
3395  (~C).store( i+1UL, j, xmm2 * factor );
3396  }
3397  if( i < M ) {
3398  IntrinsicType xmm1;
3399  for( size_t k=0UL; k<K; ++k ) {
3400  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3401  }
3402  (~C).store( i, j, xmm1 * factor );
3403  }
3404  }
3405  }
3406  //**********************************************************************************************
3407 
3408  //**Vectorized default assignment to column-major dense matrices********************************
/*!\brief Vectorized default assignment of a scaled matrix-matrix multiplication to a
//        column-major dense matrix (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The kernel is selected when \c UseVectorizedDefaultKernel applies. For each target position
// it accumulates A(i,k) * B(k,j) over all k, multiplies the sum by \a scalar and stores it to
// \a C. The rows are traversed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements; \a A is read via load(), \a B via element access and \a C is written via
// store().
//
// NOTE(review): none of the xmm accumulators is explicitly initialized before its first use in
// "xmmN = xmmN + ...". This presumably relies on the default constructor of IntrinsicType
// producing a zero vector -- confirm.
//
// NOTE(review): the loop conditions only require the first element of the last vector of a
// block to be below M, so for M not being a multiple of IT::size the load()/store() calls reach
// past row M-1. Presumably the columns of A and C are padded accordingly -- confirm.
*/
3422  template< typename MT3 // Type of the left-hand side target matrix
3423  , typename MT4 // Type of the left-hand side matrix operand
3424  , typename MT5 // Type of the right-hand side matrix operand
3425  , typename ST2 > // Type of the scalar value
3426  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3427  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3428  {
3429  typedef IntrinsicTrait<ElementType> IT;
3430 
// M x N is the extent of the result, K the length of the accumulated dot products
3431  const size_t M( A.rows() );
3432  const size_t N( B.columns() );
3433  const size_t K( A.columns() );
3434 
// Intrinsic vector built from the scalar; every accumulated result is multiplied by it before the store
3435  const IntrinsicType factor( set( scalar ) );
3436 
// Row index shared by all block loops below; each loop continues where the previous one stopped
3437  size_t i( 0UL );
3438 
// Blocks of 8 intrinsic vectors per column, one column at a time
3439  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3440  for( size_t j=0UL; j<N; ++j ) {
3441  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3442  for( size_t k=0UL; k<K; ++k ) {
3443  const IntrinsicType b1( set( B(k,j) ) );
3444  xmm1 = xmm1 + A.load(i ,k) * b1;
3445  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3446  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3447  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3448  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3449  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3450  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3451  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3452  }
3453  (~C).store( i , j, xmm1 * factor );
3454  (~C).store( i+IT::size , j, xmm2 * factor );
3455  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
3456  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
3457  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
3458  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
3459  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
3460  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
3461  }
3462  }
// Blocks of 4 intrinsic vectors, two columns per pass (xmm1-4: column j, xmm5-8: column j+1)
3463  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3464  size_t j( 0UL );
3465  for( ; (j+2UL) <= N; j+=2UL ) {
3466  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3467  for( size_t k=0UL; k<K; ++k ) {
3468  const IntrinsicType a1( A.load(i ,k) );
3469  const IntrinsicType a2( A.load(i+IT::size ,k) );
3470  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3471  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3472  const IntrinsicType b1( set( B(k,j ) ) );
3473  const IntrinsicType b2( set( B(k,j+1UL) ) );
3474  xmm1 = xmm1 + a1 * b1;
3475  xmm2 = xmm2 + a2 * b1;
3476  xmm3 = xmm3 + a3 * b1;
3477  xmm4 = xmm4 + a4 * b1;
3478  xmm5 = xmm5 + a1 * b2;
3479  xmm6 = xmm6 + a2 * b2;
3480  xmm7 = xmm7 + a3 * b2;
3481  xmm8 = xmm8 + a4 * b2;
3482  }
3483  (~C).store( i , j , xmm1 * factor );
3484  (~C).store( i+IT::size , j , xmm2 * factor );
3485  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
3486  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
3487  (~C).store( i , j+1UL, xmm5 * factor );
3488  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
3489  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
3490  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
3491  }
// Leftover single column for odd N
3492  if( j < N ) {
3493  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3494  for( size_t k=0UL; k<K; ++k ) {
3495  const IntrinsicType b1( set( B(k,j) ) );
3496  xmm1 = xmm1 + A.load(i ,k) * b1;
3497  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3498  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3499  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3500  }
3501  (~C).store( i , j, xmm1 * factor );
3502  (~C).store( i+IT::size , j, xmm2 * factor );
3503  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
3504  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
3505  }
3506  }
// Blocks of 2 intrinsic vectors, two columns per pass (xmm1-2: column j, xmm3-4: column j+1)
3507  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3508  size_t j( 0UL );
3509  for( ; (j+2UL) <= N; j+=2UL ) {
3510  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3511  for( size_t k=0UL; k<K; ++k ) {
3512  const IntrinsicType a1( A.load(i ,k) );
3513  const IntrinsicType a2( A.load(i+IT::size,k) );
3514  const IntrinsicType b1( set( B(k,j ) ) );
3515  const IntrinsicType b2( set( B(k,j+1UL) ) );
3516  xmm1 = xmm1 + a1 * b1;
3517  xmm2 = xmm2 + a2 * b1;
3518  xmm3 = xmm3 + a1 * b2;
3519  xmm4 = xmm4 + a2 * b2;
3520  }
3521  (~C).store( i , j , xmm1 * factor );
3522  (~C).store( i+IT::size, j , xmm2 * factor );
3523  (~C).store( i , j+1UL, xmm3 * factor );
3524  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
3525  }
// Leftover single column for odd N
3526  if( j < N ) {
3527  IntrinsicType xmm1, xmm2;
3528  for( size_t k=0UL; k<K; ++k ) {
3529  const IntrinsicType b1( set( B(k,j) ) );
3530  xmm1 = xmm1 + A.load(i ,k) * b1;
3531  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3532  }
3533  (~C).store( i , j, xmm1 * factor );
3534  (~C).store( i+IT::size, j, xmm2 * factor );
3535  }
3536  }
// Final block of a single intrinsic vector, again two columns per pass plus a leftover column
3537  if( i < M ) {
3538  size_t j( 0UL );
3539  for( ; (j+2UL) <= N; j+=2UL ) {
3540  IntrinsicType xmm1, xmm2;
3541  for( size_t k=0UL; k<K; ++k ) {
3542  const IntrinsicType a1( A.load(i,k) );
3543  xmm1 = xmm1 + a1 * set( B(k,j ) );
3544  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3545  }
3546  (~C).store( i, j , xmm1 * factor );
3547  (~C).store( i, j+1UL, xmm2 * factor );
3548  }
3549  if( j < N ) {
3550  IntrinsicType xmm1;
3551  for( size_t k=0UL; k<K; ++k ) {
3552  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3553  }
3554  (~C).store( i, j, xmm1 * factor );
3555  }
3556  }
3557  }
3558  //**********************************************************************************************
3559 
3560  //**BLAS-based assignment to dense matrices (default)*******************************************
3574  template< typename MT3 // Type of the left-hand side target matrix
3575  , typename MT4 // Type of the left-hand side matrix operand
3576  , typename MT5 // Type of the right-hand side matrix operand
3577  , typename ST2 > // Type of the scalar value
3578  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3579  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3580  {
3581  selectDefaultAssignKernel( C, A, B, scalar );
3582  }
3583  //**********************************************************************************************
3584 
3585  //**BLAS-based assignment to dense matrices (single precision)**********************************
3586 #if BLAZE_BLAS_MODE
3587 
3600  template< typename MT3 // Type of the left-hand side target matrix
3601  , typename MT4 // Type of the left-hand side matrix operand
3602  , typename MT5 // Type of the right-hand side matrix operand
3603  , typename ST2 > // Type of the scalar value
3604  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3605  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3606  {
3607  sgemm( C, A, B, scalar, 0.0F );
3608  }
3609 #endif
3610  //**********************************************************************************************
3611 
3612  //**BLAS-based assignment to dense matrices (double precision)**********************************
3613 #if BLAZE_BLAS_MODE
3614 
3627  template< typename MT3 // Type of the left-hand side target matrix
3628  , typename MT4 // Type of the left-hand side matrix operand
3629  , typename MT5 // Type of the right-hand side matrix operand
3630  , typename ST2 > // Type of the scalar value
3631  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3632  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3633  {
3634  dgemm( C, A, B, scalar, 0.0 );
3635  }
3636 #endif
3637  //**********************************************************************************************
3638 
3639  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3640 #if BLAZE_BLAS_MODE
3641 
3654  template< typename MT3 // Type of the left-hand side target matrix
3655  , typename MT4 // Type of the left-hand side matrix operand
3656  , typename MT5 // Type of the right-hand side matrix operand
3657  , typename ST2 > // Type of the scalar value
3658  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3659  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3660  {
3661  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3662  }
3663 #endif
3664  //**********************************************************************************************
3665 
3666  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3667 #if BLAZE_BLAS_MODE
3668 
3681  template< typename MT3 // Type of the left-hand side target matrix
3682  , typename MT4 // Type of the left-hand side matrix operand
3683  , typename MT5 // Type of the right-hand side matrix operand
3684  , typename ST2 > // Type of the scalar value
3685  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3686  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3687  {
3688  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3689  }
3690 #endif
3691  //**********************************************************************************************
3692 
3693  //**Assignment to sparse matrices***************************************************************
3705  template< typename MT // Type of the target sparse matrix
3706  , bool SO > // Storage order of the target sparse matrix
3707  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3708  {
3710 
3711  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
3712 
3719 
3720  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3721  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3722 
3723  const TmpType tmp( serial( rhs ) );
3724  assign( ~lhs, tmp );
3725  }
3726  //**********************************************************************************************
3727 
3728  //**Addition assignment to dense matrices*******************************************************
3740  template< typename MT // Type of the target dense matrix
3741  , bool SO > // Storage order of the target dense matrix
3742  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3743  {
3745 
3746  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3747  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3748 
3749  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3750  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3751 
3752  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3753  return;
3754  }
3755 
3756  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3757  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3758 
3759  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3760  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3761  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3762  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3763  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3764  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3765 
3766  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3767  }
3768  //**********************************************************************************************
3769 
3770  //**Addition assignment to dense matrices (kernel selection)************************************
3781  template< typename MT3 // Type of the left-hand side target matrix
3782  , typename MT4 // Type of the left-hand side matrix operand
3783  , typename MT5 // Type of the right-hand side matrix operand
3784  , typename ST2 > // Type of the scalar value
3785  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3786  {
3787  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
3788  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3789  else
3790  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3791  }
3792  //**********************************************************************************************
3793 
3794  //**Default addition assignment to dense matrices***********************************************
3808  template< typename MT3 // Type of the left-hand side target matrix
3809  , typename MT4 // Type of the left-hand side matrix operand
3810  , typename MT5 // Type of the right-hand side matrix operand
3811  , typename ST2 > // Type of the scalar value
3812  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3813  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3814  {
3815  const ResultType tmp( serial( A * B * scalar ) );
3816  addAssign( C, tmp );
3817  }
3818  //**********************************************************************************************
3819 
3820  //**Vectorized default addition assignment to row-major dense matrices**************************
3834  template< typename MT3 // Type of the left-hand side target matrix
3835  , typename MT4 // Type of the left-hand side matrix operand
3836  , typename MT5 // Type of the right-hand side matrix operand
3837  , typename ST2 > // Type of the scalar value
3838  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3839  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3840  {
3841  typedef IntrinsicTrait<ElementType> IT;
3842 
3843  const size_t M( A.rows() );
3844  const size_t N( B.columns() );
3845  const size_t K( A.columns() );
3846 
3847  const IntrinsicType factor( set( scalar ) );
3848 
3849  size_t j( 0UL );
3850 
3851  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3852  for( size_t i=0UL; i<M; ++i ) {
3853  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3854  for( size_t k=0UL; k<K; ++k ) {
3855  const IntrinsicType a1( set( A(i,k) ) );
3856  xmm1 = xmm1 + a1 * B.load(k,j );
3857  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3858  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3859  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3860  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3861  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3862  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3863  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3864  }
3865  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3866  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3867  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3868  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3869  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3870  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3871  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3872  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
3873  }
3874  }
3875  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3876  size_t i( 0UL );
3877  for( ; (i+2UL) <= M; i+=2UL ) {
3878  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3879  for( size_t k=0UL; k<K; ++k ) {
3880  const IntrinsicType a1( set( A(i ,k) ) );
3881  const IntrinsicType a2( set( A(i+1UL,k) ) );
3882  const IntrinsicType b1( B.load(k,j ) );
3883  const IntrinsicType b2( B.load(k,j+IT::size ) );
3884  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3885  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3886  xmm1 = xmm1 + a1 * b1;
3887  xmm2 = xmm2 + a1 * b2;
3888  xmm3 = xmm3 + a1 * b3;
3889  xmm4 = xmm4 + a1 * b4;
3890  xmm5 = xmm5 + a2 * b1;
3891  xmm6 = xmm6 + a2 * b2;
3892  xmm7 = xmm7 + a2 * b3;
3893  xmm8 = xmm8 + a2 * b4;
3894  }
3895  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3896  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3897  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3898  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3899  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3900  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3901  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3902  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
3903  }
3904  if( i < M ) {
3905  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3906  for( size_t k=0UL; k<K; ++k ) {
3907  const IntrinsicType a1( set( A(i,k) ) );
3908  xmm1 = xmm1 + a1 * B.load(k,j );
3909  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3910  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3911  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3912  }
3913  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3914  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3915  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3916  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3917  }
3918  }
3919  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3920  size_t i( 0UL );
3921  for( ; (i+2UL) <= M; i+=2UL ) {
3922  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3923  for( size_t k=0UL; k<K; ++k ) {
3924  const IntrinsicType a1( set( A(i ,k) ) );
3925  const IntrinsicType a2( set( A(i+1UL,k) ) );
3926  const IntrinsicType b1( B.load(k,j ) );
3927  const IntrinsicType b2( B.load(k,j+IT::size) );
3928  xmm1 = xmm1 + a1 * b1;
3929  xmm2 = xmm2 + a1 * b2;
3930  xmm3 = xmm3 + a2 * b1;
3931  xmm4 = xmm4 + a2 * b2;
3932  }
3933  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3934  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3935  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3936  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
3937  }
3938  if( i < M ) {
3939  IntrinsicType xmm1, xmm2;
3940  for( size_t k=0UL; k<K; ++k ) {
3941  const IntrinsicType a1( set( A(i,k) ) );
3942  xmm1 = xmm1 + a1 * B.load(k,j );
3943  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3944  }
3945  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3946  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
3947  }
3948  }
3949  if( j < N ) {
3950  size_t i( 0UL );
3951  for( ; (i+2UL) <= M; i+=2UL ) {
3952  IntrinsicType xmm1, xmm2;
3953  for( size_t k=0UL; k<K; ++k ) {
3954  const IntrinsicType b1( B.load(k,j) );
3955  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3956  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3957  }
3958  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3959  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
3960  }
3961  if( i < M ) {
3962  IntrinsicType xmm1;
3963  for( size_t k=0UL; k<K; ++k ) {
3964  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3965  }
3966  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3967  }
3968  }
3969  }
3970  //**********************************************************************************************
3971 
3972  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel: addition assignment of a scaled matrix product to a column-major
// dense matrix. Each stored vector of C becomes (~C).load(..) + xmm * factor, where xmm sums
// A.load(i,k) * set( B(k,j) ) over k = 0..K-1 and factor is set( scalar ).
//
// Participates in overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds (EnableIf).
//
// C      - target matrix, read and written in place via (~C).load() / (~C).store()
// A      - left-hand side operand, read through A.load() with the row index advancing in
//          multiples of IT::size
// B      - right-hand side operand, read element-wise as B(k,j) and passed through set()
// scalar - scaling factor applied to every accumulated sum
//
// NOTE(review): the xmm accumulators have no explicit initializer; the sums are only correct if
// IntrinsicType default-constructs to zero - confirm in Intrinsics.h.
// NOTE(review): the loop bounds test only where a chunk starts, and the last stage issues a
// full-width load/store whenever i < M; presumably the operands are padded to a multiple of
// IT::size rows - confirm.
3986  template< typename MT3 // Type of the left-hand side target matrix
3987  , typename MT4 // Type of the left-hand side matrix operand
3988  , typename MT5 // Type of the right-hand side matrix operand
3989  , typename ST2 > // Type of the scalar value
3990  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3991  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3992  {
3993  typedef IntrinsicTrait<ElementType> IT;
3994 
3995  const size_t M( A.rows() );
3996  const size_t N( B.columns() );
3997  const size_t K( A.columns() );
3998 
3999  const IntrinsicType factor( set( scalar ) );
4000 
      // i is shared by all four stages below: each stage continues where the previous one stopped.
4001  size_t i( 0UL );
4002 
      // Stage 1: eight IT::size-wide row chunks per pass, one column j at a time.
4003  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
4004  for( size_t j=0UL; j<N; ++j ) {
4005  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4006  for( size_t k=0UL; k<K; ++k ) {
4007  const IntrinsicType b1( set( B(k,j) ) );
4008  xmm1 = xmm1 + A.load(i ,k) * b1;
4009  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4010  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4011  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4012  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
4013  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
4014  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
4015  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
4016  }
4017  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
4018  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
4019  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
4020  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
4021  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
4022  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
4023  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
4024  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
4025  }
4026  }
      // Stage 2: four row chunks per pass, two columns (j, j+1) at a time; xmm1-4 belong to
      // column j, xmm5-8 to column j+1. An odd last column is handled by the if( j < N ) tail.
4027  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
4028  size_t j( 0UL );
4029  for( ; (j+2UL) <= N; j+=2UL ) {
4030  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4031  for( size_t k=0UL; k<K; ++k ) {
4032  const IntrinsicType a1( A.load(i ,k) );
4033  const IntrinsicType a2( A.load(i+IT::size ,k) );
4034  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
4035  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
4036  const IntrinsicType b1( set( B(k,j ) ) );
4037  const IntrinsicType b2( set( B(k,j+1UL) ) );
4038  xmm1 = xmm1 + a1 * b1;
4039  xmm2 = xmm2 + a2 * b1;
4040  xmm3 = xmm3 + a3 * b1;
4041  xmm4 = xmm4 + a4 * b1;
4042  xmm5 = xmm5 + a1 * b2;
4043  xmm6 = xmm6 + a2 * b2;
4044  xmm7 = xmm7 + a3 * b2;
4045  xmm8 = xmm8 + a4 * b2;
4046  }
4047  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
4048  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
4049  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
4050  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
4051  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
4052  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
4053  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
4054  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
4055  }
4056  if( j < N ) {
4057  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4058  for( size_t k=0UL; k<K; ++k ) {
4059  const IntrinsicType b1( set( B(k,j) ) );
4060  xmm1 = xmm1 + A.load(i ,k) * b1;
4061  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4062  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4063  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4064  }
4065  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
4066  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
4067  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
4068  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
4069  }
4070  }
      // Stage 3: two row chunks per pass, two columns at a time, plus the odd-column tail.
4071  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
4072  size_t j( 0UL );
4073  for( ; (j+2UL) <= N; j+=2UL ) {
4074  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4075  for( size_t k=0UL; k<K; ++k ) {
4076  const IntrinsicType a1( A.load(i ,k) );
4077  const IntrinsicType a2( A.load(i+IT::size,k) );
4078  const IntrinsicType b1( set( B(k,j ) ) );
4079  const IntrinsicType b2( set( B(k,j+1UL) ) );
4080  xmm1 = xmm1 + a1 * b1;
4081  xmm2 = xmm2 + a2 * b1;
4082  xmm3 = xmm3 + a1 * b2;
4083  xmm4 = xmm4 + a2 * b2;
4084  }
4085  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
4086  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
4087  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
4088  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
4089  }
4090  if( j < N ) {
4091  IntrinsicType xmm1, xmm2;
4092  for( size_t k=0UL; k<K; ++k ) {
4093  const IntrinsicType b1( set( B(k,j) ) );
4094  xmm1 = xmm1 + A.load(i ,k) * b1;
4095  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
4096  }
4097  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
4098  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
4099  }
4100  }
      // Stage 4: one final row chunk starting at i, if any rows are left.
4101  if( i < M ) {
4102  size_t j( 0UL );
4103  for( ; (j+2UL) <= N; j+=2UL ) {
4104  IntrinsicType xmm1, xmm2;
4105  for( size_t k=0UL; k<K; ++k ) {
4106  const IntrinsicType a1( A.load(i,k) );
4107  xmm1 = xmm1 + a1 * set( B(k,j ) );
4108  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
4109  }
4110  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
4111  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
4112  }
4113  if( j < N ) {
4114  IntrinsicType xmm1;
4115  for( size_t k=0UL; k<K; ++k ) {
4116  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
4117  }
4118  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
4119  }
4120  }
4121  }
4122  //**********************************************************************************************
4123 
4124  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS-path addition assignment, fallback overload: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds. It forwards all four arguments unchanged to
// selectDefaultAddAssignKernel(), so no BLAS routine is called from here.
4138  template< typename MT3 // Type of the left-hand side target matrix
4139  , typename MT4 // Type of the left-hand side matrix operand
4140  , typename MT5 // Type of the right-hand side matrix operand
4141  , typename ST2 > // Type of the scalar value
4142  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4143  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4144  {
4145  selectDefaultAddAssignKernel( C, A, B, scalar );
4146  }
4147  //**********************************************************************************************
4148 
4149  //**BLAS-based addition assignment to dense matrices (single precision)*************************
4150 #if BLAZE_BLAS_MODE
4151 
// BLAS-path addition assignment, single precision: enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds; compiled only inside the enclosing
// #if BLAZE_BLAS_MODE. The whole update is delegated to sgemm().
4164  template< typename MT3 // Type of the left-hand side target matrix
4165  , typename MT4 // Type of the left-hand side matrix operand
4166  , typename MT5 // Type of the right-hand side matrix operand
4167  , typename ST2 > // Type of the scalar value
4168  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4169  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4170  {
      // NOTE(review): scalar / 1.0F are presumably GEMM's alpha / beta, i.e. the existing
      // content of C is kept and the scaled product is added - confirm against the sgemm wrapper.
4171  sgemm( C, A, B, scalar, 1.0F );
4172  }
4173 #endif
4174  //**********************************************************************************************
4175 
4176  //**BLAS-based addition assignment to dense matrices (double precision)*************************
4177 #if BLAZE_BLAS_MODE
4178 
// BLAS-path addition assignment, double precision: enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds; compiled only inside the enclosing
// #if BLAZE_BLAS_MODE. The whole update is delegated to dgemm().
4191  template< typename MT3 // Type of the left-hand side target matrix
4192  , typename MT4 // Type of the left-hand side matrix operand
4193  , typename MT5 // Type of the right-hand side matrix operand
4194  , typename ST2 > // Type of the scalar value
4195  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4196  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4197  {
      // NOTE(review): scalar / 1.0 are presumably GEMM's alpha / beta, i.e. the existing
      // content of C is kept and the scaled product is added - confirm against the dgemm wrapper.
4198  dgemm( C, A, B, scalar, 1.0 );
4199  }
4200 #endif
4201  //**********************************************************************************************
4202 
4203  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
4204 #if BLAZE_BLAS_MODE
4205 
// BLAS-path addition assignment, single precision complex: enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition); compiled only inside the enclosing #if BLAZE_BLAS_MODE. Delegates to cgemm().
4218  template< typename MT3 // Type of the left-hand side target matrix
4219  , typename MT4 // Type of the left-hand side matrix operand
4220  , typename MT5 // Type of the right-hand side matrix operand
4221  , typename ST2 > // Type of the scalar value
4222  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4223  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4224  {
      // scalar becomes the real part of a complex<float> with zero imaginary part; the second
      // complex argument is (1,0). NOTE(review): presumably alpha / beta of GEMM - confirm
      // against the cgemm wrapper.
4225  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4226  }
4227 #endif
4228  //**********************************************************************************************
4229 
4230  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
4231 #if BLAZE_BLAS_MODE
4232 
// BLAS-path addition assignment, double precision complex: enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition); compiled only inside the enclosing #if BLAZE_BLAS_MODE. Delegates to zgemm().
4245  template< typename MT3 // Type of the left-hand side target matrix
4246  , typename MT4 // Type of the left-hand side matrix operand
4247  , typename MT5 // Type of the right-hand side matrix operand
4248  , typename ST2 > // Type of the scalar value
4249  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4250  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4251  {
      // scalar becomes the real part of a complex<double> with zero imaginary part; the second
      // complex argument is (1,0). NOTE(review): presumably alpha / beta of GEMM - confirm
      // against the zgemm wrapper.
4252  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4253  }
4254 #endif
4255  //**********************************************************************************************
4256 
4257  //**Addition assignment to sparse matrices******************************************************
4258  // No special implementation for the addition assignment to sparse matrices.
4259  //**********************************************************************************************
4260 
4261  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled matrix product represented by rhs to the dense matrix lhs.
// Takes the two multiplication operands from rhs.matrix_, evaluates them through serial(), and
// passes them together with rhs.scalar_ to selectSubAssignKernel().
//
// lhs - target dense matrix (any storage order SO); its dimensions are asserted to match rhs
// rhs - the scaled multiplication expression to subtract
4273  template< typename MT // Type of the target dense matrix
4274  , bool SO > // Storage order of the target dense matrix
4275  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4276  {
4278 
4279  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4280  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4281 
4282  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4283  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4284 
      // Early exit: target has no elements, or the shared inner dimension is zero so the
      // kernels' k-loops would not run.
4285  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4286  return;
4287  }
4288 
4289  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4290  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4291 
      // Sanity checks: evaluation must not change the operand shapes, and A / B must fit lhs.
4292  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4293  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4294  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4295  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4296  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4297  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4298 
4299  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
4300  }
4301  //**********************************************************************************************
4302 
4303  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment: targets with fewer than
// TDMATDMATMULT_THRESHOLD elements (rows * columns of C) go to selectDefaultSubAssignKernel(),
// everything else goes to selectBlasSubAssignKernel().
//
// C      - target matrix
// A, B   - left- and right-hand side multiplication operands
// scalar - scaling factor, forwarded unchanged
4314  template< typename MT3 // Type of the left-hand side target matrix
4315  , typename MT4 // Type of the left-hand side matrix operand
4316  , typename MT5 // Type of the right-hand side matrix operand
4317  , typename ST2 > // Type of the scalar value
4318  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4319  {
4320  if( C.rows() * C.columns() < TDMATDMATMULT_THRESHOLD )
4321  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
4322  else
4323  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
4324  }
4325  //**********************************************************************************************
4326 
4327  //**Default subtraction assignment to dense matrices********************************************
// Non-vectorized default kernel for the subtraction assignment: available only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does NOT hold (DisableIf).
4341  template< typename MT3 // Type of the left-hand side target matrix
4342  , typename MT4 // Type of the left-hand side matrix operand
4343  , typename MT5 // Type of the right-hand side matrix operand
4344  , typename ST2 > // Type of the scalar value
4345  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4346  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4347  {
      // Evaluate the scaled product into a temporary first, then subtract that temporary from C.
4348  const ResultType tmp( serial( A * B * scalar ) );
4349  subAssign( C, tmp );
4350  }
4351  //**********************************************************************************************
4352 
4353  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default kernel: subtraction assignment of a scaled matrix product to a row-major
// dense matrix. Each stored vector of C becomes (~C).load(..) - xmm * factor, where xmm sums
// set( A(i,k) ) * B.load(k,j) over k = 0..K-1 and factor is set( scalar ).
//
// Participates in overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds (EnableIf).
//
// C      - target matrix, read and written in place via (~C).load() / (~C).store()
// A      - left-hand side operand, read element-wise as A(i,k) and passed through set()
// B      - right-hand side operand, read through B.load() with the column index advancing in
//          multiples of IT::size
// scalar - scaling factor applied to every accumulated sum
//
// NOTE(review): the xmm accumulators have no explicit initializer; the sums are only correct if
// IntrinsicType default-constructs to zero - confirm in Intrinsics.h.
// NOTE(review): the loop bounds test only where a chunk starts, and the last stage issues a
// full-width load/store whenever j < N; presumably the operands are padded to a multiple of
// IT::size columns - confirm.
4367  template< typename MT3 // Type of the left-hand side target matrix
4368  , typename MT4 // Type of the left-hand side matrix operand
4369  , typename MT5 // Type of the right-hand side matrix operand
4370  , typename ST2 > // Type of the scalar value
4371  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4372  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4373  {
4374  typedef IntrinsicTrait<ElementType> IT;
4375 
4376  const size_t M( A.rows() );
4377  const size_t N( B.columns() );
4378  const size_t K( A.columns() );
4379 
4380  const IntrinsicType factor( set( scalar ) );
4381 
      // j is shared by all four stages below: each stage continues where the previous one stopped.
4382  size_t j( 0UL );
4383 
      // Stage 1: eight IT::size-wide column chunks per pass, one row i at a time.
4384  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
4385  for( size_t i=0UL; i<M; ++i ) {
4386  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4387  for( size_t k=0UL; k<K; ++k ) {
4388  const IntrinsicType a1( set( A(i,k) ) );
4389  xmm1 = xmm1 + a1 * B.load(k,j );
4390  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
4391  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
4392  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
4393  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
4394  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
4395  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
4396  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
4397  }
4398  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4399  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
4400  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
4401  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
4402  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
4403  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
4404  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
4405  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
4406  }
4407  }
      // Stage 2: four column chunks per pass, two rows (i, i+1) at a time; xmm1-4 belong to
      // row i, xmm5-8 to row i+1. An odd last row is handled by the if( i < M ) tail.
4408  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
4409  size_t i( 0UL );
4410  for( ; (i+2UL) <= M; i+=2UL ) {
4411  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4412  for( size_t k=0UL; k<K; ++k ) {
4413  const IntrinsicType a1( set( A(i ,k) ) );
4414  const IntrinsicType a2( set( A(i+1UL,k) ) );
4415  const IntrinsicType b1( B.load(k,j ) );
4416  const IntrinsicType b2( B.load(k,j+IT::size ) );
4417  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
4418  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
4419  xmm1 = xmm1 + a1 * b1;
4420  xmm2 = xmm2 + a1 * b2;
4421  xmm3 = xmm3 + a1 * b3;
4422  xmm4 = xmm4 + a1 * b4;
4423  xmm5 = xmm5 + a2 * b1;
4424  xmm6 = xmm6 + a2 * b2;
4425  xmm7 = xmm7 + a2 * b3;
4426  xmm8 = xmm8 + a2 * b4;
4427  }
4428  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4429  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
4430  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
4431  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
4432  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
4433  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
4434  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
4435  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
4436  }
4437  if( i < M ) {
4438  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4439  for( size_t k=0UL; k<K; ++k ) {
4440  const IntrinsicType a1( set( A(i,k) ) );
4441  xmm1 = xmm1 + a1 * B.load(k,j );
4442  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
4443  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
4444  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
4445  }
4446  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4447  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
4448  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
4449  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
4450  }
4451  }
      // Stage 3: two column chunks per pass, two rows at a time, plus the odd-row tail.
4452  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
4453  size_t i( 0UL );
4454  for( ; (i+2UL) <= M; i+=2UL ) {
4455  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4456  for( size_t k=0UL; k<K; ++k ) {
4457  const IntrinsicType a1( set( A(i ,k) ) );
4458  const IntrinsicType a2( set( A(i+1UL,k) ) );
4459  const IntrinsicType b1( B.load(k,j ) );
4460  const IntrinsicType b2( B.load(k,j+IT::size) );
4461  xmm1 = xmm1 + a1 * b1;
4462  xmm2 = xmm2 + a1 * b2;
4463  xmm3 = xmm3 + a2 * b1;
4464  xmm4 = xmm4 + a2 * b2;
4465  }
4466  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4467  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
4468  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
4469  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
4470  }
4471  if( i < M ) {
4472  IntrinsicType xmm1, xmm2;
4473  for( size_t k=0UL; k<K; ++k ) {
4474  const IntrinsicType a1( set( A(i,k) ) );
4475  xmm1 = xmm1 + a1 * B.load(k,j );
4476  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
4477  }
4478  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4479  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
4480  }
4481  }
      // Stage 4: one final column chunk starting at j, if any columns are left.
4482  if( j < N ) {
4483  size_t i( 0UL );
4484  for( ; (i+2UL) <= M; i+=2UL ) {
4485  IntrinsicType xmm1, xmm2;
4486  for( size_t k=0UL; k<K; ++k ) {
4487  const IntrinsicType b1( B.load(k,j) );
4488  xmm1 = xmm1 + set( A(i ,k) ) * b1;
4489  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
4490  }
4491  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4492  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
4493  }
4494  if( i < M ) {
4495  IntrinsicType xmm1;
4496  for( size_t k=0UL; k<K; ++k ) {
4497  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
4498  }
4499  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
4500  }
4501  }
4502  }
4503  //**********************************************************************************************
4504 
4505  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel: subtraction assignment of a scaled matrix product to a column-major
// dense matrix. Each stored vector of C becomes (~C).load(..) - xmm * factor, where xmm sums
// A.load(i,k) * set( B(k,j) ) over k = 0..K-1 and factor is set( scalar ).
//
// Participates in overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds (EnableIf).
//
// C      - target matrix, read and written in place via (~C).load() / (~C).store()
// A      - left-hand side operand, read through A.load() with the row index advancing in
//          multiples of IT::size
// B      - right-hand side operand, read element-wise as B(k,j) and passed through set()
// scalar - scaling factor applied to every accumulated sum
//
// NOTE(review): the xmm accumulators have no explicit initializer; the sums are only correct if
// IntrinsicType default-constructs to zero - confirm in Intrinsics.h.
// NOTE(review): the loop bounds test only where a chunk starts, and the last stage issues a
// full-width load/store whenever i < M; presumably the operands are padded to a multiple of
// IT::size rows - confirm.
4519  template< typename MT3 // Type of the left-hand side target matrix
4520  , typename MT4 // Type of the left-hand side matrix operand
4521  , typename MT5 // Type of the right-hand side matrix operand
4522  , typename ST2 > // Type of the scalar value
4523  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4524  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4525  {
4526  typedef IntrinsicTrait<ElementType> IT;
4527 
4528  const size_t M( A.rows() );
4529  const size_t N( B.columns() );
4530  const size_t K( A.columns() );
4531 
4532  const IntrinsicType factor( set( scalar ) );
4533 
      // i is shared by all four stages below: each stage continues where the previous one stopped.
4534  size_t i( 0UL );
4535 
      // Stage 1: eight IT::size-wide row chunks per pass, one column j at a time.
4536  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
4537  for( size_t j=0UL; j<N; ++j ) {
4538  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4539  for( size_t k=0UL; k<K; ++k ) {
4540  const IntrinsicType b1( set( B(k,j) ) );
4541  xmm1 = xmm1 + A.load(i ,k) * b1;
4542  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4543  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4544  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4545  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
4546  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
4547  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
4548  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
4549  }
4550  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4551  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
4552  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
4553  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
4554  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
4555  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
4556  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
4557  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
4558  }
4559  }
      // Stage 2: four row chunks per pass, two columns (j, j+1) at a time; xmm1-4 belong to
      // column j, xmm5-8 to column j+1. An odd last column is handled by the if( j < N ) tail.
4560  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
4561  size_t j( 0UL );
4562  for( ; (j+2UL) <= N; j+=2UL ) {
4563  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4564  for( size_t k=0UL; k<K; ++k ) {
4565  const IntrinsicType a1( A.load(i ,k) );
4566  const IntrinsicType a2( A.load(i+IT::size ,k) );
4567  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
4568  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
4569  const IntrinsicType b1( set( B(k,j ) ) );
4570  const IntrinsicType b2( set( B(k,j+1UL) ) );
4571  xmm1 = xmm1 + a1 * b1;
4572  xmm2 = xmm2 + a2 * b1;
4573  xmm3 = xmm3 + a3 * b1;
4574  xmm4 = xmm4 + a4 * b1;
4575  xmm5 = xmm5 + a1 * b2;
4576  xmm6 = xmm6 + a2 * b2;
4577  xmm7 = xmm7 + a3 * b2;
4578  xmm8 = xmm8 + a4 * b2;
4579  }
4580  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4581  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
4582  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
4583  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
4584  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
4585  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
4586  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
4587  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
4588  }
4589  if( j < N ) {
4590  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4591  for( size_t k=0UL; k<K; ++k ) {
4592  const IntrinsicType b1( set( B(k,j) ) );
4593  xmm1 = xmm1 + A.load(i ,k) * b1;
4594  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4595  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4596  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4597  }
4598  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4599  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
4600  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
4601  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
4602  }
4603  }
      // Stage 3: two row chunks per pass, two columns at a time, plus the odd-column tail.
4604  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
4605  size_t j( 0UL );
4606  for( ; (j+2UL) <= N; j+=2UL ) {
4607  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4608  for( size_t k=0UL; k<K; ++k ) {
4609  const IntrinsicType a1( A.load(i ,k) );
4610  const IntrinsicType a2( A.load(i+IT::size,k) );
4611  const IntrinsicType b1( set( B(k,j ) ) );
4612  const IntrinsicType b2( set( B(k,j+1UL) ) );
4613  xmm1 = xmm1 + a1 * b1;
4614  xmm2 = xmm2 + a2 * b1;
4615  xmm3 = xmm3 + a1 * b2;
4616  xmm4 = xmm4 + a2 * b2;
4617  }
4618  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4619  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
4620  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
4621  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
4622  }
4623  if( j < N ) {
4624  IntrinsicType xmm1, xmm2;
4625  for( size_t k=0UL; k<K; ++k ) {
4626  const IntrinsicType b1( set( B(k,j) ) );
4627  xmm1 = xmm1 + A.load(i ,k) * b1;
4628  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
4629  }
4630  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4631  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
4632  }
4633  }
      // Stage 4: one final row chunk starting at i, if any rows are left.
4634  if( i < M ) {
4635  size_t j( 0UL );
4636  for( ; (j+2UL) <= N; j+=2UL ) {
4637  IntrinsicType xmm1, xmm2;
4638  for( size_t k=0UL; k<K; ++k ) {
4639  const IntrinsicType a1( A.load(i,k) );
4640  xmm1 = xmm1 + a1 * set( B(k,j ) );
4641  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
4642  }
4643  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4644  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
4645  }
4646  if( j < N ) {
4647  IntrinsicType xmm1;
4648  for( size_t k=0UL; k<K; ++k ) {
4649  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
4650  }
4651  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
4652  }
4653  }
4654  }
4655  //**********************************************************************************************
4656 
4657  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// BLAS-path subtraction assignment, fallback overload: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds. It forwards all four arguments unchanged to
// selectDefaultSubAssignKernel(), so no BLAS routine is called from here.
4671  template< typename MT3 // Type of the left-hand side target matrix
4672  , typename MT4 // Type of the left-hand side matrix operand
4673  , typename MT5 // Type of the right-hand side matrix operand
4674  , typename ST2 > // Type of the scalar value
4675  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4676  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4677  {
4678  selectDefaultSubAssignKernel( C, A, B, scalar );
4679  }
4680  //**********************************************************************************************
4681 
4682  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
4683 #if BLAZE_BLAS_MODE
4684 
// BLAS-path subtraction assignment, single precision: enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds; compiled only inside the enclosing
// #if BLAZE_BLAS_MODE. The whole update is delegated to sgemm().
4697  template< typename MT3 // Type of the left-hand side target matrix
4698  , typename MT4 // Type of the left-hand side matrix operand
4699  , typename MT5 // Type of the right-hand side matrix operand
4700  , typename ST2 > // Type of the scalar value
4701  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4702  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4703  {
      // The subtraction is expressed by negating the scalar. NOTE(review): -scalar / 1.0F are
      // presumably GEMM's alpha / beta - confirm against the sgemm wrapper.
4704  sgemm( C, A, B, -scalar, 1.0F );
4705  }
4706 #endif
4707  //**********************************************************************************************
4708 
4709  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
4710 #if BLAZE_BLAS_MODE
4711 
// BLAS-path subtraction assignment, double precision: enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds; compiled only inside the enclosing
// #if BLAZE_BLAS_MODE. The whole update is delegated to dgemm().
4724  template< typename MT3 // Type of the left-hand side target matrix
4725  , typename MT4 // Type of the left-hand side matrix operand
4726  , typename MT5 // Type of the right-hand side matrix operand
4727  , typename ST2 > // Type of the scalar value
4728  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4729  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4730  {
      // The subtraction is expressed by negating the scalar. NOTE(review): -scalar / 1.0 are
      // presumably GEMM's alpha / beta - confirm against the dgemm wrapper.
4731  dgemm( C, A, B, -scalar, 1.0 );
4732  }
4733 #endif
4734  //**********************************************************************************************
4735 
4736  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
4737 #if BLAZE_BLAS_MODE
4738 
// BLAS-path subtraction assignment, single precision complex: enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition); compiled only inside the enclosing #if BLAZE_BLAS_MODE. Delegates to cgemm().
4751  template< typename MT3 // Type of the left-hand side target matrix
4752  , typename MT4 // Type of the left-hand side matrix operand
4753  , typename MT5 // Type of the right-hand side matrix operand
4754  , typename ST2 > // Type of the scalar value
4755  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4756  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4757  {
      // The negated scalar becomes the real part of a complex<float> with zero imaginary part;
      // the second complex argument is (1,0). NOTE(review): presumably alpha / beta of GEMM -
      // confirm against the cgemm wrapper.
4758  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4759  }
4760 #endif
4761  //**********************************************************************************************
4762 
4763  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4764 #if BLAZE_BLAS_MODE
4765 
// BLAS-path subtraction assignment, double precision complex: enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition); compiled only inside the enclosing #if BLAZE_BLAS_MODE. Delegates to zgemm().
4778  template< typename MT3 // Type of the left-hand side target matrix
4779  , typename MT4 // Type of the left-hand side matrix operand
4780  , typename MT5 // Type of the right-hand side matrix operand
4781  , typename ST2 > // Type of the scalar value
4782  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4783  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4784  {
      // The negated scalar becomes the real part of a complex<double> with zero imaginary part;
      // the second complex argument is (1,0). NOTE(review): presumably alpha / beta of GEMM -
      // confirm against the zgemm wrapper.
4785  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4786  }
4787 #endif
4788  //**********************************************************************************************
4789 
4790  //**Subtraction assignment to sparse matrices***************************************************
4791  // No special implementation for the subtraction assignment to sparse matrices.
4792  //**********************************************************************************************
4793 
4794  //**Multiplication assignment to dense matrices*************************************************
4795  // No special implementation for the multiplication assignment to dense matrices.
4796  //**********************************************************************************************
4797 
4798  //**Multiplication assignment to sparse matrices************************************************
4799  // No special implementation for the multiplication assignment to sparse matrices.
4800  //**********************************************************************************************
4801 
4802  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled matrix product (DMatScalarMultExpr) to a dense matrix.
// Enabled (via EnableIf) only when IsEvaluationRequired<MT,MT1,MT2> holds.
//   lhs : target dense matrix; must already have the dimensions of rhs (asserted).
//   rhs : scaled product expression; its operands are read through rhs.matrix_ and its
//         scalar through rhs.scalar_.
// The operands are first evaluated into the local objects A and B, and the assignment is then
// re-issued as smpAssign( ~lhs, A * B * rhs.scalar_ ).
// NOTE(review): listing line 4822 is absent from this rendering (number gap after the opening
// brace); confirm against the original header whether a statement is missing here.
4817  template< typename MT // Type of the target dense matrix
4818  , bool SO > // Storage order of the target dense matrix
4819  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4820  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4821  {
4823 
4824  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4825  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4826 
// MMM, LT and RT are type aliases declared outside this excerpt.
4827  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4828  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4829 
// Empty target: nothing to assign.
4830  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4831  return;
4832  }
// Zero inner dimension: the target is reset instead of evaluating the product.
4833  else if( left.columns() == 0UL ) {
4834  reset( ~lhs );
4835  return;
4836  }
4837 
4838  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4839  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4840 
4841  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4842  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4843  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4844  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4845  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4846  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4847 
// Re-dispatch on the expression built from the evaluated operands and the scalar.
4848  smpAssign( ~lhs, A * B * rhs.scalar_ );
4849  }
4850  //**********************************************************************************************
4851 
4852  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a scaled matrix product (DMatScalarMultExpr) to a sparse matrix.
// Enabled (via EnableIf) only when IsEvaluationRequired<MT,MT1,MT2> holds.
//   lhs : target sparse matrix; must already have the dimensions of rhs (asserted).
//   rhs : scaled product expression to be assigned.
// The expression is first materialised into a temporary of type TmpType, which is then passed
// to smpAssign in place of the expression.
// NOTE(review): listing lines 4872 and 4876-4881 are absent from this rendering (number gaps);
// confirm against the original header which statements are missing there.
4867  template< typename MT // Type of the target sparse matrix
4868  , bool SO > // Storage order of the target sparse matrix
4869  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4870  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4871  {
4873 
// Temporary type chosen from ResultType/OppositeType by the target's storage order flag SO
// (presumably ResultType when SO is true; confirm against SelectType).
4874  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
4875 
4882 
4883  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4884  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4885 
// Construct the temporary from the expression, then assign the temporary.
4886  const TmpType tmp( rhs );
4887  smpAssign( ~lhs, tmp );
4888  }
4889  //**********************************************************************************************
4890 
4891  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled matrix product (DMatScalarMultExpr) to a dense matrix.
// Enabled (via EnableIf) only when IsEvaluationRequired<MT,MT1,MT2> holds.
//   lhs : target dense matrix; must already have the dimensions of rhs (asserted).
//   rhs : scaled product expression; its operands are read through rhs.matrix_ and its
//         scalar through rhs.scalar_.
// The operands are first evaluated into the local objects A and B, and the operation is then
// re-issued as smpAddAssign( ~lhs, A * B * rhs.scalar_ ).
// NOTE(review): listing line 4911 is absent from this rendering (number gap after the opening
// brace); confirm against the original header whether a statement is missing here.
4906  template< typename MT // Type of the target dense matrix
4907  , bool SO > // Storage order of the target dense matrix
4908  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4909  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4910  {
4912 
4913  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4914  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4915 
// MMM, LT and RT are type aliases declared outside this excerpt.
4916  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4917  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4918 
// Empty target or zero inner dimension: the target is left untouched.
4919  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4920  return;
4921  }
4922 
4923  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4924  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4925 
4926  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4927  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4928  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4929  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4930  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4931  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4932 
// Re-dispatch on the expression built from the evaluated operands and the scalar.
4933  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4934  }
4935  //**********************************************************************************************
4936 
4937  //**SMP addition assignment to sparse matrices**************************************************
4938  // No special implementation for the SMP addition assignment to sparse matrices.
4939  //**********************************************************************************************
4940 
4941  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled matrix product (DMatScalarMultExpr) to a dense matrix.
// Enabled (via EnableIf) only when IsEvaluationRequired<MT,MT1,MT2> holds.
//   lhs : target dense matrix; must already have the dimensions of rhs (asserted).
//   rhs : scaled product expression; its operands are read through rhs.matrix_ and its
//         scalar through rhs.scalar_.
// The operands are first evaluated into the local objects A and B, and the operation is then
// re-issued as smpSubAssign( ~lhs, A * B * rhs.scalar_ ).
// NOTE(review): listing line 4961 is absent from this rendering (number gap after the opening
// brace); confirm against the original header whether a statement is missing here.
4956  template< typename MT // Type of the target dense matrix
4957  , bool SO > // Storage order of the target dense matrix
4958  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4959  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4960  {
4962 
4963  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4964  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4965 
// MMM, LT and RT are type aliases declared outside this excerpt.
4966  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4967  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4968 
// Empty target or zero inner dimension: the target is left untouched.
4969  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4970  return;
4971  }
4972 
4973  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4974  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4975 
4976  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4977  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4978  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4979  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4980  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4981  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4982 
// Re-dispatch on the expression built from the evaluated operands and the scalar.
4983  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4984  }
4985  //**********************************************************************************************
4986 
4987  //**SMP subtraction assignment to sparse matrices***********************************************
4988  // No special implementation for the SMP subtraction assignment to sparse matrices.
4989  //**********************************************************************************************
4990 
4991  //**SMP multiplication assignment to dense matrices*********************************************
4992  // No special implementation for the SMP multiplication assignment to dense matrices.
4993  //**********************************************************************************************
4994 
4995  //**SMP multiplication assignment to sparse matrices********************************************
4996  // No special implementation for the SMP multiplication assignment to sparse matrices.
4997  //**********************************************************************************************
4998 
4999  //**Compile time checks*************************************************************************
5008  //**********************************************************************************************
5009 };
5011 //*************************************************************************************************
5012 
5013 
5014 
5015 
5016 //=================================================================================================
5017 //
5018 // GLOBAL BINARY ARITHMETIC OPERATORS
5019 //
5020 //=================================================================================================
5021 
5022 //*************************************************************************************************
// Multiplication operator for a column-major dense matrix (lhs) and a row-major dense
// matrix (rhs).
//   lhs : left-hand side dense matrix operand.
//   rhs : right-hand side dense matrix operand.
// Returns a TDMatDMatMultExpr<T1,T2> expression object built from ~lhs and ~rhs; the product
// itself is not computed in this function.
// Throws std::invalid_argument("Matrix sizes do not match") when the number of columns of lhs
// differs from the number of rows of rhs.
// NOTE(review): listing lines 5054 (declarator) and 5056 were absent from the rendered listing
// and are restored here; the storage-order flags follow the TDMatDMatMultExpr operand
// requirements (column-major left, row-major right). Confirm against the original header.
5051 template< typename T1 // Type of the left-hand side dense matrix
5052  , typename T2 > // Type of the right-hand side dense matrix
5053 inline const TDMatDMatMultExpr<T1,T2>
5054  operator*( const DenseMatrix<T1,true>& lhs, const DenseMatrix<T2,false>& rhs )
5055 {
5056  BLAZE_FUNCTION_TRACE;
5057 
// The inner dimensions must agree for the product to be defined.
5058  if( (~lhs).columns() != (~rhs).rows() )
5059  throw std::invalid_argument( "Matrix sizes do not match" );
5060 
5061  return TDMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
5062 }
5063 //*************************************************************************************************
5064 
5065 
5066 
5067 
5068 //=================================================================================================
5069 //
5070 // ROWS SPECIALIZATIONS
5071 //
5072 //=================================================================================================
5073 
5074 //*************************************************************************************************
// Rows specialization for TDMatDMatMultExpr: inherits from Rows<MT1>, i.e. the trait of the
// product expression is taken from the left-hand side operand type.
5076 template< typename MT1, typename MT2 >
5077 struct Rows< TDMatDMatMultExpr<MT1,MT2> >
5078  : public Rows<MT1>
5079 {};
5081 //*************************************************************************************************
5082 
5083 
5084 
5085 
5086 //=================================================================================================
5087 //
5088 // COLUMNS SPECIALIZATIONS
5089 //
5090 //=================================================================================================
5091 
5092 //*************************************************************************************************
// Columns specialization for TDMatDMatMultExpr: inherits from Columns<MT2>, i.e. the trait of
// the product expression is taken from the right-hand side operand type.
5094 template< typename MT1, typename MT2 >
5095 struct Columns< TDMatDMatMultExpr<MT1,MT2> >
5096  : public Columns<MT2>
5097 {};
5099 //*************************************************************************************************
5100 
5101 
5102 
5103 
5104 //=================================================================================================
5105 //
5106 // ISLOWER SPECIALIZATIONS
5107 //
5108 //=================================================================================================
5109 
5110 //*************************************************************************************************
// IsLower specialization for TDMatDMatMultExpr: the product expression is flagged as lower
// only if IsLower holds for both operand types.
5112 template< typename MT1, typename MT2 >
5113 struct IsLower< TDMatDMatMultExpr<MT1,MT2> >
5114  : public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
5115 {};
5117 //*************************************************************************************************
5118 
5119 
5120 
5121 
5122 //=================================================================================================
5123 //
5124 // ISUPPER SPECIALIZATIONS
5125 //
5126 //=================================================================================================
5127 
5128 //*************************************************************************************************
// IsUpper specialization for TDMatDMatMultExpr: the product expression is flagged as upper
// only if IsUpper holds for both operand types.
5130 template< typename MT1, typename MT2 >
5131 struct IsUpper< TDMatDMatMultExpr<MT1,MT2> >
5132  : public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
5133 {};
5135 //*************************************************************************************************
5136 
5137 
5138 
5139 
5140 //=================================================================================================
5141 //
5142 // EXPRESSION TRAIT SPECIALIZATIONS
5143 //
5144 //=================================================================================================
5145 
5146 //*************************************************************************************************
// Expression trait for (MT1 * MT2) * VT with a dense column vector VT.
// The nested Type is computed as MT1 * ( MT2 * VT ): the inner DMatDVecMultExprTrait<MT2,VT>
// result is fed into TDMatDVecMultExprTrait with MT1.
// SelectType yields INVALID_TYPE unless MT1 is a column-major dense matrix, MT2 is a row-major
// dense matrix and VT is a dense column vector.
5148 template< typename MT1, typename MT2, typename VT >
5149 struct TDMatDVecMultExprTrait< TDMatDMatMultExpr<MT1,MT2>, VT >
5150 {
5151  public:
5152  //**********************************************************************************************
5153  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
5154  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
5155  IsDenseVector<VT>::value && IsColumnVector<VT>::value
5156  , typename TDMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
5157  , INVALID_TYPE >::Type Type;
5158  //**********************************************************************************************
5159 };
5161 //*************************************************************************************************
5162 
5163 
5164 //*************************************************************************************************
// Expression trait for (MT1 * MT2) * VT with a sparse column vector VT.
// The nested Type is computed as MT1 * ( MT2 * VT ): the inner DMatSVecMultExprTrait<MT2,VT>
// result is fed into TDMatDVecMultExprTrait (the dense-vector trait) with MT1.
// SelectType yields INVALID_TYPE unless MT1 is a column-major dense matrix, MT2 is a row-major
// dense matrix and VT is a sparse column vector.
5166 template< typename MT1, typename MT2, typename VT >
5167 struct TDMatSVecMultExprTrait< TDMatDMatMultExpr<MT1,MT2>, VT >
5168 {
5169  public:
5170  //**********************************************************************************************
5171  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
5172  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
5173  IsSparseVector<VT>::value && IsColumnVector<VT>::value
5174  , typename TDMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
5175  , INVALID_TYPE >::Type Type;
5176  //**********************************************************************************************
5177 };
5179 //*************************************************************************************************
5180 
5181 
5182 //*************************************************************************************************
// Expression trait for VT * (MT1 * MT2) with a dense row vector VT.
// The nested Type is computed as ( VT * MT1 ) * MT2: the inner TDVecTDMatMultExprTrait<VT,MT1>
// result is fed into TDVecDMatMultExprTrait with MT2.
// SelectType yields INVALID_TYPE unless VT is a dense row vector, MT1 is a column-major dense
// matrix and MT2 is a row-major dense matrix.
5184 template< typename VT, typename MT1, typename MT2 >
5185 struct TDVecTDMatMultExprTrait< VT, TDMatDMatMultExpr<MT1,MT2> >
5186 {
5187  public:
5188  //**********************************************************************************************
5189  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
5190  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
5191  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
5192  , typename TDVecDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
5193  , INVALID_TYPE >::Type Type;
5194  //**********************************************************************************************
5195 };
5197 //*************************************************************************************************
5198 
5199 
5200 //*************************************************************************************************
// Expression trait for VT * (MT1 * MT2) with a sparse row vector VT.
// The nested Type is computed as ( VT * MT1 ) * MT2: the inner TSVecTDMatMultExprTrait<VT,MT1>
// result is fed into TDVecDMatMultExprTrait (the dense-vector trait) with MT2.
// SelectType yields INVALID_TYPE unless VT is a sparse row vector, MT1 is a column-major dense
// matrix and MT2 is a row-major dense matrix.
5202 template< typename VT, typename MT1, typename MT2 >
5203 struct TSVecTDMatMultExprTrait< VT, TDMatDMatMultExpr<MT1,MT2> >
5204 {
5205  public:
5206  //**********************************************************************************************
5207  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
5208  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
5209  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
5210  , typename TDVecDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
5211  , INVALID_TYPE >::Type Type;
5212  //**********************************************************************************************
5213 };
5215 //*************************************************************************************************
5216 
5217 
5218 //*************************************************************************************************
// Submatrix expression trait for TDMatDMatMultExpr.
// The nested Type is the MultExprTrait result for the submatrix types of the two (const)
// operands, both obtained with the same alignment flag AF.
5220 template< typename MT1, typename MT2, bool AF >
5221 struct SubmatrixExprTrait< TDMatDMatMultExpr<MT1,MT2>, AF >
5222 {
5223  public:
5224  //**********************************************************************************************
5225  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
5226  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
5227  //**********************************************************************************************
5228 };
5230 //*************************************************************************************************
5231 
5232 
5233 //*************************************************************************************************
// Row expression trait for TDMatDMatMultExpr.
// The nested Type is the MultExprTrait result for the row type of the (const) left-hand side
// operand and the unmodified right-hand side operand type MT2.
5235 template< typename MT1, typename MT2 >
5236 struct RowExprTrait< TDMatDMatMultExpr<MT1,MT2> >
5237 {
5238  public:
5239  //**********************************************************************************************
5240  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
5241  //**********************************************************************************************
5242 };
5244 //*************************************************************************************************
5245 
5246 
5247 //*************************************************************************************************
// Column expression trait for TDMatDMatMultExpr.
// The nested Type is the MultExprTrait result for the unmodified left-hand side operand type
// MT1 and the column type of the (const) right-hand side operand.
5249 template< typename MT1, typename MT2 >
5250 struct ColumnExprTrait< TDMatDMatMultExpr<MT1,MT2> >
5251 {
5252  public:
5253  //**********************************************************************************************
5254  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
5255  //**********************************************************************************************
5256 };
5258 //*************************************************************************************************
5259 
5260 } // namespace blaze
5261 
5262 #endif
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatDMatMultExpr.h:356
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatDMatMultExpr.h:429
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatDMatMultExpr.h:410
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:224
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:129
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: TDMatDMatMultExpr.h:376
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatDMatMultExpr.h:265
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
const size_t TDMATDMATMULT_THRESHOLD
Column-major dense matrix/row-major dense matrix multiplication threshold.This setting specifies the ...
Definition: Thresholds.h:159
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Expression object for transpose dense matrix-dense matrix multiplications.The TDMatDMatMultExpr class...
Definition: Forward.h:125
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:128
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:132
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:133
TDMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatDMatMultExpr class.
Definition: TDMatDMatMultExpr.h:301
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
Header file for the IsDouble type trait.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatDMatMultExpr.h:277
Compile time check for row-major matrix types.This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
Header file for the TDMatSVecMultExprTrait class template.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatDMatMultExpr.h:266
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
const size_t SMP_TDMATDMATMULT_THRESHOLD
SMP column-major dense matrix/row-major dense matrix multiplication threshold.This threshold specifie...
Definition: Thresholds.h:880
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:274
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
TDMatDMatMultExpr< MT1, MT2 > This
Type of this TDMatDMatMultExpr instance.
Definition: TDMatDMatMultExpr.h:261
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatDMatMultExpr.h:280
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:271
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatDMatMultExpr.h:267
Header file for the IsNumeric type trait.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:130
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
Compile time check for column-major matrix types.This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:104
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatDMatMultExpr.h:420
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatDMatMultExpr.h:131
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatDMatMultExpr.h:264
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatDMatMultExpr.h:400
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatDMatMultExpr.h:366
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatDMatMultExpr.h:263
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatDMatMultExpr.h:346
Header file for the IsUpper type trait.
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatDMatMultExpr.h:268
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatDMatMultExpr.h:430
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatDMatMultExpr.h:316
Constraint on the data type.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:225
Constraint on the data type.
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatDMatMultExpr.h:262
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
Header file for the IsExpression type trait class.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatDMatMultExpr.h:388
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849