All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
53 #include <blaze/math/Intrinsics.h>
54 #include <blaze/math/shims/Reset.h>
80 #include <blaze/system/BLAS.h>
82 #include <blaze/util/Assert.h>
83 #include <blaze/util/Complex.h>
89 #include <blaze/util/DisableIf.h>
90 #include <blaze/util/EnableIf.h>
91 #include <blaze/util/InvalidType.h>
93 #include <blaze/util/SelectType.h>
94 #include <blaze/util/Types.h>
101 
102 
103 namespace blaze {
104 
105 //=================================================================================================
106 //
107 // CLASS TDMATTDMATMULTEXPR
108 //
109 //=================================================================================================
110 
111 //*************************************************************************************************
118 template< typename MT1 // Type of the left-hand side dense matrix
119  , typename MT2 > // Type of the right-hand side dense matrix
120 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
121  , private MatMatMultExpr
122  , private Computation
123 {
124  private:
125  //**Type definitions****************************************************************************
126  typedef typename MT1::ResultType RT1;  //!< Result type of the left-hand side operand type MT1.
127  typedef typename MT2::ResultType RT2;  //!< Result type of the right-hand side operand type MT2.
128  typedef typename RT1::ElementType ET1;  //!< Element type of the left-hand side result type.
129  typedef typename RT2::ElementType ET2;  //!< Element type of the right-hand side result type.
130  typedef typename MT1::CompositeType CT1;  //!< Composite type of the left-hand side operand type MT1.
131  typedef typename MT2::CompositeType CT2;  //!< Composite type of the right-hand side operand type MT2.
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
// Compile-time switch for the right-hand side operand: non-zero when MT2 is a computation
// (IsComputation) or requires an intermediate evaluation (RequiresEvaluation). It feeds the
// IsEvaluationRequired helper and the smpAssignable flag of this class.
141  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
142  //**********************************************************************************************
143 
144  //**********************************************************************************************
146 
152  template< typename T1, typename T2, typename T3 >
153  struct CanExploitSymmetry {
154  enum { value = IsRowMajorMatrix<T1>::value &&
155  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
156  };
158  //**********************************************************************************************
159 
160  //**********************************************************************************************
162 
166  template< typename T1, typename T2, typename T3 >
167  struct IsEvaluationRequired {
168  enum { value = ( evaluateLeft || evaluateRight ) &&
169  CanExploitSymmetry<T1,T2,T3>::value };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
179  template< typename T1, typename T2, typename T3 >
180  struct UseSinglePrecisionKernel {
181  enum { value = BLAZE_BLAS_MODE &&
182  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
183  IsFloat<typename T1::ElementType>::value &&
184  IsFloat<typename T2::ElementType>::value &&
185  IsFloat<typename T3::ElementType>::value };
186  };
188  //**********************************************************************************************
189 
190  //**********************************************************************************************
192 
195  template< typename T1, typename T2, typename T3 >
196  struct UseDoublePrecisionKernel {
197  enum { value = BLAZE_BLAS_MODE &&
198  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
199  IsDouble<typename T1::ElementType>::value &&
200  IsDouble<typename T2::ElementType>::value &&
201  IsDouble<typename T3::ElementType>::value };
202  };
204  //**********************************************************************************************
205 
206  //**********************************************************************************************
208 
212  template< typename T1, typename T2, typename T3 >
213  struct UseSinglePrecisionComplexKernel {
214  typedef complex<float> Type;
215  enum { value = BLAZE_BLAS_MODE &&
216  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
217  IsSame<typename T1::ElementType,Type>::value &&
218  IsSame<typename T2::ElementType,Type>::value &&
219  IsSame<typename T3::ElementType,Type>::value };
220  };
222  //**********************************************************************************************
223 
224  //**********************************************************************************************
226 
230  template< typename T1, typename T2, typename T3 >
231  struct UseDoublePrecisionComplexKernel {
232  typedef complex<double> Type;
233  enum { value = BLAZE_BLAS_MODE &&
234  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235  IsSame<typename T1::ElementType,Type>::value &&
236  IsSame<typename T2::ElementType,Type>::value &&
237  IsSame<typename T3::ElementType,Type>::value };
238  };
240  //**********************************************************************************************
241 
242  //**********************************************************************************************
244 
247  template< typename T1, typename T2, typename T3 >
248  struct UseDefaultKernel {
249  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
250  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
251  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
252  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
253  };
255  //**********************************************************************************************
256 
257  //**********************************************************************************************
259 
262  template< typename T1, typename T2, typename T3 >
263  struct UseVectorizedDefaultKernel {
264  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
265  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
266  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
267  IntrinsicTrait<typename T1::ElementType>::addition &&
268  IntrinsicTrait<typename T1::ElementType>::subtraction &&
269  IntrinsicTrait<typename T1::ElementType>::multiplication };
270  };
272  //**********************************************************************************************
273 
274  public:
275  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are used here but their typedefs are not part of
// this listing -- presumably dropped by the extraction; confirm against the original header.
282  typedef const ElementType ReturnType;  //!< Return type of the element access operator (a const value).
283  typedef const ResultType CompositeType;  //!< Data type for composite expression templates.
284 
// Left operand storage type: held by value (const MT1) when MT1 is itself an expression,
// otherwise held by const reference (const MT1&).
286  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
287 
// Right operand storage type: held by value (const MT2) when MT2 is itself an expression,
// otherwise held by const reference (const MT2&).
289  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
290 
293 
296  //**********************************************************************************************
297 
298  //**Compilation flags***************************************************************************
// Compilation switch for the vectorized evaluation of the expression.
// NOTE(review): this enum definition is truncated in this listing -- the operands following
// the trailing `&&` and the closing `};` are missing. Restore them from the original header.
300  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
304 
// Compilation switch for SMP assignments: set only when neither operand requires an
// intermediate evaluation and both operand types are themselves SMP-assignable.
306  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
307  !evaluateRight && MT2::smpAssignable };
308  //**********************************************************************************************
309 
310  //**Constructor*********************************************************************************
/*!\brief Constructor for the TDMatTDMatMultExpr class.
//
// \param lhs The left-hand side operand of the multiplication expression.
// \param rhs The right-hand side operand of the multiplication expression.
//
// Stores both operands in the lhs_ / rhs_ members. The only check performed is an internal
// assertion that the inner dimensions match.
*/
316  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
317  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
318  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
319  {
// The product is only defined when the column count of lhs equals the row count of rhs.
320  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
321  }
322  //**********************************************************************************************
323 
324  //**Access operator*****************************************************************************
331  inline ReturnType operator()( size_t i, size_t j ) const {
332  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
333  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
334 
335  ElementType tmp;
336 
337  if( lhs_.columns() != 0UL ) {
338  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
339  tmp = lhs_(i,0UL) * rhs_(0UL,j);
340  for( size_t k=1UL; k<end; k+=2UL ) {
341  tmp += lhs_(i,k ) * rhs_(k ,j);
342  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
343  }
344  if( end < lhs_.columns() ) {
345  tmp += lhs_(i,end) * rhs_(end,j);
346  }
347  }
348  else {
349  reset( tmp );
350  }
351 
352  return tmp;
353  }
354  //**********************************************************************************************
355 
356  //**Rows function*******************************************************************************
/*!\brief Returns the number of rows of the multiplication expression.
//
// \return The number of rows, which is the row count of the left-hand side operand.
*/
361  inline size_t rows() const {
362  return lhs_.rows();
363  }
364  //**********************************************************************************************
365 
366  //**Columns function****************************************************************************
/*!\brief Returns the number of columns of the multiplication expression.
//
// \return The number of columns, which is the column count of the right-hand side operand.
*/
371  inline size_t columns() const {
372  return rhs_.columns();
373  }
374  //**********************************************************************************************
375 
376  //**Left operand access*************************************************************************
/*!\brief Returns the left-hand side operand of the multiplication expression.
//
// \return The stored left-hand side operand (lhs_), typed as LeftOperand.
*/
381  inline LeftOperand leftOperand() const {
382  return lhs_;
383  }
384  //**********************************************************************************************
385 
386  //**Right operand access************************************************************************
/*!\brief Returns the right-hand side operand of the multiplication expression.
//
// \return The stored right-hand side operand (rhs_), typed as RightOperand.
*/
391  inline RightOperand rightOperand() const {
392  return rhs_;
393  }
394  //**********************************************************************************************
395 
396  //**********************************************************************************************
402  template< typename T >
403  inline bool canAlias( const T* alias ) const {
404  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
405  }
406  //**********************************************************************************************
407 
408  //**********************************************************************************************
414  template< typename T >
415  inline bool isAliased( const T* alias ) const {
416  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
417  }
418  //**********************************************************************************************
419 
420  //**********************************************************************************************
425  inline bool isAligned() const {
426  return lhs_.isAligned() && rhs_.isAligned();
427  }
428  //**********************************************************************************************
429 
430  //**********************************************************************************************
/*!\brief Returns whether the expression can be used in SMP assignments.
//
// The visible part of the condition holds when the BLAS library is not parallel
// (BLAZE_BLAS_IS_PARALLEL is zero) or when the number of result elements is below
// TDMATTDMATMULT_THRESHOLD.
//
// NOTE(review): the return expression is truncated in this listing -- the operand following
// the trailing `&&` and the terminating `;` are missing. Restore them from the original header.
*/
435  inline bool canSMPAssign() const {
436  return ( !BLAZE_BLAS_IS_PARALLEL ||
437  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
439  }
440  //**********************************************************************************************
441 
442  private:
443  //**Member variables****************************************************************************
446  //**********************************************************************************************
447 
448  //**BLAS kernel (single precision)**************************************************************
449 #if BLAZE_BLAS_MODE
450 
465  template< typename MT3 // Type of the left-hand side target matrix
466  , typename MT4 // Type of the left-hand side matrix operand
467  , typename MT5 > // Type of the right-hand side matrix operand
468  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
469  {
470  using boost::numeric_cast;
471 
475 
476  const int M ( numeric_cast<int>( A.rows() ) );
477  const int N ( numeric_cast<int>( B.columns() ) );
478  const int K ( numeric_cast<int>( A.columns() ) );
479  const int lda( numeric_cast<int>( A.spacing() ) );
480  const int ldb( numeric_cast<int>( B.spacing() ) );
481  const int ldc( numeric_cast<int>( C.spacing() ) );
482 
484  cblas_ssymm( CblasColMajor, CblasLeft, CblasUpper,
485  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
486  }
488  cblas_ssymm( CblasColMajor, CblasRight, CblasUpper,
489  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
490  }
491  else {
492  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
493  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
494  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
495  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
496  }
497  }
499 #endif
500  //**********************************************************************************************
501 
502  //**BLAS kernel (double precision)**************************************************************
503 #if BLAZE_BLAS_MODE
504 
519  template< typename MT3 // Type of the left-hand side target matrix
520  , typename MT4 // Type of the left-hand side matrix operand
521  , typename MT5 > // Type of the right-hand side matrix operand
522  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
523  {
524  using boost::numeric_cast;
525 
529 
530  const int M ( numeric_cast<int>( A.rows() ) );
531  const int N ( numeric_cast<int>( B.columns() ) );
532  const int K ( numeric_cast<int>( A.columns() ) );
533  const int lda( numeric_cast<int>( A.spacing() ) );
534  const int ldb( numeric_cast<int>( B.spacing() ) );
535  const int ldc( numeric_cast<int>( C.spacing() ) );
536 
538  cblas_dsymm( CblasColMajor, CblasLeft, CblasUpper,
539  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
540  }
541  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
542  cblas_dsymm( CblasColMajor, CblasRight, CblasUpper,
543  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
544  }
545  else {
546  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
547  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
548  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
549  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
550  }
551  }
553 #endif
554  //**********************************************************************************************
555 
556  //**BLAS kernel (single precision complex)******************************************************
557 #if BLAZE_BLAS_MODE
558 
574  template< typename MT3 // Type of the left-hand side target matrix
575  , typename MT4 // Type of the left-hand side matrix operand
576  , typename MT5 > // Type of the right-hand side matrix operand
577  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
578  complex<float> alpha, complex<float> beta )
579  {
580  using boost::numeric_cast;
581 
585  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
586  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
587  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
588 
589  const int M ( numeric_cast<int>( A.rows() ) );
590  const int N ( numeric_cast<int>( B.columns() ) );
591  const int K ( numeric_cast<int>( A.columns() ) );
592  const int lda( numeric_cast<int>( A.spacing() ) );
593  const int ldb( numeric_cast<int>( B.spacing() ) );
594  const int ldc( numeric_cast<int>( C.spacing() ) );
595 
596  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
597  cblas_csymm( CblasColMajor, CblasLeft, CblasUpper,
598  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
599  }
600  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
601  cblas_csymm( CblasColMajor, CblasRight, CblasUpper,
602  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
603  }
604  else {
605  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
606  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
607  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
608  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
609  }
610  }
612 #endif
613  //**********************************************************************************************
614 
615  //**BLAS kernel (double precision complex)******************************************************
616 #if BLAZE_BLAS_MODE
617 
633  template< typename MT3 // Type of the left-hand side target matrix
634  , typename MT4 // Type of the left-hand side matrix operand
635  , typename MT5 > // Type of the right-hand side matrix operand
636  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
637  complex<double> alpha, complex<double> beta )
638  {
639  using boost::numeric_cast;
640 
644  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
645  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
646  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
647 
648  const int M ( numeric_cast<int>( A.rows() ) );
649  const int N ( numeric_cast<int>( B.columns() ) );
650  const int K ( numeric_cast<int>( A.columns() ) );
651  const int lda( numeric_cast<int>( A.spacing() ) );
652  const int ldb( numeric_cast<int>( B.spacing() ) );
653  const int ldc( numeric_cast<int>( C.spacing() ) );
654 
655  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
656  cblas_zsymm( CblasColMajor, CblasLeft, CblasUpper,
657  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
658  }
659  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
660  cblas_zsymm( CblasColMajor, CblasRight, CblasUpper,
661  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
662  }
663  else {
664  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
665  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
666  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
667  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
668  }
669  }
671 #endif
672  //**********************************************************************************************
673 
674  //**Assignment to dense matrices****************************************************************
687  template< typename MT // Type of the target dense matrix
688  , bool SO > // Storage order of the target dense matrix
689  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
690  assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
691  {
693 
694  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
695  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
696 
697  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
698  return;
699  }
700  else if( rhs.lhs_.columns() == 0UL ) {
701  reset( ~lhs );
702  return;
703  }
704 
705  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
706  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
707 
708  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
709  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
710  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
711  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
712  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
713  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
714 
715  TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
716  }
718  //**********************************************************************************************
719 
720  //**Assignment to dense matrices (kernel selection)*********************************************
731  template< typename MT3 // Type of the left-hand side target matrix
732  , typename MT4 // Type of the left-hand side matrix operand
733  , typename MT5 > // Type of the right-hand side matrix operand
734  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
735  {
736  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
737  TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
738  else
739  TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
740  }
742  //**********************************************************************************************
743 
744  //**Default assignment to dense matrices********************************************************
758  template< typename MT3 // Type of the left-hand side target matrix
759  , typename MT4 // Type of the left-hand side matrix operand
760  , typename MT5 > // Type of the right-hand side matrix operand
761  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
762  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
763  {
764  const size_t M( A.rows() );
765  const size_t N( B.columns() );
766  const size_t K( A.columns() );
767 
768  for( size_t i=0UL; i<M; ++i ) {
769  for( size_t j=0UL; j<N; ++j ) {
770  C(i,j) = A(i,0UL) * B(0UL,j);
771  }
772  for( size_t k=1UL; k<K; ++k ) {
773  for( size_t j=0UL; j<N; ++j ) {
774  C(i,j) += A(i,k) * B(k,j);
775  }
776  }
777  }
778  }
780  //**********************************************************************************************
781 
782  //**Vectorized default assignment to row-major dense matrices***********************************
796  template< typename MT3 // Type of the left-hand side target matrix
797  , typename MT4 // Type of the left-hand side matrix operand
798  , typename MT5 > // Type of the right-hand side matrix operand
799  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
800  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
801  {
804 
805  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
806  const typename MT5::OppositeType tmp( serial( B ) );
807  assign( ~C, A * tmp );
808  }
809  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
810  const typename MT4::OppositeType tmp( serial( A ) );
811  assign( ~C, tmp * B );
812  }
813  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
814  const typename MT5::OppositeType tmp( serial( B ) );
815  assign( ~C, A * tmp );
816  }
817  else {
818  const typename MT4::OppositeType tmp( serial( A ) );
819  assign( ~C, tmp * B );
820  }
821  }
823  //**********************************************************************************************
824 
825  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment kernel for column-major dense target matrices.
//
// Computes the product of A and B into C using IntrinsicType accumulators. The kernel is
// enabled via EnableIf when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// The rows of C are processed in blocks of decreasing height: 8, 4 and 2 vectors of
// IT::size elements, followed by a final single-vector block. Inside the 4-, 2- and
// 1-vector blocks two columns of C are computed per iteration, with a single-column
// remainder when the number of columns is odd.
//
// NOTE(review): the accumulators are default-constructed and never explicitly zeroed, so
// this presumably relies on IntrinsicType's default constructor yielding zero -- confirm.
// NOTE(review): the last block (`if( i < M )`) loads and stores a full vector although fewer
// than IT::size rows may remain; presumably safe due to padded column storage -- confirm.
839  template< typename MT3 // Type of the left-hand side target matrix
840  , typename MT4 // Type of the left-hand side matrix operand
841  , typename MT5 > // Type of the right-hand side matrix operand
842  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
843  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
844  {
845  typedef IntrinsicTrait<ElementType> IT;
846 
847  const size_t M( A.rows() );
848  const size_t N( B.columns() );
849  const size_t K( A.columns() );
850 
// Row index, shared by all block loops below: each loop continues where the previous stopped
851  size_t i( 0UL );
852 
// Blocks of 8 vectors (8*IT::size rows), one column of C at a time
853  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
854  for( size_t j=0UL; j<N; ++j ) {
855  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
856  for( size_t k=0UL; k<K; ++k ) {
// Broadcast the scalar B(k,j) and multiply it with 8 consecutive vectors of column k of A
857  const IntrinsicType b1( set( B(k,j) ) );
858  xmm1 = xmm1 + A.load(i ,k) * b1;
859  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
860  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
861  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
862  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
863  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
864  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
865  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
866  }
867  (~C).store( i , j, xmm1 );
868  (~C).store( i+IT::size , j, xmm2 );
869  (~C).store( i+IT::size*2UL, j, xmm3 );
870  (~C).store( i+IT::size*3UL, j, xmm4 );
871  (~C).store( i+IT::size*4UL, j, xmm5 );
872  (~C).store( i+IT::size*5UL, j, xmm6 );
873  (~C).store( i+IT::size*6UL, j, xmm7 );
874  (~C).store( i+IT::size*7UL, j, xmm8 );
875  }
876  }
// Blocks of 4 vectors, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1)
877  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
878  size_t j( 0UL );
879  for( ; (j+2UL) <= N; j+=2UL ) {
880  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
881  for( size_t k=0UL; k<K; ++k ) {
882  const IntrinsicType a1( A.load(i ,k) );
883  const IntrinsicType a2( A.load(i+IT::size ,k) );
884  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
885  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
886  const IntrinsicType b1( set( B(k,j ) ) );
887  const IntrinsicType b2( set( B(k,j+1UL) ) );
888  xmm1 = xmm1 + a1 * b1;
889  xmm2 = xmm2 + a2 * b1;
890  xmm3 = xmm3 + a3 * b1;
891  xmm4 = xmm4 + a4 * b1;
892  xmm5 = xmm5 + a1 * b2;
893  xmm6 = xmm6 + a2 * b2;
894  xmm7 = xmm7 + a3 * b2;
895  xmm8 = xmm8 + a4 * b2;
896  }
897  (~C).store( i , j , xmm1 );
898  (~C).store( i+IT::size , j , xmm2 );
899  (~C).store( i+IT::size*2UL, j , xmm3 );
900  (~C).store( i+IT::size*3UL, j , xmm4 );
901  (~C).store( i , j+1UL, xmm5 );
902  (~C).store( i+IT::size , j+1UL, xmm6 );
903  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
904  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
905  }
// Remaining single column in case N is odd
906  if( j < N ) {
907  IntrinsicType xmm1, xmm2, xmm3, xmm4;
908  for( size_t k=0UL; k<K; ++k ) {
909  const IntrinsicType b1( set( B(k,j) ) );
910  xmm1 = xmm1 + A.load(i ,k) * b1;
911  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
912  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
913  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
914  }
915  (~C).store( i , j, xmm1 );
916  (~C).store( i+IT::size , j, xmm2 );
917  (~C).store( i+IT::size*2UL, j, xmm3 );
918  (~C).store( i+IT::size*3UL, j, xmm4 );
919  }
920  }
// Blocks of 2 vectors, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1)
921  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
922  size_t j( 0UL );
923  for( ; (j+2UL) <= N; j+=2UL ) {
924  IntrinsicType xmm1, xmm2, xmm3, xmm4;
925  for( size_t k=0UL; k<K; ++k ) {
926  const IntrinsicType a1( A.load(i ,k) );
927  const IntrinsicType a2( A.load(i+IT::size,k) );
928  const IntrinsicType b1( set( B(k,j ) ) );
929  const IntrinsicType b2( set( B(k,j+1UL) ) );
930  xmm1 = xmm1 + a1 * b1;
931  xmm2 = xmm2 + a2 * b1;
932  xmm3 = xmm3 + a1 * b2;
933  xmm4 = xmm4 + a2 * b2;
934  }
935  (~C).store( i , j , xmm1 );
936  (~C).store( i+IT::size, j , xmm2 );
937  (~C).store( i , j+1UL, xmm3 );
938  (~C).store( i+IT::size, j+1UL, xmm4 );
939  }
// Remaining single column in case N is odd
940  if( j < N ) {
941  IntrinsicType xmm1, xmm2;
942  for( size_t k=0UL; k<K; ++k ) {
943  const IntrinsicType b1( set( B(k,j) ) );
944  xmm1 = xmm1 + A.load(i ,k) * b1;
945  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
946  }
947  (~C).store( i , j, xmm1 );
948  (~C).store( i+IT::size, j, xmm2 );
949  }
950  }
// Final block of a single vector, two columns of C at a time
951  if( i < M ) {
952  size_t j( 0UL );
953  for( ; (j+2UL) <= N; j+=2UL ) {
954  IntrinsicType xmm1, xmm2;
955  for( size_t k=0UL; k<K; ++k ) {
956  const IntrinsicType a1( A.load(i,k) );
957  xmm1 = xmm1 + a1 * set( B(k,j ) );
958  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
959  }
960  (~C).store( i, j , xmm1 );
961  (~C).store( i, j+1UL, xmm2 );
962  }
// Remaining single column in case N is odd
963  if( j < N ) {
964  IntrinsicType xmm1;
965  for( size_t k=0UL; k<K; ++k ) {
966  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
967  }
968  (~C).store( i, j, xmm1 );
969  }
970  }
971  }
973  //**********************************************************************************************
974 
975  //**BLAS-based assignment to dense matrices (default)*******************************************
989  template< typename MT3 // Type of the left-hand side target matrix
990  , typename MT4 // Type of the left-hand side matrix operand
991  , typename MT5 > // Type of the right-hand side matrix operand
992  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
993  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
994  {
995  selectDefaultAssignKernel( C, A, B );
996  }
998  //**********************************************************************************************
999 
1000  //**BLAS-based assignment to dense matrices (single precision)**********************************
1001 #if BLAZE_BLAS_MODE
1002 
1015  template< typename MT3 // Type of the left-hand side target matrix
1016  , typename MT4 // Type of the left-hand side matrix operand
1017  , typename MT5 > // Type of the right-hand side matrix operand
1018  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1019  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1020  {
1021  sgemm( C, A, B, 1.0F, 0.0F );
1022  }
1024 #endif
1025  //**********************************************************************************************
1026 
1027  //**BLAS-based assignment to dense matrices (double precision)**********************************
1028 #if BLAZE_BLAS_MODE
1029 
1042  template< typename MT3 // Type of the left-hand side target matrix
1043  , typename MT4 // Type of the left-hand side matrix operand
1044  , typename MT5 > // Type of the right-hand side matrix operand
1045  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1046  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1047  {
1048  dgemm( C, A, B, 1.0, 0.0 );
1049  }
1051 #endif
1052  //**********************************************************************************************
1053 
1054  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1055 #if BLAZE_BLAS_MODE
1056 
1069  template< typename MT3 // Type of the left-hand side target matrix
1070  , typename MT4 // Type of the left-hand side matrix operand
1071  , typename MT5 > // Type of the right-hand side matrix operand
1072  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1073  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1074  {
1075  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1076  }
1078 #endif
1079  //**********************************************************************************************
1080 
1081  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1082 #if BLAZE_BLAS_MODE
1083 
1096  template< typename MT3 // Type of the left-hand side target matrix
1097  , typename MT4 // Type of the left-hand side matrix operand
1098  , typename MT5 > // Type of the right-hand side matrix operand
1099  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1100  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1101  {
1102  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1103  }
1105 #endif
1106  //**********************************************************************************************
1107 
1108  //**Assignment to sparse matrices***************************************************************
1121  template< typename MT // Type of the target sparse matrix
1122  , bool SO > // Storage order of the target sparse matrix
1123  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1124  assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1125  {
1127 
1128  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
1129 
1136 
1137  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1138  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1139 
1140  const TmpType tmp( serial( rhs ) );
1141  assign( ~lhs, tmp );
1142  }
1144  //**********************************************************************************************
1145 
1146  //**Restructuring assignment to row-major matrices**********************************************
1161  template< typename MT > // Type of the target matrix
1162  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1163  assign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
1164  {
1166 
1168 
1169  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1170  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1171 
1172  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1173  assign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1174  else if( IsSymmetric<MT1>::value )
1175  assign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1176  else
1177  assign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1178  }
1180  //**********************************************************************************************
1181 
1182  //**Addition assignment to dense matrices*******************************************************
1195  template< typename MT // Type of the target dense matrix
1196  , bool SO > // Storage order of the target dense matrix
1197  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1198  addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1199  {
1201 
1202  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1203  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1204 
1205  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1206  return;
1207  }
1208 
1209  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1210  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1211 
1212  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1213  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1214  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1215  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1216  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1217  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1218 
1219  TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1220  }
1222  //**********************************************************************************************
1223 
1224  //**Addition assignment to dense matrices (kernel selection)************************************
1235  template< typename MT3 // Type of the left-hand side target matrix
1236  , typename MT4 // Type of the left-hand side matrix operand
1237  , typename MT5 > // Type of the right-hand side matrix operand
1238  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1239  {
1240  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1241  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1242  else
1243  TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1244  }
1246  //**********************************************************************************************
1247 
1248  //**Default addition assignment to dense matrices***********************************************
1262  template< typename MT3 // Type of the left-hand side target matrix
1263  , typename MT4 // Type of the left-hand side matrix operand
1264  , typename MT5 > // Type of the right-hand side matrix operand
1265  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1266  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1267  {
1268  const size_t M( A.rows() );
1269  const size_t N( B.columns() );
1270  const size_t K( A.columns() );
1271 
1272  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1273  const size_t end( N & size_t(-2) );
1274 
1275  for( size_t i=0UL; i<M; ++i ) {
1276  for( size_t k=0UL; k<K; ++k ) {
1277  for( size_t j=0UL; j<end; j+=2UL ) {
1278  C(i,j ) += A(i,k) * B(k,j );
1279  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1280  }
1281  if( end < N ) {
1282  C(i,end) += A(i,k) * B(k,end);
1283  }
1284  }
1285  }
1286  }
1288  //**********************************************************************************************
1289 
1290  //**Vectorized default addition assignment to row-major dense matrices**************************
1304  template< typename MT3 // Type of the left-hand side target matrix
1305  , typename MT4 // Type of the left-hand side matrix operand
1306  , typename MT5 > // Type of the right-hand side matrix operand
1307  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1308  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1309  {
1312 
1313  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1314  const typename MT5::OppositeType tmp( serial( B ) );
1315  addAssign( ~C, A * tmp );
1316  }
1317  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1318  const typename MT4::OppositeType tmp( serial( A ) );
1319  addAssign( ~C, tmp * B );
1320  }
1321  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1322  const typename MT5::OppositeType tmp( serial( B ) );
1323  addAssign( ~C, A * tmp );
1324  }
1325  else {
1326  const typename MT4::OppositeType tmp( serial( A ) );
1327  addAssign( ~C, tmp * B );
1328  }
1329  }
1331  //**********************************************************************************************
1332 
1333  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment kernel for column-major target matrices.
//
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is satisfied. The rows of the target
// are traversed in blocks of decreasing height (8, 4, 2 and finally 1 intrinsic vector). For
// every block the current values of C are loaded into local IntrinsicType accumulators, the
// products A.load(...) * set( B(k,j) ) are added over the inner index k, and the accumulators
// are stored back into C.
//
// C: the target left-hand side dense matrix (accessed via ~C).
// A: the left-hand side multiplication operand.
// B: the right-hand side multiplication operand.
1347  template< typename MT3 // Type of the left-hand side target matrix
1348  , typename MT4 // Type of the left-hand side matrix operand
1349  , typename MT5 > // Type of the right-hand side matrix operand
1350  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1351  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1352  {
// NOTE(review): IT::size is presumably the number of elements per intrinsic vector -- confirm
// against IntrinsicTrait.
1353  typedef IntrinsicTrait<ElementType> IT;
1354 
1355  const size_t M( A.rows() );
1356  const size_t N( B.columns() );
1357  const size_t K( A.columns() );
1358 
// Row index; shared by all following loops so that each loop continues where the previous
// one stopped.
1359  size_t i( 0UL );
1360 
// Row blocks of 8 intrinsic vectors, one column of C per iteration.
// NOTE(review): the loop condition only requires the 8th vector to start below M, so loads
// and stores may reach beyond row M-1; this presumably relies on padded storage -- confirm.
1361  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1362  for( size_t j=0UL; j<N; ++j ) {
1363  IntrinsicType xmm1( (~C).load(i ,j) );
1364  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1365  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1366  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1367  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1368  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1369  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1370  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1371  for( size_t k=0UL; k<K; ++k ) {
// NOTE(review): set() presumably replicates the scalar B(k,j) across the vector -- confirm.
1372  const IntrinsicType b1( set( B(k,j) ) );
1373  xmm1 = xmm1 + A.load(i ,k) * b1;
1374  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1375  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1376  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1377  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1378  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1379  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1380  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1381  }
1382  (~C).store( i , j, xmm1 );
1383  (~C).store( i+IT::size , j, xmm2 );
1384  (~C).store( i+IT::size*2UL, j, xmm3 );
1385  (~C).store( i+IT::size*3UL, j, xmm4 );
1386  (~C).store( i+IT::size*4UL, j, xmm5 );
1387  (~C).store( i+IT::size*5UL, j, xmm6 );
1388  (~C).store( i+IT::size*6UL, j, xmm7 );
1389  (~C).store( i+IT::size*7UL, j, xmm8 );
1390  }
1391  }
// Row blocks of 4 intrinsic vectors, two columns of C per iteration (xmm1-4: column j,
// xmm5-8: column j+1).
1392  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1393  size_t j( 0UL );
1394  for( ; (j+2UL) <= N; j+=2UL ) {
1395  IntrinsicType xmm1( (~C).load(i ,j ) );
1396  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1397  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1398  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1399  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1400  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1401  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1402  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1403  for( size_t k=0UL; k<K; ++k ) {
1404  const IntrinsicType a1( A.load(i ,k) );
1405  const IntrinsicType a2( A.load(i+IT::size ,k) );
1406  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1407  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1408  const IntrinsicType b1( set( B(k,j ) ) );
1409  const IntrinsicType b2( set( B(k,j+1UL) ) );
1410  xmm1 = xmm1 + a1 * b1;
1411  xmm2 = xmm2 + a2 * b1;
1412  xmm3 = xmm3 + a3 * b1;
1413  xmm4 = xmm4 + a4 * b1;
1414  xmm5 = xmm5 + a1 * b2;
1415  xmm6 = xmm6 + a2 * b2;
1416  xmm7 = xmm7 + a3 * b2;
1417  xmm8 = xmm8 + a4 * b2;
1418  }
1419  (~C).store( i , j , xmm1 );
1420  (~C).store( i+IT::size , j , xmm2 );
1421  (~C).store( i+IT::size*2UL, j , xmm3 );
1422  (~C).store( i+IT::size*3UL, j , xmm4 );
1423  (~C).store( i , j+1UL, xmm5 );
1424  (~C).store( i+IT::size , j+1UL, xmm6 );
1425  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1426  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1427  }
// Remaining single column in case of an odd N
1428  if( j < N ) {
1429  IntrinsicType xmm1( (~C).load(i ,j) );
1430  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1431  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1432  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1433  for( size_t k=0UL; k<K; ++k ) {
1434  const IntrinsicType b1( set( B(k,j) ) );
1435  xmm1 = xmm1 + A.load(i ,k) * b1;
1436  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1437  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1438  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1439  }
1440  (~C).store( i , j, xmm1 );
1441  (~C).store( i+IT::size , j, xmm2 );
1442  (~C).store( i+IT::size*2UL, j, xmm3 );
1443  (~C).store( i+IT::size*3UL, j, xmm4 );
1444  }
1445  }
// Row blocks of 2 intrinsic vectors, two columns of C per iteration (xmm1-2: column j,
// xmm3-4: column j+1).
1446  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1447  size_t j( 0UL );
1448  for( ; (j+2UL) <= N; j+=2UL ) {
1449  IntrinsicType xmm1( (~C).load(i ,j ) );
1450  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1451  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1452  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1453  for( size_t k=0UL; k<K; ++k ) {
1454  const IntrinsicType a1( A.load(i ,k) );
1455  const IntrinsicType a2( A.load(i+IT::size,k) );
1456  const IntrinsicType b1( set( B(k,j ) ) );
1457  const IntrinsicType b2( set( B(k,j+1UL) ) );
1458  xmm1 = xmm1 + a1 * b1;
1459  xmm2 = xmm2 + a2 * b1;
1460  xmm3 = xmm3 + a1 * b2;
1461  xmm4 = xmm4 + a2 * b2;
1462  }
1463  (~C).store( i , j , xmm1 );
1464  (~C).store( i+IT::size, j , xmm2 );
1465  (~C).store( i , j+1UL, xmm3 );
1466  (~C).store( i+IT::size, j+1UL, xmm4 );
1467  }
// Remaining single column in case of an odd N
1468  if( j < N ) {
1469  IntrinsicType xmm1( (~C).load(i ,j) );
1470  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1471  for( size_t k=0UL; k<K; ++k ) {
1472  const IntrinsicType b1( set( B(k,j) ) );
1473  xmm1 = xmm1 + A.load(i ,k) * b1;
1474  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1475  }
1476  (~C).store( i , j, xmm1 );
1477  (~C).store( i+IT::size, j, xmm2 );
1478  }
1479  }
// Final block of a single intrinsic vector for the rows that are still unprocessed
1480  if( i < M ) {
1481  size_t j( 0UL );
1482  for( ; (j+2UL) <= N; j+=2UL ) {
1483  IntrinsicType xmm1( (~C).load(i,j ) );
1484  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1485  for( size_t k=0UL; k<K; ++k ) {
1486  const IntrinsicType a1( A.load(i,k) );
1487  xmm1 = xmm1 + a1 * set( B(k,j ) );
1488  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1489  }
1490  (~C).store( i, j , xmm1 );
1491  (~C).store( i, j+1UL, xmm2 );
1492  }
// Remaining single column in case of an odd N
1493  if( j < N ) {
1494  IntrinsicType xmm1( (~C).load(i,j) );
1495  for( size_t k=0UL; k<K; ++k ) {
1496  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1497  }
1498  (~C).store( i, j, xmm1 );
1499  }
1500  }
1501  }
1503  //**********************************************************************************************
1504 
1505  //**BLAS-based addition assignment to dense matrices (default)**********************************
1519  template< typename MT3 // Type of the left-hand side target matrix
1520  , typename MT4 // Type of the left-hand side matrix operand
1521  , typename MT5 > // Type of the right-hand side matrix operand
1522  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1523  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1524  {
1525  selectDefaultAddAssignKernel( C, A, B );
1526  }
1528  //**********************************************************************************************
1529 
1530  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1531 #if BLAZE_BLAS_MODE
1532 
1545  template< typename MT3 // Type of the left-hand side target matrix
1546  , typename MT4 // Type of the left-hand side matrix operand
1547  , typename MT5 > // Type of the right-hand side matrix operand
1548  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1549  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1550  {
1551  sgemm( C, A, B, 1.0F, 1.0F );
1552  }
1554 #endif
1555  //**********************************************************************************************
1556 
1557  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1558 #if BLAZE_BLAS_MODE
1559 
1572  template< typename MT3 // Type of the left-hand side target matrix
1573  , typename MT4 // Type of the left-hand side matrix operand
1574  , typename MT5 > // Type of the right-hand side matrix operand
1575  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1576  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1577  {
1578  dgemm( C, A, B, 1.0, 1.0 );
1579  }
1581 #endif
1582  //**********************************************************************************************
1583 
1584  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1585 #if BLAZE_BLAS_MODE
1586 
1599  template< typename MT3 // Type of the left-hand side target matrix
1600  , typename MT4 // Type of the left-hand side matrix operand
1601  , typename MT5 > // Type of the right-hand side matrix operand
1602  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1603  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1604  {
1605  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1606  }
1608 #endif
1609  //**********************************************************************************************
1610 
1611  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1612 #if BLAZE_BLAS_MODE
1613 
1626  template< typename MT3 // Type of the left-hand side target matrix
1627  , typename MT4 // Type of the left-hand side matrix operand
1628  , typename MT5 > // Type of the right-hand side matrix operand
1629  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1630  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1631  {
1632  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1633  }
1635 #endif
1636  //**********************************************************************************************
1637 
1638  //**Restructuring addition assignment to row-major matrices*************************************
1653  template< typename MT > // Type of the target matrix
1654  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1655  addAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
1656  {
1658 
1660 
1661  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1662  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1663 
1664  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1665  addAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1666  else if( IsSymmetric<MT1>::value )
1667  addAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1668  else
1669  addAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1670  }
1672  //**********************************************************************************************
1673 
1674  //**Addition assignment to sparse matrices******************************************************
1675  // No special implementation for the addition assignment to sparse matrices.
1676  //**********************************************************************************************
1677 
1678  //**Subtraction assignment to dense matrices****************************************************
1691  template< typename MT // Type of the target dense matrix
1692  , bool SO > // Storage order of the target dense matrix
1693  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1694  subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1695  {
1697 
1698  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1699  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1700 
1701  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1702  return;
1703  }
1704 
1705  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1706  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1707 
1708  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1709  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1710  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1711  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1712  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1713  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1714 
1715  TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1716  }
1718  //**********************************************************************************************
1719 
1720  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1731  template< typename MT3 // Type of the left-hand side target matrix
1732  , typename MT4 // Type of the left-hand side matrix operand
1733  , typename MT5 > // Type of the right-hand side matrix operand
1734  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1735  {
1736  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1737  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1738  else
1739  TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1740  }
1742  //**********************************************************************************************
1743 
1744  //**Default subtraction assignment to dense matrices********************************************
1758  template< typename MT3 // Type of the left-hand side target matrix
1759  , typename MT4 // Type of the left-hand side matrix operand
1760  , typename MT5 > // Type of the right-hand side matrix operand
1761  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1762  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1763  {
1764  const size_t M( A.rows() );
1765  const size_t N( B.columns() );
1766  const size_t K( A.columns() );
1767 
1768  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1769  const size_t end( N & size_t(-2) );
1770 
1771  for( size_t i=0UL; i<M; ++i ) {
1772  for( size_t k=0UL; k<K; ++k ) {
1773  for( size_t j=0UL; j<end; j+=2UL ) {
1774  C(i,j ) -= A(i,k) * B(k,j );
1775  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1776  }
1777  if( end < N ) {
1778  C(i,end) -= A(i,k) * B(k,end);
1779  }
1780  }
1781  }
1782  }
1784  //**********************************************************************************************
1785 
1786  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1800  template< typename MT3 // Type of the left-hand side target matrix
1801  , typename MT4 // Type of the left-hand side matrix operand
1802  , typename MT5 > // Type of the right-hand side matrix operand
1803  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1804  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1805  {
1808 
1809  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1810  const typename MT5::OppositeType tmp( serial( B ) );
1811  subAssign( ~C, A * tmp );
1812  }
1813  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1814  const typename MT4::OppositeType tmp( serial( A ) );
1815  subAssign( ~C, tmp * B );
1816  }
1817  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1818  const typename MT5::OppositeType tmp( serial( B ) );
1819  subAssign( ~C, A * tmp );
1820  }
1821  else {
1822  const typename MT4::OppositeType tmp( serial( A ) );
1823  subAssign( ~C, tmp * B );
1824  }
1825  }
1827  //**********************************************************************************************
1828 
1829  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment kernel for column-major target matrices.
//
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is satisfied. The rows of the target
// are traversed in blocks of decreasing height (8, 4, 2 and finally 1 intrinsic vector). For
// every block the current values of C are loaded into local IntrinsicType accumulators, the
// products A.load(...) * set( B(k,j) ) are subtracted over the inner index k, and the
// accumulators are stored back into C.
//
// C: the target left-hand side dense matrix (accessed via ~C).
// A: the left-hand side multiplication operand.
// B: the right-hand side multiplication operand.
1843  template< typename MT3 // Type of the left-hand side target matrix
1844  , typename MT4 // Type of the left-hand side matrix operand
1845  , typename MT5 > // Type of the right-hand side matrix operand
1846  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1847  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1848  {
// NOTE(review): IT::size is presumably the number of elements per intrinsic vector -- confirm
// against IntrinsicTrait.
1849  typedef IntrinsicTrait<ElementType> IT;
1850 
1851  const size_t M( A.rows() );
1852  const size_t N( B.columns() );
1853  const size_t K( A.columns() );
1854 
// Row index; shared by all following loops so that each loop continues where the previous
// one stopped.
1855  size_t i( 0UL );
1856 
// Row blocks of 8 intrinsic vectors, one column of C per iteration.
// NOTE(review): the loop condition only requires the 8th vector to start below M, so loads
// and stores may reach beyond row M-1; this presumably relies on padded storage -- confirm.
1857  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1858  for( size_t j=0UL; j<N; ++j ) {
1859  IntrinsicType xmm1( (~C).load(i ,j) );
1860  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1861  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1862  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1863  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1864  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1865  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1866  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1867  for( size_t k=0UL; k<K; ++k ) {
// NOTE(review): set() presumably replicates the scalar B(k,j) across the vector -- confirm.
1868  const IntrinsicType b1( set( B(k,j) ) );
1869  xmm1 = xmm1 - A.load(i ,k) * b1;
1870  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1871  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1872  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1873  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1874  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1875  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1876  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1877  }
1878  (~C).store( i , j, xmm1 );
1879  (~C).store( i+IT::size , j, xmm2 );
1880  (~C).store( i+IT::size*2UL, j, xmm3 );
1881  (~C).store( i+IT::size*3UL, j, xmm4 );
1882  (~C).store( i+IT::size*4UL, j, xmm5 );
1883  (~C).store( i+IT::size*5UL, j, xmm6 );
1884  (~C).store( i+IT::size*6UL, j, xmm7 );
1885  (~C).store( i+IT::size*7UL, j, xmm8 );
1886  }
1887  }
// Row blocks of 4 intrinsic vectors, two columns of C per iteration (xmm1-4: column j,
// xmm5-8: column j+1).
1888  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1889  size_t j( 0UL );
1890  for( ; (j+2UL) <= N; j+=2UL ) {
1891  IntrinsicType xmm1( (~C).load(i ,j ) );
1892  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1893  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1894  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1895  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1896  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1897  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1898  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1899  for( size_t k=0UL; k<K; ++k ) {
1900  const IntrinsicType a1( A.load(i ,k) );
1901  const IntrinsicType a2( A.load(i+IT::size ,k) );
1902  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1903  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1904  const IntrinsicType b1( set( B(k,j ) ) );
1905  const IntrinsicType b2( set( B(k,j+1UL) ) );
1906  xmm1 = xmm1 - a1 * b1;
1907  xmm2 = xmm2 - a2 * b1;
1908  xmm3 = xmm3 - a3 * b1;
1909  xmm4 = xmm4 - a4 * b1;
1910  xmm5 = xmm5 - a1 * b2;
1911  xmm6 = xmm6 - a2 * b2;
1912  xmm7 = xmm7 - a3 * b2;
1913  xmm8 = xmm8 - a4 * b2;
1914  }
1915  (~C).store( i , j , xmm1 );
1916  (~C).store( i+IT::size , j , xmm2 );
1917  (~C).store( i+IT::size*2UL, j , xmm3 );
1918  (~C).store( i+IT::size*3UL, j , xmm4 );
1919  (~C).store( i , j+1UL, xmm5 );
1920  (~C).store( i+IT::size , j+1UL, xmm6 );
1921  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1922  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1923  }
// Remaining single column in case of an odd N
1924  if( j < N ) {
1925  IntrinsicType xmm1( (~C).load(i ,j) );
1926  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1927  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1928  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1929  for( size_t k=0UL; k<K; ++k ) {
1930  const IntrinsicType b1( set( B(k,j) ) );
1931  xmm1 = xmm1 - A.load(i ,k) * b1;
1932  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1933  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1934  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1935  }
1936  (~C).store( i , j, xmm1 );
1937  (~C).store( i+IT::size , j, xmm2 );
1938  (~C).store( i+IT::size*2UL, j, xmm3 );
1939  (~C).store( i+IT::size*3UL, j, xmm4 );
1940  }
1941  }
// Row blocks of 2 intrinsic vectors, two columns of C per iteration (xmm1-2: column j,
// xmm3-4: column j+1).
1942  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1943  size_t j( 0UL );
1944  for( ; (j+2UL) <= N; j+=2UL ) {
1945  IntrinsicType xmm1( (~C).load(i ,j ) );
1946  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1947  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1948  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1949  for( size_t k=0UL; k<K; ++k ) {
1950  const IntrinsicType a1( A.load(i ,k) );
1951  const IntrinsicType a2( A.load(i+IT::size,k) );
1952  const IntrinsicType b1( set( B(k,j ) ) );
1953  const IntrinsicType b2( set( B(k,j+1UL) ) );
1954  xmm1 = xmm1 - a1 * b1;
1955  xmm2 = xmm2 - a2 * b1;
1956  xmm3 = xmm3 - a1 * b2;
1957  xmm4 = xmm4 - a2 * b2;
1958  }
1959  (~C).store( i , j , xmm1 );
1960  (~C).store( i+IT::size, j , xmm2 );
1961  (~C).store( i , j+1UL, xmm3 );
1962  (~C).store( i+IT::size, j+1UL, xmm4 );
1963  }
// Remaining single column in case of an odd N
1964  if( j < N ) {
1965  IntrinsicType xmm1( (~C).load(i ,j) );
1966  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1967  for( size_t k=0UL; k<K; ++k ) {
1968  const IntrinsicType b1( set( B(k,j) ) );
1969  xmm1 = xmm1 - A.load(i ,k) * b1;
1970  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1971  }
1972  (~C).store( i , j, xmm1 );
1973  (~C).store( i+IT::size, j, xmm2 );
1974  }
1975  }
// Final block of a single intrinsic vector for the rows that are still unprocessed
1976  if( i < M ) {
1977  size_t j( 0UL );
1978  for( ; (j+2UL) <= N; j+=2UL ) {
1979  IntrinsicType xmm1( (~C).load(i,j ) );
1980  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1981  for( size_t k=0UL; k<K; ++k ) {
1982  const IntrinsicType a1( A.load(i,k) );
1983  xmm1 = xmm1 - a1 * set( B(k,j ) );
1984  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1985  }
1986  (~C).store( i, j , xmm1 );
1987  (~C).store( i, j+1UL, xmm2 );
1988  }
// Remaining single column in case of an odd N
1989  if( j < N ) {
1990  IntrinsicType xmm1( (~C).load(i,j) );
1991  for( size_t k=0UL; k<K; ++k ) {
1992  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
1993  }
1994  (~C).store( i, j, xmm1 );
1995  }
1996  }
1997  }
1999  //**********************************************************************************************
2000 
2001  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
2015  template< typename MT3 // Type of the left-hand side target matrix
2016  , typename MT4 // Type of the left-hand side matrix operand
2017  , typename MT5 > // Type of the right-hand side matrix operand
2018  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2019  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2020  {
2021  selectDefaultSubAssignKernel( C, A, B );
2022  }
2024  //**********************************************************************************************
2025 
2026  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
2027 #if BLAZE_BLAS_MODE
2028 
2041  template< typename MT3 // Type of the left-hand side target matrix
2042  , typename MT4 // Type of the left-hand side matrix operand
2043  , typename MT5 > // Type of the right-hand side matrix operand
2044  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2045  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2046  {
2047  sgemm( C, A, B, -1.0F, 1.0F );
2048  }
2050 #endif
2051  //**********************************************************************************************
2052 
2053  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2054 #if BLAZE_BLAS_MODE
2055 
2068  template< typename MT3 // Type of the left-hand side target matrix
2069  , typename MT4 // Type of the left-hand side matrix operand
2070  , typename MT5 > // Type of the right-hand side matrix operand
2071  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2072  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2073  {
2074  dgemm( C, A, B, -1.0, 1.0 );
2075  }
2077 #endif
2078  //**********************************************************************************************
2079 
2080  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2081 #if BLAZE_BLAS_MODE
2082 
2095  template< typename MT3 // Type of the left-hand side target matrix
2096  , typename MT4 // Type of the left-hand side matrix operand
2097  , typename MT5 > // Type of the right-hand side matrix operand
2098  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2099  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2100  {
2101  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2102  }
2104 #endif
2105  //**********************************************************************************************
2106 
2107  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2108 #if BLAZE_BLAS_MODE
2109 
2122  template< typename MT3 // Type of the left-hand side target matrix
2123  , typename MT4 // Type of the left-hand side matrix operand
2124  , typename MT5 > // Type of the right-hand side matrix operand
2125  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2126  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2127  {
2128  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2129  }
2131 #endif
2132  //**********************************************************************************************
2133 
2134  //**Restructuring subtraction assignment to row-major matrices**********************************
// Restructuring subtraction assignment of the matrix product to a row-major matrix.
// The overload is enabled only when CanExploitSymmetry<MT,MT1,MT2> is satisfied.
//   lhs : target matrix
//   rhs : the multiplication expression to be subtracted
// Instead of evaluating the product itself, the function re-dispatches to subAssign()
// with a product in which the symmetric operand(s) are wrapped in trans().
2150  template< typename MT > // Type of the target matrix
2151  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2152  subAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2153  {
2155 
2157 
2158  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2159  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2160 
      // Both operands symmetric: transpose both. Only the left one symmetric: transpose
      // the left one. Otherwise transpose the right one.
      // NOTE(review): the final branch presumably implies a symmetric MT2, given the
      // enabling condition - this class's CanExploitSymmetry is outside the excerpt.
2161  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2162  subAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2163  else if( IsSymmetric<MT1>::value )
2164  subAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2165  else
2166  subAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2167  }
2169  //**********************************************************************************************
2170 
2171  //**Subtraction assignment to sparse matrices***************************************************
2172  // No special implementation for the subtraction assignment to sparse matrices.
2173  //**********************************************************************************************
2174 
2175  //**Multiplication assignment to dense matrices*************************************************
2176  // No special implementation for the multiplication assignment to dense matrices.
2177  //**********************************************************************************************
2178 
2179  //**Multiplication assignment to sparse matrices************************************************
2180  // No special implementation for the multiplication assignment to sparse matrices.
2181  //**********************************************************************************************
2182 
2183  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the matrix product to a dense matrix of arbitrary storage order.
// The overload is enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied.
//   lhs : target dense matrix
//   rhs : the multiplication expression to be assigned
// Both operands are first evaluated into the temporaries A and B (types LT and RT, which
// are declared outside this excerpt); the product A * B is then passed to smpAssign().
2199  template< typename MT // Type of the target dense matrix
2200  , bool SO > // Storage order of the target dense matrix
2201  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2202  smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2203  {
2205 
2206  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2207  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2208 
      // An empty target requires no work at all.
2209  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2210  return;
2211  }
      // The left operand has no columns (zero inner dimension): the target is only reset.
2212  else if( rhs.lhs_.columns() == 0UL ) {
2213  reset( ~lhs );
2214  return;
2215  }
2216 
2217  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2218  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2219 
      // Sanity checks: the evaluated operands keep the operand sizes and match the target.
2220  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2221  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2222  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2223  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2224  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2225  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2226 
2227  smpAssign( ~lhs, A * B );
2228  }
2230  //**********************************************************************************************
2231 
2232  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the matrix product to a sparse matrix of arbitrary storage order.
// The overload is enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied.
//   lhs : target sparse matrix
//   rhs : the multiplication expression to be assigned
// The expression is first evaluated into a temporary, which is then passed to smpAssign().
2248  template< typename MT // Type of the target sparse matrix
2249  , bool SO > // Storage order of the target sparse matrix
2250  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2251  smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2252  {
2254 
      // Type of the temporary, chosen from ResultType and OppositeType by the target's
      // storage order flag SO.
      // NOTE(review): presumably SelectType yields ResultType for SO == true - confirm.
2255  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2256 
2263 
2264  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2265  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2266 
2267  const TmpType tmp( rhs );
2268  smpAssign( ~lhs, tmp );
2269  }
2271  //**********************************************************************************************
2272 
2273  //**Restructuring SMP assignment to row-major matrices******************************************
// Restructuring SMP assignment of the matrix product to a row-major matrix.
// The overload is enabled only when CanExploitSymmetry<MT,MT1,MT2> is satisfied.
//   lhs : target matrix
//   rhs : the multiplication expression to be assigned
// Instead of evaluating the product itself, the function re-dispatches to smpAssign()
// with a product in which the symmetric operand(s) are wrapped in trans().
2288  template< typename MT > // Type of the target matrix
2289  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2290  smpAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2291  {
2293 
2295 
2296  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2297  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2298 
      // Both operands symmetric: transpose both. Only the left one symmetric: transpose
      // the left one. Otherwise transpose the right one.
      // NOTE(review): the final branch presumably implies a symmetric MT2, given the
      // enabling condition - this class's CanExploitSymmetry is outside the excerpt.
2299  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2300  smpAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2301  else if( IsSymmetric<MT1>::value )
2302  smpAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2303  else
2304  smpAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2305  }
2307  //**********************************************************************************************
2308 
2309  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the matrix product to a dense matrix of arbitrary storage
// order. The overload is enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied.
//   lhs : target dense matrix
//   rhs : the multiplication expression to be added
// Both operands are first evaluated into the temporaries A and B (types LT and RT, which
// are declared outside this excerpt); the product A * B is then passed to smpAddAssign().
2325  template< typename MT // Type of the target dense matrix
2326  , bool SO > // Storage order of the target dense matrix
2327  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2328  smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2329  {
2331 
2332  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2333  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2334 
      // Nothing is added when the target is empty or the left operand has no columns;
      // unlike the plain assignment, the target is left untouched in the latter case.
2335  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2336  return;
2337  }
2338 
2339  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2340  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2341 
      // Sanity checks: the evaluated operands keep the operand sizes and match the target.
2342  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2343  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2344  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2345  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2346  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2347  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2348 
2349  smpAddAssign( ~lhs, A * B );
2350  }
2352  //**********************************************************************************************
2353 
2354  //**Restructuring SMP addition assignment to row-major matrices*********************************
// Restructuring SMP addition assignment of the matrix product to a row-major matrix.
// The overload is enabled only when CanExploitSymmetry<MT,MT1,MT2> is satisfied.
//   lhs : target matrix
//   rhs : the multiplication expression to be added
// Instead of evaluating the product itself, the function re-dispatches to smpAddAssign()
// with a product in which the symmetric operand(s) are wrapped in trans().
2370  template< typename MT > // Type of the target matrix
2371  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2372  smpAddAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2373  {
2375 
2377 
2378  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2379  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2380 
      // Both operands symmetric: transpose both. Only the left one symmetric: transpose
      // the left one. Otherwise transpose the right one.
      // NOTE(review): the final branch presumably implies a symmetric MT2, given the
      // enabling condition - this class's CanExploitSymmetry is outside the excerpt.
2381  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2382  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2383  else if( IsSymmetric<MT1>::value )
2384  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2385  else
2386  smpAddAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2387  }
2389  //**********************************************************************************************
2390 
2391  //**SMP addition assignment to sparse matrices**************************************************
2392  // No special implementation for the SMP addition assignment to sparse matrices.
2393  //**********************************************************************************************
2394 
2395  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the matrix product to a dense matrix of arbitrary storage
// order. The overload is enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied.
//   lhs : target dense matrix
//   rhs : the multiplication expression to be subtracted
// Both operands are first evaluated into the temporaries A and B (types LT and RT, which
// are declared outside this excerpt); the product A * B is then passed to smpSubAssign().
2411  template< typename MT // Type of the target dense matrix
2412  , bool SO > // Storage order of the target dense matrix
2413  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2414  smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2415  {
2417 
2418  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2419  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2420 
      // Nothing is subtracted when the target is empty or the left operand has no columns;
      // unlike the plain assignment, the target is left untouched in the latter case.
2421  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2422  return;
2423  }
2424 
2425  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2426  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2427 
      // Sanity checks: the evaluated operands keep the operand sizes and match the target.
2428  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2429  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2430  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2431  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2432  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2433  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2434 
2435  smpSubAssign( ~lhs, A * B );
2436  }
2438  //**********************************************************************************************
2439 
2440  //**Restructuring SMP subtraction assignment to row-major matrices******************************
// Restructuring SMP subtraction assignment of the matrix product to a row-major matrix.
// The overload is enabled only when CanExploitSymmetry<MT,MT1,MT2> is satisfied.
//   lhs : target matrix
//   rhs : the multiplication expression to be subtracted
// Instead of evaluating the product itself, the function re-dispatches to smpSubAssign()
// with a product in which the symmetric operand(s) are wrapped in trans().
2456  template< typename MT > // Type of the target matrix
2457  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2458  smpSubAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2459  {
2461 
2463 
2464  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2465  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2466 
      // Both operands symmetric: transpose both. Only the left one symmetric: transpose
      // the left one. Otherwise transpose the right one.
      // NOTE(review): the final branch presumably implies a symmetric MT2, given the
      // enabling condition - this class's CanExploitSymmetry is outside the excerpt.
2467  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2468  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2469  else if( IsSymmetric<MT1>::value )
2470  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2471  else
2472  smpSubAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2473  }
2475  //**********************************************************************************************
2476 
2477  //**SMP subtraction assignment to sparse matrices***********************************************
2478  // No special implementation for the SMP subtraction assignment to sparse matrices.
2479  //**********************************************************************************************
2480 
2481  //**SMP multiplication assignment to dense matrices*********************************************
2482  // No special implementation for the SMP multiplication assignment to dense matrices.
2483  //**********************************************************************************************
2484 
2485  //**SMP multiplication assignment to sparse matrices********************************************
2486  // No special implementation for the SMP multiplication assignment to sparse matrices.
2487  //**********************************************************************************************
2488 
2489  //**Compile time checks*************************************************************************
2497  //**********************************************************************************************
2498 };
2499 //*************************************************************************************************
2500 
2501 
2502 
2503 
2504 //=================================================================================================
2505 //
2506 // DMATSCALARMULTEXPR SPECIALIZATION
2507 //
2508 //=================================================================================================
2509 
2510 //*************************************************************************************************
2518 template< typename MT1 // Type of the left-hand side dense matrix
2519  , typename MT2 // Type of the right-hand side dense matrix
2520  , typename ST > // Type of the right-hand side scalar value
2521 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
2522  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2523  , private MatScalarMultExpr
2524  , private Computation
2525 {
2526  private:
2527  //**Type definitions****************************************************************************
// Private shorthand types of this specialization:
//   MMM     : the wrapped matrix/matrix multiplication expression
//   RES     : result type of that multiplication expression
//   RT1/RT2 : result types of the left/right matrix operand
//   ET1/ET2 : element types of RT1/RT2
//   CT1/CT2 : composite types of the left/right matrix operand
2528  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
2529  typedef typename MMM::ResultType RES;
2530  typedef typename MT1::ResultType RT1;
2531  typedef typename MT2::ResultType RT2;
2532  typedef typename RT1::ElementType ET1;
2533  typedef typename RT2::ElementType ET2;
2534  typedef typename MT1::CompositeType CT1;
2535  typedef typename MT2::CompositeType CT2;
2536  //**********************************************************************************************
2537 
2538  //**********************************************************************************************
// Compile-time flag for the left operand: set when MT1 is a computation or when
// RequiresEvaluation<MT1> reports that it needs an intermediate evaluation.
2540  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2541  //**********************************************************************************************
2542 
2543  //**********************************************************************************************
// Compile-time flag for the right operand: set when MT2 is a computation or when
// RequiresEvaluation<MT2> reports that it needs an intermediate evaluation.
2545  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2546  //**********************************************************************************************
2547 
2548  //**********************************************************************************************
2550 
// Helper trait: value is nonzero when T1 is a row-major matrix type and at least one of
// T2 and T3 is a symmetric matrix type. It gates the overloads of this class via
// EnableIf/DisableIf and feeds IsEvaluationRequired.
2555  template< typename T1, typename T2, typename T3 >
2556  struct CanExploitSymmetry {
2557  enum { value = IsRowMajorMatrix<T1>::value &&
2558  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
2559  };
2560  //**********************************************************************************************
2561 
2562  //**********************************************************************************************
2564 
// Helper trait: value is nonzero when at least one of the two operands has to be
// evaluated (evaluateLeft or evaluateRight) and CanExploitSymmetry<T1,T2,T3> does not
// apply to the given type combination.
2567  template< typename T1, typename T2, typename T3 >
2568  struct IsEvaluationRequired {
2569  enum { value = ( evaluateLeft || evaluateRight ) &&
2570  !CanExploitSymmetry<T1,T2,T3>::value };
2571  };
2572  //**********************************************************************************************
2573 
2574  //**********************************************************************************************
2576 
// Helper trait selecting the single precision BLAS kernel. value is nonzero when
// BLAZE_BLAS_MODE is active, all three matrix types T1, T2 and T3 are vectorizable, all
// three element types satisfy IsFloat, and T4 is not a complex type.
// NOTE(review): T4 is presumably the type of the scalar factor - confirm against the
// call sites, which are outside this excerpt.
2579  template< typename T1, typename T2, typename T3, typename T4 >
2580  struct UseSinglePrecisionKernel {
2581  enum { value = BLAZE_BLAS_MODE &&
2582  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2583  IsFloat<typename T1::ElementType>::value &&
2584  IsFloat<typename T2::ElementType>::value &&
2585  IsFloat<typename T3::ElementType>::value &&
2586  !IsComplex<T4>::value };
2587  };
2588  //**********************************************************************************************
2589 
2590  //**********************************************************************************************
2592 
// Helper trait selecting the double precision BLAS kernel. value is nonzero when
// BLAZE_BLAS_MODE is active, all three matrix types T1, T2 and T3 are vectorizable, all
// three element types satisfy IsDouble, and T4 is not a complex type.
// NOTE(review): T4 is presumably the type of the scalar factor - confirm against the
// call sites, which are outside this excerpt.
2595  template< typename T1, typename T2, typename T3, typename T4 >
2596  struct UseDoublePrecisionKernel {
2597  enum { value = BLAZE_BLAS_MODE &&
2598  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2599  IsDouble<typename T1::ElementType>::value &&
2600  IsDouble<typename T2::ElementType>::value &&
2601  IsDouble<typename T3::ElementType>::value &&
2602  !IsComplex<T4>::value };
2603  };
2604  //**********************************************************************************************
2605 
2606  //**********************************************************************************************
2608 
// Helper trait selecting the single precision complex BLAS kernel. value is nonzero when
// BLAZE_BLAS_MODE is active, all three matrix types are vectorizable, and the element
// type of each of them is exactly complex<float>.
2611  template< typename T1, typename T2, typename T3 >
2612  struct UseSinglePrecisionComplexKernel {
2613  typedef complex<float> Type;
2614  enum { value = BLAZE_BLAS_MODE &&
2615  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2616  IsSame<typename T1::ElementType,Type>::value &&
2617  IsSame<typename T2::ElementType,Type>::value &&
2618  IsSame<typename T3::ElementType,Type>::value };
2619  };
2620  //**********************************************************************************************
2621 
2622  //**********************************************************************************************
2624 
// Helper trait selecting the double precision complex BLAS kernel. value is nonzero when
// BLAZE_BLAS_MODE is active, all three matrix types are vectorizable, and the element
// type of each of them is exactly complex<double>.
2627  template< typename T1, typename T2, typename T3 >
2628  struct UseDoublePrecisionComplexKernel {
2629  typedef complex<double> Type;
2630  enum { value = BLAZE_BLAS_MODE &&
2631  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2632  IsSame<typename T1::ElementType,Type>::value &&
2633  IsSame<typename T2::ElementType,Type>::value &&
2634  IsSame<typename T3::ElementType,Type>::value };
2635  };
2636  //**********************************************************************************************
2637 
2638  //**********************************************************************************************
2640 
// Helper trait selecting the non-BLAS default kernel. value is nonzero when
// BLAZE_BLAS_MODE is inactive, or when none of the four BLAS kernel traits above
// (single/double precision, real/complex) applies to the given type combination.
2642  template< typename T1, typename T2, typename T3, typename T4 >
2643  struct UseDefaultKernel {
2644  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2645  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2646  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2647  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2648  };
2649  //**********************************************************************************************
2650 
2651  //**********************************************************************************************
2653 
// Helper trait selecting the vectorized variant of the default kernel. value is nonzero
// when all three matrix types are vectorizable, T1, T2 and T3 share the same element
// type, T4 is that same type as well, and IntrinsicTrait reports addition, subtraction
// and multiplication support for that element type.
2655  template< typename T1, typename T2, typename T3, typename T4 >
2656  struct UseVectorizedDefaultKernel {
2657  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2658  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2659  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2660  IsSame<typename T1::ElementType,T4>::value &&
2661  IntrinsicTrait<typename T1::ElementType>::addition &&
2662  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2663  IntrinsicTrait<typename T1::ElementType>::multiplication };
2664  };
2665  //**********************************************************************************************
2666 
2667  public:
2668  //**Type definitions****************************************************************************
// Public type interface of the scaled matrix product expression.
//   This          : type of this expression specialization
//   ResultType    : MultTrait result of scaling RES by ST
//   OppositeType / TransposeType / ElementType : taken from ResultType
//   IntrinsicType : IntrinsicTrait type belonging to ElementType
//   ReturnType    : type returned by the access operator (const ElementType)
//   CompositeType : const ResultType
2669  typedef DMatScalarMultExpr<MMM,ST,true> This;
2670  typedef typename MultTrait<RES,ST>::Type ResultType;
2671  typedef typename ResultType::OppositeType OppositeType;
2672  typedef typename ResultType::TransposeType TransposeType;
2673  typedef typename ResultType::ElementType ElementType;
2674  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2675  typedef const ElementType ReturnType;
2676  typedef const ResultType CompositeType;
2677 
      // Type of the left-hand side operand: the wrapped matrix product expression.
2679  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2680 
      // Type of the right-hand side operand: the scalar value.
2682  typedef ST RightOperand;
2683 
      // Type used for the left matrix operand inside the kernels, selected from const RT1
      // and CT1 by evaluateLeft.
      // NOTE(review): presumably SelectType yields const RT1 when evaluateLeft is set.
2685  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2686 
      // Type used for the right matrix operand inside the kernels, selected from const RT2
      // and CT2 by evaluateRight.
      // NOTE(review): presumably SelectType yields const RT2 when evaluateRight is set.
2688  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2689  //**********************************************************************************************
2690 
2691  //**Compilation flags***************************************************************************
// Compilation flag for vectorization: set when both matrix operand types are
// vectorizable, both element types and the scalar type ST are identical, and
// IntrinsicTrait reports addition and multiplication support for that element type.
2693  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2694  IsSame<ET1,ET2>::value &&
2695  IsSame<ET1,ST>::value &&
2696  IntrinsicTrait<ET1>::addition &&
2697  IntrinsicTrait<ET1>::multiplication };
2698 
      // Compilation flag for SMP assignments: set when neither operand needs an
      // intermediate evaluation and both operand types are themselves SMP-assignable.
2700  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2701  !evaluateRight && MT2::smpAssignable };
2702  //**********************************************************************************************
2703 
2704  //**Constructor*********************************************************************************
// Constructor: stores the given matrix product expression and the scalar factor.
//   matrix : left-hand side matrix product expression
//   scalar : right-hand side scalar of the multiplication expression
2710  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2711  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2712  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2713  {}
2714  //**********************************************************************************************
2715 
2716  //**Access operator*****************************************************************************
// 2D element access: returns element (i,j) of the wrapped matrix product multiplied by
// the scalar factor.
//   i : row index, asserted to be smaller than the number of rows
//   j : column index, asserted to be smaller than the number of columns
// The index checks are internal assertions only (BLAZE_INTERNAL_ASSERT).
2723  inline ReturnType operator()( size_t i, size_t j ) const {
2724  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2725  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2726  return matrix_(i,j) * scalar_;
2727  }
2728  //**********************************************************************************************
2729 
2730  //**Rows function*******************************************************************************
// Returns the number of rows of the expression, i.e. of the wrapped matrix product.
2735  inline size_t rows() const {
2736  return matrix_.rows();
2737  }
2738  //**********************************************************************************************
2739 
2740  //**Columns function****************************************************************************
// Returns the number of columns of the expression, i.e. of the wrapped matrix product.
2745  inline size_t columns() const {
2746  return matrix_.columns();
2747  }
2748  //**********************************************************************************************
2749 
2750  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped matrix product expression.
2755  inline LeftOperand leftOperand() const {
2756  return matrix_;
2757  }
2758  //**********************************************************************************************
2759 
2760  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
2765  inline RightOperand rightOperand() const {
2766  return scalar_;
2767  }
2768  //**********************************************************************************************
2769 
2770  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//   alias : the address to be checked
// The query is delegated unchanged to the wrapped matrix product expression.
2776  template< typename T >
2777  inline bool canAlias( const T* alias ) const {
2778  return matrix_.canAlias( alias );
2779  }
2780  //**********************************************************************************************
2781 
2782  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//   alias : the address to be checked
// The query is delegated unchanged to the wrapped matrix product expression.
2788  template< typename T >
2789  inline bool isAliased( const T* alias ) const {
2790  return matrix_.isAliased( alias );
2791  }
2792  //**********************************************************************************************
2793 
2794  //**********************************************************************************************
// Reports the alignment state of the expression by delegating to the wrapped matrix
// product expression.
2799  inline bool isAligned() const {
2800  return matrix_.isAligned();
2801  }
2802  //**********************************************************************************************
2803 
2804  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments. Returns true when
//   - BLAZE_BLAS_IS_PARALLEL is not set, or the result has fewer than
//     TDMATTDMATMULT_THRESHOLD elements (rows() * columns()), and
//   - the right operand of the matrix product has more than
//     SMP_TDMATTDMATMULT_THRESHOLD columns.
2809  inline bool canSMPAssign() const {
2810  typename MMM::RightOperand B( matrix_.rightOperand() );
2811  return ( !BLAZE_BLAS_IS_PARALLEL ||
2812  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
2813  ( B.columns() > SMP_TDMATTDMATMULT_THRESHOLD );
2814  }
2815  //**********************************************************************************************
2816 
2817  private:
2818  //**Member variables****************************************************************************
// matrix_ : left-hand side matrix product expression (LeftOperand is a const value type,
//           so the expression object is held by value)
// scalar_ : right-hand side scalar factor
2819  LeftOperand matrix_;
2820  RightOperand scalar_;
2821  //**********************************************************************************************
2822 
2823  //**BLAS kernel (single precision)**************************************************************
2824 #if BLAZE_BLAS_MODE
2825 
// Single precision BLAS kernel: passes the raw data of C, A and B together with the
// factors alpha and beta to cblas_ssymm() or cblas_sgemm().
//   C     : target matrix
//   A     : left-hand side matrix operand
//   B     : right-hand side matrix operand
//   alpha : scaling factor passed to the BLAS routine for the product
//   beta  : scaling factor passed to the BLAS routine for C
// In standard BLAS semantics these routines compute C = alpha*A*B + beta*C.
2839  template< typename MT3 // Type of the left-hand side target matrix
2840  , typename MT4 // Type of the left-hand side matrix operand
2841  , typename MT5 > // Type of the right-hand side matrix operand
2842  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
2843  {
2844  using boost::numeric_cast;
2845 
2849 
      // CBLAS takes int dimensions; the size_t values are converted via numeric_cast.
      // M x N is the size of the result, K the inner dimension; lda/ldb/ldc are the
      // spacing() values of the three matrices, passed as leading dimensions.
2850  const int M ( numeric_cast<int>( A.rows() ) );
2851  const int N ( numeric_cast<int>( B.columns() ) );
2852  const int K ( numeric_cast<int>( A.columns() ) );
2853  const int lda( numeric_cast<int>( A.spacing() ) );
2854  const int ldb( numeric_cast<int>( B.spacing() ) );
2855  const int ldc( numeric_cast<int>( C.spacing() ) );
2856 
      // Symmetric left operand and column-major target: symmetric multiply with A
      // applied from the left.
2857  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2858  cblas_ssymm( CblasColMajor, CblasLeft, CblasUpper,
2859  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2860  }
      // Symmetric right operand and column-major target: B is passed in the symmetric
      // matrix slot and applied from the right, A takes the general matrix slot.
2861  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2862  cblas_ssymm( CblasColMajor, CblasRight, CblasUpper,
2863  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2864  }
      // General case: the order flag follows the storage order of the target. For a
      // row-major target both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
2865  else {
2866  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2867  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2868  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2869  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2870  }
2871  }
2872 #endif
2873  //**********************************************************************************************
2874 
2875  //**BLAS kernel (double precision)**************************************************************
2876 #if BLAZE_BLAS_MODE
2877 
// Double precision BLAS kernel: passes the raw data of C, A and B together with the
// factors alpha and beta to cblas_dsymm() or cblas_dgemm().
//   C     : target matrix
//   A     : left-hand side matrix operand
//   B     : right-hand side matrix operand
//   alpha : scaling factor passed to the BLAS routine for the product
//   beta  : scaling factor passed to the BLAS routine for C
// In standard BLAS semantics these routines compute C = alpha*A*B + beta*C.
2891  template< typename MT3 // Type of the left-hand side target matrix
2892  , typename MT4 // Type of the left-hand side matrix operand
2893  , typename MT5 > // Type of the right-hand side matrix operand
2894  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
2895  {
2896  using boost::numeric_cast;
2897 
2901 
      // CBLAS takes int dimensions; the size_t values are converted via numeric_cast.
      // M x N is the size of the result, K the inner dimension; lda/ldb/ldc are the
      // spacing() values of the three matrices, passed as leading dimensions.
2902  const int M ( numeric_cast<int>( A.rows() ) );
2903  const int N ( numeric_cast<int>( B.columns() ) );
2904  const int K ( numeric_cast<int>( A.columns() ) );
2905  const int lda( numeric_cast<int>( A.spacing() ) );
2906  const int ldb( numeric_cast<int>( B.spacing() ) );
2907  const int ldc( numeric_cast<int>( C.spacing() ) );
2908 
      // Symmetric left operand and column-major target: symmetric multiply with A
      // applied from the left.
2909  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2910  cblas_dsymm( CblasColMajor, CblasLeft, CblasUpper,
2911  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2912  }
      // Symmetric right operand and column-major target: B is passed in the symmetric
      // matrix slot and applied from the right, A takes the general matrix slot.
2913  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2914  cblas_dsymm( CblasColMajor, CblasRight, CblasUpper,
2915  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2916  }
      // General case: the order flag follows the storage order of the target. For a
      // row-major target both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
2917  else {
2918  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2919  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2920  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2921  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2922  }
2923  }
2924 #endif
2925  //**********************************************************************************************
2926 
2927  //**BLAS kernel (single precision complex)******************************************************
2928 #if BLAZE_BLAS_MODE
2929 
// Single precision complex BLAS kernel: passes the raw data of C, A and B together with
// the addresses of the complex factors alpha and beta to cblas_csymm() or cblas_cgemm().
//   C     : target matrix
//   A     : left-hand side matrix operand
//   B     : right-hand side matrix operand
//   alpha : scaling factor passed to the BLAS routine for the product
//   beta  : scaling factor passed to the BLAS routine for C
// In standard BLAS semantics these routines compute C = alpha*A*B + beta*C.
2944  template< typename MT3 // Type of the left-hand side target matrix
2945  , typename MT4 // Type of the left-hand side matrix operand
2946  , typename MT5 > // Type of the right-hand side matrix operand
2947  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
2948  complex<float> alpha, complex<float> beta )
2949  {
2950  using boost::numeric_cast;
2951 
      // Compile-time checks: the value_type of every complex element type must be float.
2955  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2956  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2957  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2958 
      // CBLAS takes int dimensions; the size_t values are converted via numeric_cast.
      // M x N is the size of the result, K the inner dimension; lda/ldb/ldc are the
      // spacing() values of the three matrices, passed as leading dimensions.
2959  const int M ( numeric_cast<int>( A.rows() ) );
2960  const int N ( numeric_cast<int>( B.columns() ) );
2961  const int K ( numeric_cast<int>( A.columns() ) );
2962  const int lda( numeric_cast<int>( A.spacing() ) );
2963  const int ldb( numeric_cast<int>( B.spacing() ) );
2964  const int ldc( numeric_cast<int>( C.spacing() ) );
2965 
      // Symmetric left operand and column-major target: symmetric multiply with A
      // applied from the left.
2966  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2967  cblas_csymm( CblasColMajor, CblasLeft, CblasUpper,
2968  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2969  }
      // Symmetric right operand and column-major target: B is passed in the symmetric
      // matrix slot and applied from the right, A takes the general matrix slot.
2970  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2971  cblas_csymm( CblasColMajor, CblasRight, CblasUpper,
2972  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
2973  }
      // General case: the order flag follows the storage order of the target. For a
      // row-major target both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
2974  else {
2975  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2976  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2977  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2978  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2979  }
2980  }
2981 #endif
2982  //**********************************************************************************************
2983 
2984  //**BLAS kernel (double precision complex)******************************************************
2985 #if BLAZE_BLAS_MODE
2986 
// Double precision complex BLAS kernel: passes the raw data of C, A and B together with
// the addresses of the complex factors alpha and beta to cblas_zsymm() or cblas_zgemm().
//   C     : target matrix
//   A     : left-hand side matrix operand
//   B     : right-hand side matrix operand
//   alpha : scaling factor passed to the BLAS routine for the product
//   beta  : scaling factor passed to the BLAS routine for C
// In standard BLAS semantics these routines compute C = alpha*A*B + beta*C.
3001  template< typename MT3 // Type of the left-hand side target matrix
3002  , typename MT4 // Type of the left-hand side matrix operand
3003  , typename MT5 > // Type of the right-hand side matrix operand
3004  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
3005  complex<double> alpha, complex<double> beta )
3006  {
3007  using boost::numeric_cast;
3008 
      // Compile-time checks: the value_type of every complex element type must be double.
3012  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3013  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3014  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3015 
      // CBLAS takes int dimensions; the size_t values are converted via numeric_cast.
      // M x N is the size of the result, K the inner dimension; lda/ldb/ldc are the
      // spacing() values of the three matrices, passed as leading dimensions.
3016  const int M ( numeric_cast<int>( A.rows() ) );
3017  const int N ( numeric_cast<int>( B.columns() ) );
3018  const int K ( numeric_cast<int>( A.columns() ) );
3019  const int lda( numeric_cast<int>( A.spacing() ) );
3020  const int ldb( numeric_cast<int>( B.spacing() ) );
3021  const int ldc( numeric_cast<int>( C.spacing() ) );
3022 
      // Symmetric left operand and column-major target: symmetric multiply with A
      // applied from the left.
3023  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
3024  cblas_zsymm( CblasColMajor, CblasLeft, CblasUpper,
3025  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3026  }
      // Symmetric right operand and column-major target: B is passed in the symmetric
      // matrix slot and applied from the right, A takes the general matrix slot.
3027  else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
3028  cblas_zsymm( CblasColMajor, CblasRight, CblasUpper,
3029  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
3030  }
      // General case: the order flag follows the storage order of the target. For a
      // row-major target both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
3031  else {
3032  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3033  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3034  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3035  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3036  }
3037  }
3038 #endif
3039  //**********************************************************************************************
3040 
3041  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled matrix product to a dense matrix of arbitrary storage order.
// The overload is disabled (DisableIf) when CanExploitSymmetry<MT,MT1,MT2> applies.
//   lhs : target dense matrix
//   rhs : the scaled multiplication expression to be assigned
// The two matrix operands of the wrapped product are evaluated into the temporaries A
// and B; the actual computation is delegated to selectAssignKernel() together with the
// scalar factor.
3053  template< typename MT // Type of the target dense matrix
3054  , bool SO > // Storage order of the target dense matrix
3055  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3056  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3057  {
3059 
3060  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3061  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3062 
      // Operands of the wrapped matrix/matrix multiplication.
3063  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3064  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3065 
      // An empty target requires no work at all.
3066  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3067  return;
3068  }
      // The left operand has no columns (zero inner dimension): the target is only reset.
3069  else if( left.columns() == 0UL ) {
3070  reset( ~lhs );
3071  return;
3072  }
3073 
      // NOTE(review): serial() presumably forces a non-SMP evaluation of the operands -
      // its definition is not part of this excerpt; confirm.
3074  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3075  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3076 
      // Sanity checks: the evaluated operands keep the operand sizes and match the target.
3077  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3078  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3079  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3080  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3081  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3082  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3083 
3084  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
3085  }
3086  //**********************************************************************************************
3087 
3088  //**Assignment to dense matrices (kernel selection)*********************************************
3099  template< typename MT3 // Type of the left-hand side target matrix
3100  , typename MT4 // Type of the left-hand side matrix operand
3101  , typename MT5 // Type of the right-hand side matrix operand
3102  , typename ST2 > // Type of the scalar value
3103  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3104  {
3105  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
3106  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
3107  else
3108  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
3109  }
3110  //**********************************************************************************************
3111 
3112  //**Default assignment to dense matrices********************************************************
3126  template< typename MT3 // Type of the left-hand side target matrix
3127  , typename MT4 // Type of the left-hand side matrix operand
3128  , typename MT5 // Type of the right-hand side matrix operand
3129  , typename ST2 > // Type of the scalar value
3130  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3131  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3132  {
3133  for( size_t i=0UL; i<A.rows(); ++i ) {
3134  for( size_t k=0UL; k<B.columns(); ++k ) {
3135  C(i,k) = A(i,0UL) * B(0UL,k);
3136  }
3137  for( size_t j=1UL; j<A.columns(); ++j ) {
3138  for( size_t k=0UL; k<B.columns(); ++k ) {
3139  C(i,k) += A(i,j) * B(j,k);
3140  }
3141  }
3142  for( size_t k=0UL; k<B.columns(); ++k ) {
3143  C(i,k) *= scalar;
3144  }
3145  }
3146  }
3147  //**********************************************************************************************
3148 
3149  //**Vectorized default assignment to row-major dense matrices***********************************
3163  template< typename MT3 // Type of the left-hand side target matrix
3164  , typename MT4 // Type of the left-hand side matrix operand
3165  , typename MT5 // Type of the right-hand side matrix operand
3166  , typename ST2 > // Type of the scalar value
3167  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3168  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3169  {
3172 
3173  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3174  const typename MT5::OppositeType tmp( serial( B ) );
3175  assign( ~C, A * tmp * scalar );
3176  }
3177  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3178  const typename MT4::OppositeType tmp( serial( A ) );
3179  assign( ~C, tmp * B * scalar );
3180  }
3181  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3182  const typename MT5::OppositeType tmp( serial( B ) );
3183  assign( ~C, A * tmp * scalar );
3184  }
3185  else {
3186  const typename MT4::OppositeType tmp( serial( A ) );
3187  assign( ~C, tmp * B * scalar );
3188  }
3189  }
3190  //**********************************************************************************************
3191 
3192  //**Vectorized default assignment to column-major dense matrices********************************
/*!\brief Vectorized default assignment of a scaled matrix product to a column-major dense
//        matrix (C = s*A*B).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. The rows of the target
// are processed in packs of IT::size elements: first in groups of eight packs, then four,
// then two, and finally a single trailing pack. Within the four-, two- and one-pack stages two
// columns of the target are computed at once, with a separate step for an odd last column.
// Every accumulated pack is multiplied by the broadcast scalar before it is stored into C.
//
// NOTE(review): the accumulators xmm1..xmm8 are default-constructed and never explicitly
// zeroed; this presumably relies on IntrinsicType's default constructor yielding zero - confirm.
// NOTE(review): the loop conditions only test the first row of the last pack against M, so the
// last pack may reach beyond M; presumably the operands provide padded storage - confirm.
*/
3206  template< typename MT3 // Type of the left-hand side target matrix
3207  , typename MT4 // Type of the left-hand side matrix operand
3208  , typename MT5 // Type of the right-hand side matrix operand
3209  , typename ST2 > // Type of the scalar value
3210  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3211  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3212  {
3213  typedef IntrinsicTrait<ElementType> IT;
3214 
3215  const size_t M( A.rows() );
3216  const size_t N( B.columns() );
3217  const size_t K( A.columns() );
3218 
      // Broadcast of the scalar, applied to every pack right before it is stored
3219  const IntrinsicType factor( set( scalar ) );
3220 
3221  size_t i( 0UL );
3222 
      // Stage 1: eight packs of rows at a time, one column of C per iteration
3223  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3224  for( size_t j=0UL; j<N; ++j ) {
3225  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3226  for( size_t k=0UL; k<K; ++k ) {
3227  const IntrinsicType b1( set( B(k,j) ) );
3228  xmm1 = xmm1 + A.load(i ,k) * b1;
3229  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3230  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3231  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3232  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3233  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3234  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3235  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3236  }
3237  (~C).store( i , j, xmm1 * factor );
3238  (~C).store( i+IT::size , j, xmm2 * factor );
3239  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
3240  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
3241  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
3242  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
3243  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
3244  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
3245  }
3246  }
      // Stage 2: four packs of rows at a time, two columns of C per iteration
3247  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3248  size_t j( 0UL );
3249  for( ; (j+2UL) <= N; j+=2UL ) {
3250  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3251  for( size_t k=0UL; k<K; ++k ) {
3252  const IntrinsicType a1( A.load(i ,k) );
3253  const IntrinsicType a2( A.load(i+IT::size ,k) );
3254  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3255  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3256  const IntrinsicType b1( set( B(k,j ) ) );
3257  const IntrinsicType b2( set( B(k,j+1UL) ) );
3258  xmm1 = xmm1 + a1 * b1;
3259  xmm2 = xmm2 + a2 * b1;
3260  xmm3 = xmm3 + a3 * b1;
3261  xmm4 = xmm4 + a4 * b1;
3262  xmm5 = xmm5 + a1 * b2;
3263  xmm6 = xmm6 + a2 * b2;
3264  xmm7 = xmm7 + a3 * b2;
3265  xmm8 = xmm8 + a4 * b2;
3266  }
3267  (~C).store( i , j , xmm1 * factor );
3268  (~C).store( i+IT::size , j , xmm2 * factor );
3269  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
3270  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
3271  (~C).store( i , j+1UL, xmm5 * factor );
3272  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
3273  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
3274  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
3275  }
      // Odd last column of the four-pack stage
3276  if( j < N ) {
3277  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3278  for( size_t k=0UL; k<K; ++k ) {
3279  const IntrinsicType b1( set( B(k,j) ) );
3280  xmm1 = xmm1 + A.load(i ,k) * b1;
3281  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3282  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3283  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3284  }
3285  (~C).store( i , j, xmm1 * factor );
3286  (~C).store( i+IT::size , j, xmm2 * factor );
3287  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
3288  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
3289  }
3290  }
      // Stage 3: two packs of rows at a time, two columns of C per iteration
3291  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3292  size_t j( 0UL );
3293  for( ; (j+2UL) <= N; j+=2UL ) {
3294  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3295  for( size_t k=0UL; k<K; ++k ) {
3296  const IntrinsicType a1( A.load(i ,k) );
3297  const IntrinsicType a2( A.load(i+IT::size,k) );
3298  const IntrinsicType b1( set( B(k,j ) ) );
3299  const IntrinsicType b2( set( B(k,j+1UL) ) );
3300  xmm1 = xmm1 + a1 * b1;
3301  xmm2 = xmm2 + a2 * b1;
3302  xmm3 = xmm3 + a1 * b2;
3303  xmm4 = xmm4 + a2 * b2;
3304  }
3305  (~C).store( i , j , xmm1 * factor );
3306  (~C).store( i+IT::size, j , xmm2 * factor );
3307  (~C).store( i , j+1UL, xmm3 * factor );
3308  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
3309  }
      // Odd last column of the two-pack stage
3310  if( j < N ) {
3311  IntrinsicType xmm1, xmm2;
3312  for( size_t k=0UL; k<K; ++k ) {
3313  const IntrinsicType b1( set( B(k,j) ) );
3314  xmm1 = xmm1 + A.load(i ,k) * b1;
3315  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3316  }
3317  (~C).store( i , j, xmm1 * factor );
3318  (~C).store( i+IT::size, j, xmm2 * factor );
3319  }
3320  }
      // Stage 4: a single trailing pack of rows, two columns of C per iteration
3321  if( i < M ) {
3322  size_t j( 0UL );
3323  for( ; (j+2UL) <= N; j+=2UL ) {
3324  IntrinsicType xmm1, xmm2;
3325  for( size_t k=0UL; k<K; ++k ) {
3326  const IntrinsicType a1( A.load(i,k) );
3327  xmm1 = xmm1 + a1 * set( B(k,j ) );
3328  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3329  }
3330  (~C).store( i, j , xmm1 * factor );
3331  (~C).store( i, j+1UL, xmm2 * factor );
3332  }
      // Odd last column of the single-pack stage
3333  if( j < N ) {
3334  IntrinsicType xmm1;
3335  for( size_t k=0UL; k<K; ++k ) {
3336  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3337  }
3338  (~C).store( i, j, xmm1 * factor );
3339  }
3340  }
3341  }
3342  //**********************************************************************************************
3343 
3344  //**BLAS-based assignment to dense matrices (default)*******************************************
3358  template< typename MT3 // Type of the left-hand side target matrix
3359  , typename MT4 // Type of the left-hand side matrix operand
3360  , typename MT5 // Type of the right-hand side matrix operand
3361  , typename ST2 > // Type of the scalar value
3362  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3363  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3364  {
3365  selectDefaultAssignKernel( C, A, B, scalar );
3366  }
3367  //**********************************************************************************************
3368 
3369  //**BLAS-based assignment to dense matrices (single precision)**********************************
3370 #if BLAZE_BLAS_MODE
3371 
3384  template< typename MT3 // Type of the left-hand side target matrix
3385  , typename MT4 // Type of the left-hand side matrix operand
3386  , typename MT5 // Type of the right-hand side matrix operand
3387  , typename ST2 > // Type of the scalar value
3388  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3389  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3390  {
3391  sgemm( C, A, B, scalar, 0.0F );
3392  }
3393 #endif
3394  //**********************************************************************************************
3395 
3396  //**BLAS-based assignment to dense matrices (double precision)**********************************
3397 #if BLAZE_BLAS_MODE
3398 
3411  template< typename MT3 // Type of the left-hand side target matrix
3412  , typename MT4 // Type of the left-hand side matrix operand
3413  , typename MT5 // Type of the right-hand side matrix operand
3414  , typename ST2 > // Type of the scalar value
3415  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3416  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3417  {
3418  dgemm( C, A, B, scalar, 0.0 );
3419  }
3420 #endif
3421  //**********************************************************************************************
3422 
3423  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3424 #if BLAZE_BLAS_MODE
3425 
3438  template< typename MT3 // Type of the left-hand side target matrix
3439  , typename MT4 // Type of the left-hand side matrix operand
3440  , typename MT5 // Type of the right-hand side matrix operand
3441  , typename ST2 > // Type of the scalar value
3442  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3443  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3444  {
3445  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3446  }
3447 #endif
3448  //**********************************************************************************************
3449 
3450  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3451 #if BLAZE_BLAS_MODE
3452 
3465  template< typename MT3 // Type of the left-hand side target matrix
3466  , typename MT4 // Type of the left-hand side matrix operand
3467  , typename MT5 // Type of the right-hand side matrix operand
3468  , typename ST2 > // Type of the scalar value
3469  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3470  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3471  {
3472  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3473  }
3474 #endif
3475  //**********************************************************************************************
3476 
3477  //**Assignment to sparse matrices***************************************************************
3489  template< typename MT // Type of the target sparse matrix
3490  , bool SO > // Storage order of the target sparse matrix
3491  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3492  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3493  {
3495 
3496  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
3497 
3504 
3505  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3506  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3507 
3508  const TmpType tmp( serial( rhs ) );
3509  assign( ~lhs, tmp );
3510  }
3511  //**********************************************************************************************
3512 
3513  //**Restructuring assignment to row-major matrices**********************************************
3527  template< typename MT > // Type of the target matrix
3528  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3529  assign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
3530  {
3532 
3534 
3535  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3536  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3537 
3538  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3539  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3540 
3541  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3542  assign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
3543  else if( IsSymmetric<MT1>::value )
3544  assign( ~lhs, trans( left ) * right * rhs.scalar_ );
3545  else
3546  assign( ~lhs, left * trans( right ) * rhs.scalar_ );
3547  }
3548  //**********************************************************************************************
3549 
3550  //**Addition assignment to dense matrices*******************************************************
3562  template< typename MT // Type of the target dense matrix
3563  , bool SO > // Storage order of the target dense matrix
3564  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3565  {
3567 
3568  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3569  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3570 
3571  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3572  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3573 
3574  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3575  return;
3576  }
3577 
3578  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3579  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3580 
3581  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3582  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3583  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3584  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3585  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3586  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3587 
3588  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3589  }
3590  //**********************************************************************************************
3591 
3592  //**Addition assignment to dense matrices (kernel selection)************************************
3603  template< typename MT3 // Type of the left-hand side target matrix
3604  , typename MT4 // Type of the left-hand side matrix operand
3605  , typename MT5 // Type of the right-hand side matrix operand
3606  , typename ST2 > // Type of the scalar value
3607  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3608  {
3609  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
3610  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3611  else
3612  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3613  }
3614  //**********************************************************************************************
3615 
3616  //**Default addition assignment to dense matrices***********************************************
3630  template< typename MT3 // Type of the left-hand side target matrix
3631  , typename MT4 // Type of the left-hand side matrix operand
3632  , typename MT5 // Type of the right-hand side matrix operand
3633  , typename ST2 > // Type of the scalar value
3634  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3635  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3636  {
3637  const ResultType tmp( serial( A * B * scalar ) );
3638  addAssign( C, tmp );
3639  }
3640  //**********************************************************************************************
3641 
3642  //**Vectorized default addition assignment to row-major dense matrices**************************
3656  template< typename MT3 // Type of the left-hand side target matrix
3657  , typename MT4 // Type of the left-hand side matrix operand
3658  , typename MT5 // Type of the right-hand side matrix operand
3659  , typename ST2 > // Type of the scalar value
3660  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3661  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3662  {
3665 
3666  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3667  const typename MT5::OppositeType tmp( serial( B ) );
3668  addAssign( ~C, A * tmp * scalar );
3669  }
3670  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3671  const typename MT4::OppositeType tmp( serial( A ) );
3672  addAssign( ~C, tmp * B * scalar );
3673  }
3674  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3675  const typename MT5::OppositeType tmp( serial( B ) );
3676  addAssign( ~C, A * tmp * scalar );
3677  }
3678  else {
3679  const typename MT4::OppositeType tmp( serial( A ) );
3680  addAssign( ~C, tmp * B * scalar );
3681  }
3682  }
3683  //**********************************************************************************************
3684 
3685  //**Vectorized default addition assignment to column-major dense matrices***********************
/*!\brief Vectorized default addition assignment of a scaled matrix product to a column-major
//        dense matrix (C += s*A*B).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. The rows of the target
// are processed in packs of IT::size elements: first in groups of eight packs, then four,
// then two, and finally a single trailing pack. Within the four-, two- and one-pack stages two
// columns of the target are computed at once, with a separate step for an odd last column.
// Every accumulated pack is multiplied by the broadcast scalar and added to the pack
// currently loaded from C before the sum is stored back.
//
// NOTE(review): the accumulators xmm1..xmm8 are default-constructed and never explicitly
// zeroed; this presumably relies on IntrinsicType's default constructor yielding zero - confirm.
// NOTE(review): the loop conditions only test the first row of the last pack against M, so the
// last pack may reach beyond M; presumably the operands provide padded storage - confirm.
*/
3699  template< typename MT3 // Type of the left-hand side target matrix
3700  , typename MT4 // Type of the left-hand side matrix operand
3701  , typename MT5 // Type of the right-hand side matrix operand
3702  , typename ST2 > // Type of the scalar value
3703  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3704  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3705  {
3706  typedef IntrinsicTrait<ElementType> IT;
3707 
3708  const size_t M( A.rows() );
3709  const size_t N( B.columns() );
3710  const size_t K( A.columns() );
3711 
      // Broadcast of the scalar, applied to every accumulated pack before it is added to C
3712  const IntrinsicType factor( set( scalar ) );
3713 
3714  size_t i( 0UL );
3715 
      // Stage 1: eight packs of rows at a time, one column of C per iteration
3716  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3717  for( size_t j=0UL; j<N; ++j ) {
3718  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3719  for( size_t k=0UL; k<K; ++k ) {
3720  const IntrinsicType b1( set( B(k,j) ) );
3721  xmm1 = xmm1 + A.load(i ,k) * b1;
3722  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3723  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3724  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3725  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3726  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3727  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3728  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3729  }
3730  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3731  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3732  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3733  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3734  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
3735  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
3736  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
3737  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
3738  }
3739  }
      // Stage 2: four packs of rows at a time, two columns of C per iteration
3740  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3741  size_t j( 0UL );
3742  for( ; (j+2UL) <= N; j+=2UL ) {
3743  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3744  for( size_t k=0UL; k<K; ++k ) {
3745  const IntrinsicType a1( A.load(i ,k) );
3746  const IntrinsicType a2( A.load(i+IT::size ,k) );
3747  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3748  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3749  const IntrinsicType b1( set( B(k,j ) ) );
3750  const IntrinsicType b2( set( B(k,j+1UL) ) );
3751  xmm1 = xmm1 + a1 * b1;
3752  xmm2 = xmm2 + a2 * b1;
3753  xmm3 = xmm3 + a3 * b1;
3754  xmm4 = xmm4 + a4 * b1;
3755  xmm5 = xmm5 + a1 * b2;
3756  xmm6 = xmm6 + a2 * b2;
3757  xmm7 = xmm7 + a3 * b2;
3758  xmm8 = xmm8 + a4 * b2;
3759  }
3760  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3761  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
3762  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
3763  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
3764  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
3765  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
3766  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
3767  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
3768  }
      // Odd last column of the four-pack stage
3769  if( j < N ) {
3770  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3771  for( size_t k=0UL; k<K; ++k ) {
3772  const IntrinsicType b1( set( B(k,j) ) );
3773  xmm1 = xmm1 + A.load(i ,k) * b1;
3774  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3775  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3776  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3777  }
3778  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3779  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3780  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3781  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3782  }
3783  }
      // Stage 3: two packs of rows at a time, two columns of C per iteration
3784  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3785  size_t j( 0UL );
3786  for( ; (j+2UL) <= N; j+=2UL ) {
3787  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3788  for( size_t k=0UL; k<K; ++k ) {
3789  const IntrinsicType a1( A.load(i ,k) );
3790  const IntrinsicType a2( A.load(i+IT::size,k) );
3791  const IntrinsicType b1( set( B(k,j ) ) );
3792  const IntrinsicType b2( set( B(k,j+1UL) ) );
3793  xmm1 = xmm1 + a1 * b1;
3794  xmm2 = xmm2 + a2 * b1;
3795  xmm3 = xmm3 + a1 * b2;
3796  xmm4 = xmm4 + a2 * b2;
3797  }
3798  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3799  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
3800  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
3801  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
3802  }
      // Odd last column of the two-pack stage
3803  if( j < N ) {
3804  IntrinsicType xmm1, xmm2;
3805  for( size_t k=0UL; k<K; ++k ) {
3806  const IntrinsicType b1( set( B(k,j) ) );
3807  xmm1 = xmm1 + A.load(i ,k) * b1;
3808  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3809  }
3810  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3811  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
3812  }
3813  }
      // Stage 4: a single trailing pack of rows, two columns of C per iteration
3814  if( i < M ) {
3815  size_t j( 0UL );
3816  for( ; (j+2UL) <= N; j+=2UL ) {
3817  IntrinsicType xmm1, xmm2;
3818  for( size_t k=0UL; k<K; ++k ) {
3819  const IntrinsicType a1( A.load(i,k) );
3820  xmm1 = xmm1 + a1 * set( B(k,j ) );
3821  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3822  }
3823  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3824  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
3825  }
      // Odd last column of the single-pack stage
3826  if( j < N ) {
3827  IntrinsicType xmm1;
3828  for( size_t k=0UL; k<K; ++k ) {
3829  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3830  }
3831  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3832  }
3833  }
3834  }
3835  //**********************************************************************************************
3836 
3837  //**BLAS-based addition assignment to dense matrices (default)**********************************
3851  template< typename MT3 // Type of the left-hand side target matrix
3852  , typename MT4 // Type of the left-hand side matrix operand
3853  , typename MT5 // Type of the right-hand side matrix operand
3854  , typename ST2 > // Type of the scalar value
3855  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3856  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3857  {
3858  selectDefaultAddAssignKernel( C, A, B, scalar );
3859  }
3860  //**********************************************************************************************
3861 
3862  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3863 #if BLAZE_BLAS_MODE
3864 
3877  template< typename MT3 // Type of the left-hand side target matrix
3878  , typename MT4 // Type of the left-hand side matrix operand
3879  , typename MT5 // Type of the right-hand side matrix operand
3880  , typename ST2 > // Type of the scalar value
3881  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3882  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3883  {
3884  sgemm( C, A, B, scalar, 1.0F );
3885  }
3886 #endif
3887  //**********************************************************************************************
3888 
3889  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3890 #if BLAZE_BLAS_MODE
3891 
3904  template< typename MT3 // Type of the left-hand side target matrix
3905  , typename MT4 // Type of the left-hand side matrix operand
3906  , typename MT5 // Type of the right-hand side matrix operand
3907  , typename ST2 > // Type of the scalar value
3908  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3909  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3910  {
3911  dgemm( C, A, B, scalar, 1.0 );
3912  }
3913 #endif
3914  //**********************************************************************************************
3915 
3916  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3917 #if BLAZE_BLAS_MODE
3918 
3931  template< typename MT3 // Type of the left-hand side target matrix
3932  , typename MT4 // Type of the left-hand side matrix operand
3933  , typename MT5 // Type of the right-hand side matrix operand
3934  , typename ST2 > // Type of the scalar value
3935  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3936  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3937  {
3938  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3939  }
3940 #endif
3941  //**********************************************************************************************
3942 
3943  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3944 #if BLAZE_BLAS_MODE
3945 
3958  template< typename MT3 // Type of the left-hand side target matrix
3959  , typename MT4 // Type of the left-hand side matrix operand
3960  , typename MT5 // Type of the right-hand side matrix operand
3961  , typename ST2 > // Type of the scalar value
3962  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3963  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3964  {
3965  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3966  }
3967 #endif
3968  //**********************************************************************************************
3969 
3970  //**Restructuring addition assignment to row-major matrices*************************************
3985  template< typename MT > // Type of the target matrix
3986  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3987  addAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
3988  {
3990 
3992 
3993  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3994  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3995 
3996  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3997  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3998 
3999  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4000  addAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
4001  else if( IsSymmetric<MT1>::value )
4002  addAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
4003  else
4004  addAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4005  }
4006  //**********************************************************************************************
4007 
4008  //**Addition assignment to sparse matrices******************************************************
4009  // No special implementation for the addition assignment to sparse matrices.
4010  //**********************************************************************************************
4011 
4012  //**Subtraction assignment to dense matrices****************************************************
      // Subtraction assignment of the scaled matrix product held by rhs to the dense
      // matrix lhs, i.e. lhs -= ( left * right ) * scalar.
      //
      // \param lhs The target left-hand side dense matrix.
      // \param rhs The scaled multiplication expression to be subtracted.
      // \return void
      //
      // Both operands are evaluated via serial() into temporaries of type LT and RT, after
      // which the computation is dispatched through selectSubAssignKernel().
4024  template< typename MT // Type of the target dense matrix
4025  , bool SO > // Storage order of the target dense matrix
4026  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4027  {
4029 
4030  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4031  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4032 
      // Operands of the wrapped multiplication expression
4033  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4034  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4035 
      // Nothing to subtract if the target is empty or the inner dimension is zero
4036  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4037  return;
4038  }
4039 
4040  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4041  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4042 
      // The evaluated operands must keep the sizes of the original operands and match the target
4043  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4044  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4045  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4046  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4047  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4048  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4049 
4050  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
4051  }
4052  //**********************************************************************************************
4053 
4054  //**Subtraction assignment to dense matrices (kernel selection)*********************************
      // Selects the kernel used for the subtraction assignment C -= ( A * B ) * scalar.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // Targets with fewer than TDMATTDMATMULT_THRESHOLD elements (rows * columns) use the
      // default kernel, all other targets use the BLAS kernel selection.
4065  template< typename MT3 // Type of the left-hand side target matrix
4066  , typename MT4 // Type of the left-hand side matrix operand
4067  , typename MT5 // Type of the right-hand side matrix operand
4068  , typename ST2 > // Type of the scalar value
4069  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4070  {
4071  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
4072  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
4073  else
4074  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
4075  }
4076  //**********************************************************************************************
4077 
4078  //**Default subtraction assignment to dense matrices********************************************
      // Default (non-vectorized) subtraction assignment C -= ( A * B ) * scalar.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // This overload is disabled whenever UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
      // The scaled product is first evaluated serially into a temporary of type ResultType,
      // which is then subtracted from C.
4092  template< typename MT3 // Type of the left-hand side target matrix
4093  , typename MT4 // Type of the left-hand side matrix operand
4094  , typename MT5 // Type of the right-hand side matrix operand
4095  , typename ST2 > // Type of the scalar value
4096  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4097  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4098  {
4099  const ResultType tmp( serial( A * B * scalar ) );
4100  subAssign( C, tmp );
4101  }
4102  //**********************************************************************************************
4103 
4104  //**Vectorized default subtraction assignment to row-major dense matrices***********************
      // Vectorized default subtraction assignment C -= ( A * B ) * scalar for a row-major
      // target (DenseMatrix<MT3,false>).
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // Instead of computing the product directly, one of the two operands is copied into a
      // temporary of its OppositeType and the resulting product expression is forwarded to
      // subAssign(). The operand to convert is chosen as follows:
      //  - if exactly one of MT4/MT5 is resizable, the non-resizable one is left untouched
      //    only when it is A (i.e. B is converted when only A is resizable, A is converted
      //    when only B is resizable);
      //  - otherwise the operand with the smaller number of elements is converted, with B
      //    being preferred in case of a tie.
4118  template< typename MT3 // Type of the left-hand side target matrix
4119  , typename MT4 // Type of the left-hand side matrix operand
4120  , typename MT5 // Type of the right-hand side matrix operand
4121  , typename ST2 > // Type of the scalar value
4122  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4123  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4124  {
4127 
      // Only A is resizable: convert B
4128  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
4129  const typename MT5::OppositeType tmp( serial( B ) );
4130  subAssign( ~C, A * tmp * scalar );
4131  }
      // Only B is resizable: convert A
4132  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
4133  const typename MT4::OppositeType tmp( serial( A ) );
4134  subAssign( ~C, tmp * B * scalar );
4135  }
      // B has no more elements than A: convert B
4136  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
4137  const typename MT5::OppositeType tmp( serial( B ) );
4138  subAssign( ~C, A * tmp * scalar );
4139  }
      // A has fewer elements than B: convert A
4140  else {
4141  const typename MT4::OppositeType tmp( serial( A ) );
4142  subAssign( ~C, tmp * B * scalar );
4143  }
4144  }
4145  //**********************************************************************************************
4146 
4147  //**Vectorized default subtraction assignment to column-major dense matrices********************
      // Vectorized default subtraction assignment C -= ( A * B ) * scalar for a column-major
      // target (DenseMatrix<MT3,true>).
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // The rows of C are processed in strips of 8, 4, 2 and finally 1 intrinsic vector(s) of
      // IT::size elements each. For every strip and column the products A(i..,k)*B(k,j) are
      // accumulated over k, scaled by the broadcast scalar and subtracted from the loaded
      // elements of C.
      //
      // NOTE(review): the xmm accumulators are default-constructed and immediately used as
      // summands; this presumably relies on IntrinsicType default-initializing to zero - confirm.
      // NOTE(review): the strip loops only check that the start of the last vector is below M,
      // so the last vector may reach past row M; presumably the storage is padded - confirm.
4161  template< typename MT3 // Type of the left-hand side target matrix
4162  , typename MT4 // Type of the left-hand side matrix operand
4163  , typename MT5 // Type of the right-hand side matrix operand
4164  , typename ST2 > // Type of the scalar value
4165  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4166  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4167  {
4168  typedef IntrinsicTrait<ElementType> IT;
4169 
      // M x K times K x N product
4170  const size_t M( A.rows() );
4171  const size_t N( B.columns() );
4172  const size_t K( A.columns() );
4173 
      // Scalar broadcast into an intrinsic vector
4174  const IntrinsicType factor( set( scalar ) );
4175 
4176  size_t i( 0UL );
4177 
      // Strips of 8 intrinsic vectors (rows), one column at a time
4178  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
4179  for( size_t j=0UL; j<N; ++j ) {
4180  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4181  for( size_t k=0UL; k<K; ++k ) {
4182  const IntrinsicType b1( set( B(k,j) ) );
4183  xmm1 = xmm1 + A.load(i ,k) * b1;
4184  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4185  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4186  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4187  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
4188  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
4189  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
4190  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
4191  }
4192  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4193  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
4194  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
4195  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
4196  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
4197  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
4198  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
4199  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
4200  }
4201  }
      // Strips of 4 intrinsic vectors, two columns at a time (xmm1-4: column j, xmm5-8: column j+1)
4202  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
4203  size_t j( 0UL );
4204  for( ; (j+2UL) <= N; j+=2UL ) {
4205  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4206  for( size_t k=0UL; k<K; ++k ) {
4207  const IntrinsicType a1( A.load(i ,k) );
4208  const IntrinsicType a2( A.load(i+IT::size ,k) );
4209  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
4210  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
4211  const IntrinsicType b1( set( B(k,j ) ) );
4212  const IntrinsicType b2( set( B(k,j+1UL) ) );
4213  xmm1 = xmm1 + a1 * b1;
4214  xmm2 = xmm2 + a2 * b1;
4215  xmm3 = xmm3 + a3 * b1;
4216  xmm4 = xmm4 + a4 * b1;
4217  xmm5 = xmm5 + a1 * b2;
4218  xmm6 = xmm6 + a2 * b2;
4219  xmm7 = xmm7 + a3 * b2;
4220  xmm8 = xmm8 + a4 * b2;
4221  }
4222  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4223  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
4224  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
4225  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
4226  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
4227  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
4228  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
4229  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
4230  }
      // Remaining single column in case N is odd
4231  if( j < N ) {
4232  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4233  for( size_t k=0UL; k<K; ++k ) {
4234  const IntrinsicType b1( set( B(k,j) ) );
4235  xmm1 = xmm1 + A.load(i ,k) * b1;
4236  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
4237  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
4238  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
4239  }
4240  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4241  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
4242  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
4243  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
4244  }
4245  }
      // Strips of 2 intrinsic vectors, two columns at a time (xmm1-2: column j, xmm3-4: column j+1)
4246  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
4247  size_t j( 0UL );
4248  for( ; (j+2UL) <= N; j+=2UL ) {
4249  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4250  for( size_t k=0UL; k<K; ++k ) {
4251  const IntrinsicType a1( A.load(i ,k) );
4252  const IntrinsicType a2( A.load(i+IT::size,k) );
4253  const IntrinsicType b1( set( B(k,j ) ) );
4254  const IntrinsicType b2( set( B(k,j+1UL) ) );
4255  xmm1 = xmm1 + a1 * b1;
4256  xmm2 = xmm2 + a2 * b1;
4257  xmm3 = xmm3 + a1 * b2;
4258  xmm4 = xmm4 + a2 * b2;
4259  }
4260  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4261  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
4262  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
4263  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
4264  }
      // Remaining single column in case N is odd
4265  if( j < N ) {
4266  IntrinsicType xmm1, xmm2;
4267  for( size_t k=0UL; k<K; ++k ) {
4268  const IntrinsicType b1( set( B(k,j) ) );
4269  xmm1 = xmm1 + A.load(i ,k) * b1;
4270  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
4271  }
4272  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4273  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
4274  }
4275  }
      // Final strip of a single intrinsic vector, two columns at a time
4276  if( i < M ) {
4277  size_t j( 0UL );
4278  for( ; (j+2UL) <= N; j+=2UL ) {
4279  IntrinsicType xmm1, xmm2;
4280  for( size_t k=0UL; k<K; ++k ) {
4281  const IntrinsicType a1( A.load(i,k) );
4282  xmm1 = xmm1 + a1 * set( B(k,j ) );
4283  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
4284  }
4285  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4286  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
4287  }
      // Remaining single column in case N is odd
4288  if( j < N ) {
4289  IntrinsicType xmm1;
4290  for( size_t k=0UL; k<K; ++k ) {
4291  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
4292  }
4293  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
4294  }
4295  }
4296  }
4297  //**********************************************************************************************
4298 
4299  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
      // Fallback of the BLAS kernel selection for the subtraction assignment
      // C -= ( A * B ) * scalar.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // Enabled when UseDefaultKernel<MT3,MT4,MT5,ST2> holds; simply forwards to
      // selectDefaultSubAssignKernel().
4313  template< typename MT3 // Type of the left-hand side target matrix
4314  , typename MT4 // Type of the left-hand side matrix operand
4315  , typename MT5 // Type of the right-hand side matrix operand
4316  , typename ST2 > // Type of the scalar value
4317  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4318  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4319  {
4320  selectDefaultSubAssignKernel( C, A, B, scalar );
4321  }
4322  //**********************************************************************************************
4323 
4324  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
4325 #if BLAZE_BLAS_MODE
4326 
      // BLAS-based subtraction assignment C -= ( A * B ) * scalar for single precision
      // operands. Only compiled when BLAZE_BLAS_MODE is non-zero and only enabled when
      // UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // NOTE(review): the last two sgemm() arguments are presumably the GEMM factors alpha
      // (= -scalar, applied to A*B) and beta (= 1, applied to C) - confirm against the wrapper.
4339  template< typename MT3 // Type of the left-hand side target matrix
4340  , typename MT4 // Type of the left-hand side matrix operand
4341  , typename MT5 // Type of the right-hand side matrix operand
4342  , typename ST2 > // Type of the scalar value
4343  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4344  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4345  {
4346  sgemm( C, A, B, -scalar, 1.0F );
4347  }
4348 #endif
4349  //**********************************************************************************************
4350 
4351  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
4352 #if BLAZE_BLAS_MODE
4353 
      // BLAS-based subtraction assignment C -= ( A * B ) * scalar for double precision
      // operands. Only compiled when BLAZE_BLAS_MODE is non-zero and only enabled when
      // UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      // \return void
      //
      // NOTE(review): the last two dgemm() arguments are presumably the GEMM factors alpha
      // (= -scalar, applied to A*B) and beta (= 1, applied to C) - confirm against the wrapper.
4366  template< typename MT3 // Type of the left-hand side target matrix
4367  , typename MT4 // Type of the left-hand side matrix operand
4368  , typename MT5 // Type of the right-hand side matrix operand
4369  , typename ST2 > // Type of the scalar value
4370  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4371  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4372  {
4373  dgemm( C, A, B, -scalar, 1.0 );
4374  }
4375 #endif
4376  //**********************************************************************************************
4377 
4378  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
4379 #if BLAZE_BLAS_MODE
4380 
      // BLAS-based subtraction assignment C -= ( A * B ) * scalar for single precision
      // complex operands. Only compiled when BLAZE_BLAS_MODE is non-zero and only enabled
      // when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor; it is negated and used as the real part of a
      //               complex<float> with zero imaginary part.
      // \return void
      //
      // NOTE(review): the last two cgemm() arguments are presumably the GEMM factors alpha
      // (applied to A*B) and beta (= 1, applied to C) - confirm against the wrapper.
4393  template< typename MT3 // Type of the left-hand side target matrix
4394  , typename MT4 // Type of the left-hand side matrix operand
4395  , typename MT5 // Type of the right-hand side matrix operand
4396  , typename ST2 > // Type of the scalar value
4397  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4398  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4399  {
4400  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4401  }
4402 #endif
4403  //**********************************************************************************************
4404 
4405  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4406 #if BLAZE_BLAS_MODE
4407 
      // BLAS-based subtraction assignment C -= ( A * B ) * scalar for double precision
      // complex operands. Only compiled when BLAZE_BLAS_MODE is non-zero and only enabled
      // when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor; it is negated and used as the real part of a
      //               complex<double> with zero imaginary part.
      // \return void
      //
      // NOTE(review): the last two zgemm() arguments are presumably the GEMM factors alpha
      // (applied to A*B) and beta (= 1, applied to C) - confirm against the wrapper.
4420  template< typename MT3 // Type of the left-hand side target matrix
4421  , typename MT4 // Type of the left-hand side matrix operand
4422  , typename MT5 // Type of the right-hand side matrix operand
4423  , typename ST2 > // Type of the scalar value
4424  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4425  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4426  {
4427  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4428  }
4429 #endif
4430  //**********************************************************************************************
4431 
4432  //**Restructuring subtraction assignment to row-major matrices**********************************
      // Restructuring subtraction assignment of the scaled matrix product held by rhs to the
      // row-major target lhs.
      //
      // \param lhs The target left-hand side matrix.
      // \param rhs The scaled multiplication expression to be subtracted.
      // \return void
      //
      // This overload only participates in overload resolution when
      // CanExploitSymmetry<MT,MT1,MT2> enables it. The product is re-expressed with trans()
      // applied to the operand(s) reported symmetric by IsSymmetric and the rewritten
      // expression is forwarded to subAssign() again.
4446  template< typename MT > // Type of the target matrix
4447  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4448  subAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
4449  {
4451 
4453 
4454  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4455  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4456 
      // Operands of the wrapped multiplication expression
4457  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4458  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4459 
      // Both operands symmetric: transpose both
4460  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4461  subAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
      // Only the left operand symmetric: transpose the left operand
4462  else if( IsSymmetric<MT1>::value )
4463  subAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
      // NOTE(review): reached when MT1 is not symmetric; presumably CanExploitSymmetry
      // guarantees that MT2 is symmetric in this case - confirm.
4464  else
4465  subAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4466  }
4467  //**********************************************************************************************
4468 
4469  //**Subtraction assignment to sparse matrices***************************************************
4470  // No special implementation for the subtraction assignment to sparse matrices.
4471  //**********************************************************************************************
4472 
4473  //**Multiplication assignment to dense matrices*************************************************
4474  // No special implementation for the multiplication assignment to dense matrices.
4475  //**********************************************************************************************
4476 
4477  //**Multiplication assignment to sparse matrices************************************************
4478  // No special implementation for the multiplication assignment to sparse matrices.
4479  //**********************************************************************************************
4480 
4481  //**SMP assignment to dense matrices************************************************************
      // SMP assignment of the scaled matrix product held by rhs to the dense matrix lhs.
      //
      // \param lhs The target left-hand side dense matrix.
      // \param rhs The scaled multiplication expression to be assigned.
      // \return void
      //
      // This overload only participates in overload resolution when
      // IsEvaluationRequired<MT,MT1,MT2> enables it. Both operands are evaluated into
      // temporaries of type LT and RT; the product of the evaluated operands, scaled by
      // rhs.scalar_, is then forwarded to smpAssign().
4496  template< typename MT // Type of the target dense matrix
4497  , bool SO > // Storage order of the target dense matrix
4498  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4499  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4500  {
4502 
4503  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4504  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4505 
      // Operands of the wrapped multiplication expression
4506  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4507  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4508 
      // Empty target: nothing to do
4509  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4510  return;
4511  }
      // Zero inner dimension: the target is reset instead of computing a product
4512  else if( left.columns() == 0UL ) {
4513  reset( ~lhs );
4514  return;
4515  }
4516 
4517  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4518  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4519 
      // The evaluated operands must keep the sizes of the original operands and match the target
4520  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4521  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4522  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4523  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4524  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4525  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4526 
4527  smpAssign( ~lhs, A * B * rhs.scalar_ );
4528  }
4529  //**********************************************************************************************
4530 
4531  //**SMP assignment to sparse matrices***********************************************************
      // SMP assignment of the scaled matrix product held by rhs to the sparse matrix lhs.
      //
      // \param lhs The target left-hand side sparse matrix.
      // \param rhs The scaled multiplication expression to be assigned.
      // \return void
      //
      // This overload only participates in overload resolution when
      // IsEvaluationRequired<MT,MT1,MT2> enables it. The expression is first evaluated into
      // a temporary, which is then forwarded to smpAssign().
4546  template< typename MT // Type of the target sparse matrix
4547  , bool SO > // Storage order of the target sparse matrix
4548  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4549  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4550  {
4552 
      // Temporary type chosen by the storage order flag of the target
      // (presumably ResultType for SO == true, OppositeType otherwise - confirm SelectType)
4553  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
4554 
4561 
4562  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4563  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4564 
4565  const TmpType tmp( rhs );
4566  smpAssign( ~lhs, tmp );
4567  }
4568  //**********************************************************************************************
4569 
4570  //**Restructuring SMP assignment to row-major matrices******************************************
      // Restructuring SMP assignment of the scaled matrix product held by rhs to the
      // row-major target lhs.
      //
      // \param lhs The target left-hand side matrix.
      // \param rhs The scaled multiplication expression to be assigned.
      // \return void
      //
      // This overload only participates in overload resolution when
      // CanExploitSymmetry<MT,MT1,MT2> enables it. The product is re-expressed with trans()
      // applied to the operand(s) reported symmetric by IsSymmetric and the rewritten
      // expression is forwarded to smpAssign() again.
4584  template< typename MT > // Type of the target matrix
4585  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4586  smpAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
4587  {
4589 
4591 
4592  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4593  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4594 
      // Operands of the wrapped multiplication expression
4595  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4596  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4597 
      // Both operands symmetric: transpose both
4598  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4599  smpAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
      // Only the left operand symmetric: transpose the left operand
4600  else if( IsSymmetric<MT1>::value )
4601  smpAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
      // NOTE(review): reached when MT1 is not symmetric; presumably CanExploitSymmetry
      // guarantees that MT2 is symmetric in this case - confirm.
4602  else
4603  smpAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4604  }
4605  //**********************************************************************************************
4606 
4607  //**SMP addition assignment to dense matrices***************************************************
      // SMP addition assignment of the scaled matrix product held by rhs to the dense
      // matrix lhs.
      //
      // \param lhs The target left-hand side dense matrix.
      // \param rhs The scaled multiplication expression to be added.
      // \return void
      //
      // This overload only participates in overload resolution when
      // IsEvaluationRequired<MT,MT1,MT2> enables it. Both operands are evaluated into
      // temporaries of type LT and RT; the product of the evaluated operands, scaled by
      // rhs.scalar_, is then forwarded to smpAddAssign().
4622  template< typename MT // Type of the target dense matrix
4623  , bool SO > // Storage order of the target dense matrix
4624  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4625  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4626  {
4628 
4629  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4630  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4631 
      // Operands of the wrapped multiplication expression
4632  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4633  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4634 
      // Nothing to add if the target is empty or the inner dimension is zero
4635  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4636  return;
4637  }
4638 
4639  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4640  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4641 
      // The evaluated operands must keep the sizes of the original operands and match the target
4642  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4643  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4644  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4645  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4646  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4647  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4648 
4649  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4650  }
4651  //**********************************************************************************************
4652 
4653  //**Restructuring SMP addition assignment to row-major matrices*********************************
      // Restructuring SMP addition assignment of the scaled matrix product held by rhs to
      // the row-major target lhs.
      //
      // \param lhs The target left-hand side matrix.
      // \param rhs The scaled multiplication expression to be added.
      // \return void
      //
      // This overload only participates in overload resolution when
      // CanExploitSymmetry<MT,MT1,MT2> enables it. The product is re-expressed with trans()
      // applied to the operand(s) reported symmetric by IsSymmetric and the rewritten
      // expression is forwarded to smpAddAssign() again.
4668  template< typename MT > // Type of the target matrix
4669  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4670  smpAddAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
4671  {
4673 
4675 
4676  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4677  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4678 
      // Operands of the wrapped multiplication expression
4679  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4680  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4681 
      // Both operands symmetric: transpose both
4682  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4683  smpAddAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
      // Only the left operand symmetric: transpose the left operand
4684  else if( IsSymmetric<MT1>::value )
4685  smpAddAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
      // NOTE(review): reached when MT1 is not symmetric; presumably CanExploitSymmetry
      // guarantees that MT2 is symmetric in this case - confirm.
4686  else
4687  smpAddAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4688  }
4689  //**********************************************************************************************
4690 
4691  //**SMP addition assignment to sparse matrices**************************************************
4692  // No special implementation for the SMP addition assignment to sparse matrices.
4693  //**********************************************************************************************
4694 
4695  //**SMP subtraction assignment to dense matrices************************************************
      // SMP subtraction assignment of the scaled matrix product held by rhs to the dense
      // matrix lhs.
      //
      // \param lhs The target left-hand side dense matrix.
      // \param rhs The scaled multiplication expression to be subtracted.
      // \return void
      //
      // This overload only participates in overload resolution when
      // IsEvaluationRequired<MT,MT1,MT2> enables it. Both operands are evaluated into
      // temporaries of type LT and RT; the product of the evaluated operands, scaled by
      // rhs.scalar_, is then forwarded to smpSubAssign().
4710  template< typename MT // Type of the target dense matrix
4711  , bool SO > // Storage order of the target dense matrix
4712  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4713  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4714  {
4716 
4717  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4718  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4719 
      // Operands of the wrapped multiplication expression
4720  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4721  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4722 
      // Nothing to subtract if the target is empty or the inner dimension is zero
4723  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4724  return;
4725  }
4726 
4727  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4728  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4729 
      // The evaluated operands must keep the sizes of the original operands and match the target
4730  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4731  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4732  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4733  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4734  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4735  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4736 
4737  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4738  }
4739  //**********************************************************************************************
4740 
4741  //**Restructuring SMP subtraction assignment to row-major matrices******************************
      // Restructuring SMP subtraction assignment of the scaled matrix product held by rhs to
      // the row-major target lhs.
      //
      // \param lhs The target left-hand side matrix.
      // \param rhs The scaled multiplication expression to be subtracted.
      // \return void
      //
      // This overload only participates in overload resolution when
      // CanExploitSymmetry<MT,MT1,MT2> enables it. The product is re-expressed with trans()
      // applied to the operand(s) reported symmetric by IsSymmetric and the rewritten
      // expression is forwarded to smpSubAssign() again.
4756  template< typename MT > // Type of the target matrix
4757  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4758  smpSubAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
4759  {
4761 
4763 
4764  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4765  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4766 
      // Operands of the wrapped multiplication expression
4767  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4768  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4769 
      // Both operands symmetric: transpose both
4770  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4771  smpSubAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
      // Only the left operand symmetric: transpose the left operand
4772  else if( IsSymmetric<MT1>::value )
4773  smpSubAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
      // NOTE(review): reached when MT1 is not symmetric; presumably CanExploitSymmetry
      // guarantees that MT2 is symmetric in this case - confirm.
4774  else
4775  smpSubAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4776  }
4777  //**********************************************************************************************
4778 
4779  //**SMP subtraction assignment to sparse matrices***********************************************
4780  // No special implementation for the SMP subtraction assignment to sparse matrices.
4781  //**********************************************************************************************
4782 
4783  //**SMP multiplication assignment to dense matrices*********************************************
4784  // No special implementation for the SMP multiplication assignment to dense matrices.
4785  //**********************************************************************************************
4786 
4787  //**SMP multiplication assignment to sparse matrices********************************************
4788  // No special implementation for the SMP multiplication assignment to sparse matrices.
4789  //**********************************************************************************************
4790 
4791  //**Compile time checks*************************************************************************
4800  //**********************************************************************************************
4801 };
4803 //*************************************************************************************************
4804 
4805 
4806 
4807 
4808 //=================================================================================================
4809 //
4810 // GLOBAL BINARY ARITHMETIC OPERATORS
4811 //
4812 //=================================================================================================
4813 
4814 //*************************************************************************************************
     // Multiplication operator for the multiplication of two column-major dense matrices.
     //
     // \param lhs The left-hand side matrix of the multiplication.
     // \param rhs The right-hand side matrix of the multiplication.
     // \return A TDMatTDMatMultExpr expression object built from the two operands.
     // \exception std::invalid_argument Matrix sizes do not match.
     //
     // The number of columns of lhs must match the number of rows of rhs; otherwise a
     // std::invalid_argument exception is thrown. No product is computed here: the returned
     // expression merely captures both operands.
4840 template< typename T1 // Type of the left-hand side dense matrix
4841  , typename T2 > // Type of the right-hand side dense matrix
4842 inline const TDMatTDMatMultExpr<T1,T2>
4843  operator*( const DenseMatrix<T1,true>& lhs, const DenseMatrix<T2,true>& rhs )
4844 {
4846 
     // The inner dimensions of the product have to agree
4847  if( (~lhs).columns() != (~rhs).rows() )
4848  throw std::invalid_argument( "Matrix sizes do not match" );
4849 
4850  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4851 }
4852 //*************************************************************************************************
4853 
4854 
4855 
4856 
4857 //=================================================================================================
4858 //
4859 // ROWS SPECIALIZATIONS
4860 //
4861 //=================================================================================================
4862 
4863 //*************************************************************************************************
     // Rows specialization for TDMatTDMatMultExpr: the row information of the product is
     // inherited from the left-hand side operand type MT1.
4865 template< typename MT1, typename MT2 >
4866 struct Rows< TDMatTDMatMultExpr<MT1,MT2> >
4867  : public Rows<MT1>
4868 {};
4870 //*************************************************************************************************
4871 
4872 
4873 
4874 
4875 //=================================================================================================
4876 //
4877 // COLUMNS SPECIALIZATIONS
4878 //
4879 //=================================================================================================
4880 
4881 //*************************************************************************************************
     // Columns specialization for TDMatTDMatMultExpr: the column information of the product
     // is inherited from the right-hand side operand type MT2.
4883 template< typename MT1, typename MT2 >
4884 struct Columns< TDMatTDMatMultExpr<MT1,MT2> >
4885  : public Columns<MT2>
4886 {};
4888 //*************************************************************************************************
4889 
4890 
4891 
4892 
4893 //=================================================================================================
4894 //
4895 // ISLOWER SPECIALIZATIONS
4896 //
4897 //=================================================================================================
4898 
4899 //*************************************************************************************************
     // IsLower specialization for TDMatTDMatMultExpr: the product is flagged as lower only
     // if both operand types MT1 and MT2 are flagged as lower.
4901 template< typename MT1, typename MT2 >
4902 struct IsLower< TDMatTDMatMultExpr<MT1,MT2> >
4903  : public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
4904 {};
4906 //*************************************************************************************************
4907 
4908 
4909 
4910 
4911 //=================================================================================================
4912 //
4913 // ISUPPER SPECIALIZATIONS
4914 //
4915 //=================================================================================================
4916 
4917 //*************************************************************************************************
     // IsUpper specialization for TDMatTDMatMultExpr: the product is flagged as upper only
     // if both operand types MT1 and MT2 are flagged as upper.
4919 template< typename MT1, typename MT2 >
4920 struct IsUpper< TDMatTDMatMultExpr<MT1,MT2> >
4921  : public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
4922 {};
4924 //*************************************************************************************************
4925 
4926 
4927 
4928 
4929 //=================================================================================================
4930 //
4931 // EXPRESSION TRAIT SPECIALIZATIONS
4932 //
4933 //=================================================================================================
4934 
4935 //*************************************************************************************************
     // TDMatDVecMultExprTrait specialization for a TDMatTDMatMultExpr as matrix operand,
     // i.e. for the expression ( MT1 * MT2 ) * VT.
     //
     // If MT1 and MT2 are column-major dense matrices and VT is a dense column vector, the
     // nested Type is obtained by re-associating the expression as MT1 * ( MT2 * VT ): the
     // trait is first applied to (MT2,VT) and then to MT1 and that intermediate type.
     // In all other cases Type is INVALID_TYPE.
4937 template< typename MT1, typename MT2, typename VT >
4938 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4939 {
4940  public:
4941  //**********************************************************************************************
4942  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4943  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4944  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4945  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4946  , INVALID_TYPE >::Type Type;
4947  //**********************************************************************************************
4948 };
4950 //*************************************************************************************************
4951 
4952 
4953 //*************************************************************************************************
     // TDMatSVecMultExprTrait specialization for a TDMatTDMatMultExpr as matrix operand,
     // i.e. for the expression ( MT1 * MT2 ) * VT with a sparse vector VT.
     //
     // If MT1 and MT2 are column-major dense matrices and VT is a sparse column vector, the
     // nested Type is obtained by re-associating the expression as MT1 * ( MT2 * VT ): the
     // inner step uses TDMatSVecMultExprTrait<MT2,VT>, the outer step uses
     // TDMatDVecMultExprTrait (the intermediate result is presumably a dense vector - confirm).
     // In all other cases Type is INVALID_TYPE.
4955 template< typename MT1, typename MT2, typename VT >
4956 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4957 {
4958  public:
4959  //**********************************************************************************************
4960  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4961  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4962  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4963  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4964  , INVALID_TYPE >::Type Type;
4965  //**********************************************************************************************
4966 };
4968 //*************************************************************************************************
4969 
4970 
4971 //*************************************************************************************************
// Partial specialization of TDVecTDMatMultExprTrait for a matrix operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression type of VT * (MT1 * MT2).
//
// Template parameters:
//   VT        type of the left-hand side vector
//   MT1, MT2  operand types of the matrix/matrix multiplication expression
//
// Nested type:
//   Type  If VT is a dense row vector and MT1 and MT2 are both dense column-major matrices,
//         Type is obtained by nesting the trait from left to right: first the result of the
//         trait for (VT,MT1), then the trait for that intermediate vector type and MT2. In all
//         other cases Type is INVALID_TYPE.
//
// NOTE(review): the left-to-right nesting presumably mirrors a restructuring of v*(A*B) into
// (v*A)*B in the corresponding multiplication operator -- confirm against that operator.
4973 template< typename VT, typename MT1, typename MT2 >
4974 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4975 {
4976  public:
4977  //**********************************************************************************************
 // Compile time selection between the nested multiplication type and INVALID_TYPE.
4978  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4979  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4980  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4981  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4982  , INVALID_TYPE >::Type Type;
4983  //**********************************************************************************************
4984 };
4986 //*************************************************************************************************
4987 
4988 
4989 //*************************************************************************************************
// Partial specialization of TSVecTDMatMultExprTrait for a matrix operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression type of VT * (MT1 * MT2) with a sparse
// vector VT.
//
// Template parameters:
//   VT        type of the left-hand side vector
//   MT1, MT2  operand types of the matrix/matrix multiplication expression
//
// Nested type:
//   Type  If VT is a sparse row vector and MT1 and MT2 are both dense column-major matrices,
//         Type is obtained by nesting the traits from left to right: the inner step uses
//         TSVecTDMatMultExprTrait for (VT,MT1), the outer step uses TDVecTDMatMultExprTrait for
//         the inner result and MT2. In all other cases Type is INVALID_TYPE.
//
// NOTE(review): the outer step uses the dense-vector trait, which assumes that the inner
// sparse vector/dense matrix product yields a dense vector type -- confirm against
// TSVecTDMatMultExprTrait.
4991 template< typename VT, typename MT1, typename MT2 >
4992 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4993 {
4994  public:
4995  //**********************************************************************************************
 // Compile time selection between the nested multiplication type and INVALID_TYPE.
4996  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4997  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4998  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4999  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
5000  , INVALID_TYPE >::Type Type;
5001  //**********************************************************************************************
5002 };
5004 //*************************************************************************************************
5005 
5006 
5007 //*************************************************************************************************
// Partial specialization of SubmatrixExprTrait for TDMatTDMatMultExpr<MT1,MT2>.
//
// Template parameters:
//   MT1, MT2  operand types of the matrix/matrix multiplication expression
//   AF        boolean flag that is forwarded unchanged to the SubmatrixExprTrait of both
//             operands (NOTE(review): presumably the alignment flag of the submatrix view --
//             confirm against the primary SubmatrixExprTrait template)
//
// Nested type:
//   Type  The MultExprTrait result for the submatrix expression types of the const-qualified
//         operands, i.e. the type of submatrix(MT1) * submatrix(MT2).
5009 template< typename MT1, typename MT2, bool AF >
5010 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2>, AF >
5011 {
5012  public:
5013  //**********************************************************************************************
 // The submatrix trait is applied to each operand before the multiplication trait is formed.
5014  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
5015  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
5016  //**********************************************************************************************
5017 };
5019 //*************************************************************************************************
5020 
5021 
5022 //*************************************************************************************************
// Partial specialization of RowExprTrait for TDMatTDMatMultExpr<MT1,MT2>.
//
// Template parameters:
//   MT1, MT2  operand types of the matrix/matrix multiplication expression
//
// Nested type:
//   Type  The MultExprTrait result for the row expression type of the const-qualified left
//         operand MT1 and the unmodified right operand MT2, i.e. the type of row(MT1) * MT2.
5024 template< typename MT1, typename MT2 >
5025 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
5026 {
5027  public:
5028  //**********************************************************************************************
 // Only the left-hand side operand is reduced to a row; the right-hand side is used as is.
5029  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
5030  //**********************************************************************************************
5031 };
5033 //*************************************************************************************************
5034 
5035 
5036 //*************************************************************************************************
// Partial specialization of ColumnExprTrait for TDMatTDMatMultExpr<MT1,MT2>.
//
// Template parameters:
//   MT1, MT2  operand types of the matrix/matrix multiplication expression
//
// Nested type:
//   Type  The MultExprTrait result for the unmodified left operand MT1 and the column
//         expression type of the const-qualified right operand MT2, i.e. the type of
//         MT1 * column(MT2).
5038 template< typename MT1, typename MT2 >
5039 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
5040 {
5041  public:
5042  //**********************************************************************************************
 // Only the right-hand side operand is reduced to a column; the left-hand side is used as is.
5043  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
5044  //**********************************************************************************************
5045 };
5047 //*************************************************************************************************
5048 
5049 } // namespace blaze
5050 
5051 #endif
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:286
Data type constraint.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:131
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4838
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:282
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:129
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:176
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:444
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:130
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:425
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:224
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:277
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:331
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:131
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:403
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2474
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:381
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:279
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:276
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold.This threshold speci...
Definition: Thresholds.h:903
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:126
Header file for the IsDouble type trait.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:289
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:391
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:283
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:415
Header file for the IsNumeric type trait.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:104
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:278
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:280
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:361
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:292
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:932
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:316
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:127
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
Header file for the IsUpper type trait.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:295
Header file for the IsColumnVector type trait.
Constraint on the data type.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:225
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:435
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:371
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:128
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:281
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:445