All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
55 #include <blaze/math/Intrinsics.h>
56 #include <blaze/math/shims/Reset.h>
82 #include <blaze/system/BLAS.h>
84 #include <blaze/util/Assert.h>
85 #include <blaze/util/Complex.h>
92 #include <blaze/util/DisableIf.h>
93 #include <blaze/util/EnableIf.h>
94 #include <blaze/util/InvalidType.h>
96 #include <blaze/util/SelectType.h>
97 #include <blaze/util/Types.h>
104 
105 
106 namespace blaze {
107 
108 //=================================================================================================
109 //
110 // CLASS DMATDMATMULTEXPR
111 //
112 //=================================================================================================
113 
114 //*************************************************************************************************
121 template< typename MT1 // Type of the left-hand side dense matrix
122  , typename MT2 > // Type of the right-hand side dense matrix
123 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
124  , private MatMatMultExpr
125  , private Computation
126 {
127  private:
128  //**Type definitions****************************************************************************
129  typedef typename MT1::ResultType RT1;
130  typedef typename MT2::ResultType RT2;
131  typedef typename RT1::ElementType ET1;
132  typedef typename RT2::ElementType ET2;
133  typedef typename MT1::CompositeType CT1;
134  typedef typename MT2::CompositeType CT2;
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
140  //**********************************************************************************************
141 
142  //**********************************************************************************************
144  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
145  //**********************************************************************************************
146 
147  //**********************************************************************************************
149 
155  template< typename T1, typename T2, typename T3 >
156  struct CanExploitSymmetry {
157  enum { value = IsColumnMajorMatrix<T1>::value &&
158  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
159  };
161  //**********************************************************************************************
162 
163  //**********************************************************************************************
165 
169  template< typename T1, typename T2, typename T3 >
170  struct IsEvaluationRequired {
171  enum { value = ( evaluateLeft || evaluateRight ) &&
172  !CanExploitSymmetry<T1,T2,T3>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
182  template< typename T1, typename T2, typename T3 >
183  struct UseSinglePrecisionKernel {
184  enum { value = BLAZE_BLAS_MODE &&
185  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
186  IsFloat<typename T1::ElementType>::value &&
187  IsFloat<typename T2::ElementType>::value &&
188  IsFloat<typename T3::ElementType>::value };
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
198  template< typename T1, typename T2, typename T3 >
199  struct UseDoublePrecisionKernel {
200  enum { value = BLAZE_BLAS_MODE &&
201  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202  IsDouble<typename T1::ElementType>::value &&
203  IsDouble<typename T2::ElementType>::value &&
204  IsDouble<typename T3::ElementType>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
215  template< typename T1, typename T2, typename T3 >
216  struct UseSinglePrecisionComplexKernel {
217  typedef complex<float> Type;
218  enum { value = BLAZE_BLAS_MODE &&
219  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
220  IsSame<typename T1::ElementType,Type>::value &&
221  IsSame<typename T2::ElementType,Type>::value &&
222  IsSame<typename T3::ElementType,Type>::value };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
233  template< typename T1, typename T2, typename T3 >
234  struct UseDoublePrecisionComplexKernel {
235  typedef complex<double> Type;
236  enum { value = BLAZE_BLAS_MODE &&
237  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
238  IsSame<typename T1::ElementType,Type>::value &&
239  IsSame<typename T2::ElementType,Type>::value &&
240  IsSame<typename T3::ElementType,Type>::value };
241  };
243  //**********************************************************************************************
244 
245  //**********************************************************************************************
247 
250  template< typename T1, typename T2, typename T3 >
251  struct UseDefaultKernel {
252  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
253  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
254  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
255  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
256  };
258  //**********************************************************************************************
259 
260  //**********************************************************************************************
262 
265  template< typename T1, typename T2, typename T3 >
266  struct UseVectorizedDefaultKernel {
267  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
268  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
269  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
270  IntrinsicTrait<typename T1::ElementType>::addition &&
271  IntrinsicTrait<typename T1::ElementType>::subtraction &&
272  IntrinsicTrait<typename T1::ElementType>::multiplication };
273  };
275  //**********************************************************************************************
276 
277  public:
278  //**Type definitions****************************************************************************
285  typedef const ElementType ReturnType;
286  typedef const ResultType CompositeType;
287 
289  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
290 
292  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
293 
296 
299  //**********************************************************************************************
300 
301  //**Compilation flags***************************************************************************
// Compilation flag 'vectorizable': requires at least that both operand types are vectorizable.
// NOTE(review): the remaining conjuncts and the closing brace of this enumerator (upstream
// lines 304-306) are not part of this listing; only the first two conditions are visible.
303  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
307 

// Compilation flag 'smpAssignable': set only if neither evaluateLeft nor evaluateRight is
// set and both operand types are themselves smpAssignable.
309  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
310  !evaluateRight && MT2::smpAssignable };
311  //**********************************************************************************************
312 
313  //**Constructor*********************************************************************************
319  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
320  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
321  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
322  {
323  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
324  }
325  //**********************************************************************************************
326 
327  //**Access operator*****************************************************************************
334  inline ReturnType operator()( size_t i, size_t j ) const {
335  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
336  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
337 
338  ElementType tmp;
339 
340  if( lhs_.columns() != 0UL ) {
341  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
342  tmp = lhs_(i,0UL) * rhs_(0UL,j);
343  for( size_t k=1UL; k<end; k+=2UL ) {
344  tmp += lhs_(i,k ) * rhs_(k ,j);
345  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
346  }
347  if( end < lhs_.columns() ) {
348  tmp += lhs_(i,end) * rhs_(end,j);
349  }
350  }
351  else {
352  reset( tmp );
353  }
354 
355  return tmp;
356  }
357  //**********************************************************************************************
358 
359  //**Rows function*******************************************************************************
364  inline size_t rows() const {
365  return lhs_.rows();
366  }
367  //**********************************************************************************************
368 
369  //**Columns function****************************************************************************
374  inline size_t columns() const {
375  return rhs_.columns();
376  }
377  //**********************************************************************************************
378 
379  //**Left operand access*************************************************************************
384  inline LeftOperand leftOperand() const {
385  return lhs_;
386  }
387  //**********************************************************************************************
388 
389  //**Right operand access************************************************************************
394  inline RightOperand rightOperand() const {
395  return rhs_;
396  }
397  //**********************************************************************************************
398 
399  //**********************************************************************************************
405  template< typename T >
406  inline bool canAlias( const T* alias ) const {
407  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
408  }
409  //**********************************************************************************************
410 
411  //**********************************************************************************************
417  template< typename T >
418  inline bool isAliased( const T* alias ) const {
419  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
420  }
421  //**********************************************************************************************
422 
423  //**********************************************************************************************
428  inline bool isAligned() const {
429  return lhs_.isAligned() && rhs_.isAligned();
430  }
431  //**********************************************************************************************
432 
433  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// The visible part of the condition holds if BLAZE_BLAS_IS_PARALLEL is not set or if the
// number of result elements (rows() * columns()) is below DMATDMATMULT_THRESHOLD.
// NOTE(review): the final operand of the conjunction (upstream line 441) is missing from
// this listing, so the complete condition cannot be stated from here.
438  inline bool canSMPAssign() const {
439  return ( !BLAZE_BLAS_IS_PARALLEL ||
440  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
442  }
443  //**********************************************************************************************
444 
445  private:
446  //**Member variables****************************************************************************
449  //**********************************************************************************************
450 
451  //**BLAS kernel (single precision)***********************************************************
452 #if BLAZE_BLAS_MODE
453 
468  template< typename MT3 // Type of the left-hand side target matrix
469  , typename MT4 // Type of the left-hand side matrix operand
470  , typename MT5 > // Type of the right-hand side matrix operand
471  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
472  {
473  using boost::numeric_cast;
474 
478 
479  const int M ( numeric_cast<int>( A.rows() ) );
480  const int N ( numeric_cast<int>( B.columns() ) );
481  const int K ( numeric_cast<int>( A.columns() ) );
482  const int lda( numeric_cast<int>( A.spacing() ) );
483  const int ldb( numeric_cast<int>( B.spacing() ) );
484  const int ldc( numeric_cast<int>( C.spacing() ) );
485 
487  cblas_ssymm( CblasRowMajor, CblasLeft, CblasLower,
488  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
489  }
491  cblas_ssymm( CblasRowMajor, CblasRight, CblasLower,
492  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
493  }
494  else {
495  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
496  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
497  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
498  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
499  }
500  }
502 #endif
503  //**********************************************************************************************
504 
505  //**BLAS kernel (double precision)**************************************************************
506 #if BLAZE_BLAS_MODE
507 
522  template< typename MT3 // Type of the left-hand side target matrix
523  , typename MT4 // Type of the left-hand side matrix operand
524  , typename MT5 > // Type of the right-hand side matrix operand
525  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
526  {
527  using boost::numeric_cast;
528 
532 
533  const int M ( numeric_cast<int>( A.rows() ) );
534  const int N ( numeric_cast<int>( B.columns() ) );
535  const int K ( numeric_cast<int>( A.columns() ) );
536  const int lda( numeric_cast<int>( A.spacing() ) );
537  const int ldb( numeric_cast<int>( B.spacing() ) );
538  const int ldc( numeric_cast<int>( C.spacing() ) );
539 
541  cblas_dsymm( CblasRowMajor, CblasLeft, CblasLower,
542  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
543  }
544  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
545  cblas_dsymm( CblasRowMajor, CblasRight, CblasLower,
546  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
547  }
548  else {
549  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
550  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
551  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
552  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
553  }
554  }
556 #endif
557  //**********************************************************************************************
558 
559  //**BLAS kernel (single precision complex)*******************************************************
560 #if BLAZE_BLAS_MODE
561 
576  template< typename MT3 // Type of the left-hand side target matrix
577  , typename MT4 // Type of the left-hand side matrix operand
578  , typename MT5 > // Type of the right-hand side matrix operand
579  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
580  complex<float> alpha, complex<float> beta )
581  {
582  using boost::numeric_cast;
583 
587  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
588  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
589  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
590 
591  const int M ( numeric_cast<int>( A.rows() ) );
592  const int N ( numeric_cast<int>( B.columns() ) );
593  const int K ( numeric_cast<int>( A.columns() ) );
594  const int lda( numeric_cast<int>( A.spacing() ) );
595  const int ldb( numeric_cast<int>( B.spacing() ) );
596  const int ldc( numeric_cast<int>( C.spacing() ) );
597 
598  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
599  cblas_csymm( CblasRowMajor, CblasLeft, CblasLower,
600  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
601  }
602  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
603  cblas_csymm( CblasRowMajor, CblasRight, CblasLower,
604  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
605  }
606  else {
607  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
608  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
609  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
610  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
611  }
612  }
614 #endif
615  //**********************************************************************************************
616 
617  //**BLAS kernel (double precision complex)*******************************************************
618 #if BLAZE_BLAS_MODE
619 
634  template< typename MT3 // Type of the left-hand side target matrix
635  , typename MT4 // Type of the left-hand side matrix operand
636  , typename MT5 > // Type of the right-hand side matrix operand
637  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
638  complex<double> alpha, complex<double> beta )
639  {
640  using boost::numeric_cast;
641 
645  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
646  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
647  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
648 
649  const int M ( numeric_cast<int>( A.rows() ) );
650  const int N ( numeric_cast<int>( B.columns() ) );
651  const int K ( numeric_cast<int>( A.columns() ) );
652  const int lda( numeric_cast<int>( A.spacing() ) );
653  const int ldb( numeric_cast<int>( B.spacing() ) );
654  const int ldc( numeric_cast<int>( C.spacing() ) );
655 
656  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
657  cblas_zsymm( CblasRowMajor, CblasLeft, CblasLower,
658  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
659  }
660  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
661  cblas_zsymm( CblasRowMajor, CblasRight, CblasLower,
662  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
663  }
664  else {
665  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
666  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
667  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
668  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
669  }
670  }
672 #endif
673  //**********************************************************************************************
674 
675  //**Assignment to dense matrices****************************************************************
688  template< typename MT // Type of the target dense matrix
689  , bool SO > // Storage order of the target dense matrix
690  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
691  assign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
692  {
694 
695  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
696  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
697 
698  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
699  return;
700  }
701  else if( rhs.lhs_.columns() == 0UL ) {
702  reset( ~lhs );
703  return;
704  }
705 
706  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
707  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
708 
709  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
710  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
711  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
712  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
713  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
714  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
715 
716  DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
717  }
719  //**********************************************************************************************
720 
721  //**Assignment to dense matrices (kernel selection)*********************************************
732  template< typename MT3 // Type of the left-hand side target matrix
733  , typename MT4 // Type of the left-hand side matrix operand
734  , typename MT5 > // Type of the right-hand side matrix operand
735  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
736  {
737  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
738  DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
739  else
740  DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
741  }
743  //**********************************************************************************************
744 
745  //**Default assignment to dense matrices********************************************************
758  template< typename MT3 // Type of the left-hand side target matrix
759  , typename MT4 // Type of the left-hand side matrix operand
760  , typename MT5 > // Type of the right-hand side matrix operand
761  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
762  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
763  {
764  const size_t M( A.rows() );
765  const size_t N( B.columns() );
766  const size_t K( A.columns() );
767 
768  for( size_t i=0UL; i<M; ++i ) {
769  for( size_t j=0UL; j<N; ++j ) {
770  C(i,j) = A(i,0UL) * B(0UL,j);
771  }
772  for( size_t k=1UL; k<K; ++k ) {
773  for( size_t j=0UL; j<N; ++j ) {
774  C(i,j) += A(i,k) * B(k,j);
775  }
776  }
777  }
778  }
780  //**********************************************************************************************
781 
782  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default assignment of the matrix product to a row-major dense matrix
// (the target is taken as DenseMatrix<MT3,false>). Enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// Every accumulator (xmm1, xmm2, ...) sums set( A(i,k) ) * B.load(k,column) over all k and
// is then written to the target with (~C).store(). The columns of the target are worked off
// in panels of 8, 4, 2 and finally 1 vector(s) of IT::size elements; inside the 4-, 2- and
// 1-vector panels two rows are handled per iteration, followed by a single leftover row.
//
// NOTE(review): IntrinsicType is declared outside the visible part of the file. The
// accumulators are default-constructed and never zeroed explicitly, so this presumably
// relies on the default constructor of IntrinsicType yielding zero - confirm.
// NOTE(review): every load/store covers a full vector, including the last panel that is
// entered for any j < N; this presumably relies on padded rows in B and C - confirm.
796  template< typename MT3 // Type of the left-hand side target matrix
797  , typename MT4 // Type of the left-hand side matrix operand
798  , typename MT5 > // Type of the right-hand side matrix operand
799  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
800  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
801  {
802  typedef IntrinsicTrait<ElementType> IT;
803 
// M x K times K x N product; j is the column cursor shared by all panel loops below.
804  const size_t M( A.rows() );
805  const size_t N( B.columns() );
806  const size_t K( A.columns() );
807 
808  size_t j( 0UL );
809 
// Panel of 8 vectors per row: taken while at least 7 full vectors plus one more column remain.
810  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
811  for( size_t i=0UL; i<M; ++i ) {
812  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
813  for( size_t k=0UL; k<K; ++k ) {
// A(i,k) is expanded via set() once and reused for all eight column vectors.
814  const IntrinsicType a1( set( A(i,k) ) );
815  xmm1 = xmm1 + a1 * B.load(k,j );
816  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
817  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
818  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
819  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
820  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
821  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
822  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
823  }
824  (~C).store( i, j , xmm1 );
825  (~C).store( i, j+IT::size , xmm2 );
826  (~C).store( i, j+IT::size*2UL, xmm3 );
827  (~C).store( i, j+IT::size*3UL, xmm4 );
828  (~C).store( i, j+IT::size*4UL, xmm5 );
829  (~C).store( i, j+IT::size*5UL, xmm6 );
830  (~C).store( i, j+IT::size*6UL, xmm7 );
831  (~C).store( i, j+IT::size*7UL, xmm8 );
832  }
833  }
// Panel of 4 vectors: two rows per iteration (xmm1-4 for row i, xmm5-8 for row i+1).
834  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
835  size_t i( 0UL );
836  for( ; (i+2UL) <= M; i+=2UL ) {
837  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
838  for( size_t k=0UL; k<K; ++k ) {
839  const IntrinsicType a1( set( A(i ,k) ) );
840  const IntrinsicType a2( set( A(i+1UL,k) ) );
// The four vectors of B are loaded once and shared by both rows.
841  const IntrinsicType b1( B.load(k,j ) );
842  const IntrinsicType b2( B.load(k,j+IT::size ) );
843  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
844  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
845  xmm1 = xmm1 + a1 * b1;
846  xmm2 = xmm2 + a1 * b2;
847  xmm3 = xmm3 + a1 * b3;
848  xmm4 = xmm4 + a1 * b4;
849  xmm5 = xmm5 + a2 * b1;
850  xmm6 = xmm6 + a2 * b2;
851  xmm7 = xmm7 + a2 * b3;
852  xmm8 = xmm8 + a2 * b4;
853  }
854  (~C).store( i , j , xmm1 );
855  (~C).store( i , j+IT::size , xmm2 );
856  (~C).store( i , j+IT::size*2UL, xmm3 );
857  (~C).store( i , j+IT::size*3UL, xmm4 );
858  (~C).store( i+1UL, j , xmm5 );
859  (~C).store( i+1UL, j+IT::size , xmm6 );
860  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
861  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
862  }
// Leftover single row when M is odd.
863  if( i < M ) {
864  IntrinsicType xmm1, xmm2, xmm3, xmm4;
865  for( size_t k=0UL; k<K; ++k ) {
866  const IntrinsicType a1( set( A(i,k) ) );
867  xmm1 = xmm1 + a1 * B.load(k,j );
868  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
869  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
870  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
871  }
872  (~C).store( i, j , xmm1 );
873  (~C).store( i, j+IT::size , xmm2 );
874  (~C).store( i, j+IT::size*2UL, xmm3 );
875  (~C).store( i, j+IT::size*3UL, xmm4 );
876  }
877  }
// Panel of 2 vectors: two rows per iteration (xmm1-2 for row i, xmm3-4 for row i+1).
878  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
879  size_t i( 0UL );
880  for( ; (i+2UL) <= M; i+=2UL ) {
881  IntrinsicType xmm1, xmm2, xmm3, xmm4;
882  for( size_t k=0UL; k<K; ++k ) {
883  const IntrinsicType a1( set( A(i ,k) ) );
884  const IntrinsicType a2( set( A(i+1UL,k) ) );
885  const IntrinsicType b1( B.load(k,j ) );
886  const IntrinsicType b2( B.load(k,j+IT::size) );
887  xmm1 = xmm1 + a1 * b1;
888  xmm2 = xmm2 + a1 * b2;
889  xmm3 = xmm3 + a2 * b1;
890  xmm4 = xmm4 + a2 * b2;
891  }
892  (~C).store( i , j , xmm1 );
893  (~C).store( i , j+IT::size, xmm2 );
894  (~C).store( i+1UL, j , xmm3 );
895  (~C).store( i+1UL, j+IT::size, xmm4 );
896  }
// Leftover single row when M is odd.
897  if( i < M ) {
898  IntrinsicType xmm1, xmm2;
899  for( size_t k=0UL; k<K; ++k ) {
900  const IntrinsicType a1( set( A(i,k) ) );
901  xmm1 = xmm1 + a1 * B.load(k,j );
902  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
903  }
904  (~C).store( i, j , xmm1 );
905  (~C).store( i, j+IT::size, xmm2 );
906  }
907  }
// Final panel of a single vector for whatever columns are left (at most IT::size).
908  if( j < N ) {
909  size_t i( 0UL );
910  for( ; (i+2UL) <= M; i+=2UL ) {
911  IntrinsicType xmm1, xmm2;
912  for( size_t k=0UL; k<K; ++k ) {
913  const IntrinsicType b1( B.load(k,j) );
914  xmm1 = xmm1 + set( A(i ,k) ) * b1;
915  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
916  }
917  (~C).store( i , j, xmm1 );
918  (~C).store( i+1UL, j, xmm2 );
919  }
// Leftover single row when M is odd.
920  if( i < M ) {
921  IntrinsicType xmm1;
922  for( size_t k=0UL; k<K; ++k ) {
923  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
924  }
925  (~C).store( i, j, xmm1 );
926  }
927  }
928  }
930  //**********************************************************************************************
931 
932  //**Vectorized default assignment to column-major dense matrices********************************
946  template< typename MT3 // Type of the left-hand side target matrix
947  , typename MT4 // Type of the left-hand side matrix operand
948  , typename MT5 > // Type of the right-hand side matrix operand
949  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
950  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
951  {
954 
955  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
956  const typename MT4::OppositeType tmp( serial( A ) );
957  assign( ~C, tmp * B );
958  }
959  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
960  const typename MT5::OppositeType tmp( serial( B ) );
961  assign( ~C, A * tmp );
962  }
963  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
964  const typename MT4::OppositeType tmp( serial( A ) );
965  assign( ~C, tmp * B );
966  }
967  else {
968  const typename MT5::OppositeType tmp( serial( B ) );
969  assign( ~C, A * tmp );
970  }
971  }
973  //**********************************************************************************************
974 
975  //**BLAS-based assignment to dense matrices (default)*******************************************
988  template< typename MT3 // Type of the left-hand side target matrix
989  , typename MT4 // Type of the left-hand side matrix operand
990  , typename MT5 > // Type of the right-hand side matrix operand
991  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
992  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
993  {
994  selectDefaultAssignKernel( C, A, B );
995  }
997  //**********************************************************************************************
998 
999  //**BLAS-based assignment to dense matrices (single precision)**********************************
1000 #if BLAZE_BLAS_MODE
1001 
1014  template< typename MT3 // Type of the left-hand side target matrix
1015  , typename MT4 // Type of the left-hand side matrix operand
1016  , typename MT5 > // Type of the right-hand side matrix operand
1017  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1018  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1019  {
1020  sgemm( C, A, B, 1.0F, 0.0F );
1021  }
1023 #endif
1024  //**********************************************************************************************
1025 
1026  //**BLAS-based assignment to dense matrices (double precision)**********************************
1027 #if BLAZE_BLAS_MODE
1028 
1041  template< typename MT3 // Type of the left-hand side target matrix
1042  , typename MT4 // Type of the left-hand side matrix operand
1043  , typename MT5 > // Type of the right-hand side matrix operand
1044  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1045  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1046  {
1047  dgemm( C, A, B, 1.0, 0.0 );
1048  }
1050 #endif
1051  //**********************************************************************************************
1052 
1053  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1054 #if BLAZE_BLAS_MODE
1055 
1068  template< typename MT3 // Type of the left-hand side target matrix
1069  , typename MT4 // Type of the left-hand side matrix operand
1070  , typename MT5 > // Type of the right-hand side matrix operand
1071  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1072  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1073  {
1074  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1075  }
1077 #endif
1078  //**********************************************************************************************
1079 
1080  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1081 #if BLAZE_BLAS_MODE
1082 
1095  template< typename MT3 // Type of the left-hand side target matrix
1096  , typename MT4 // Type of the left-hand side matrix operand
1097  , typename MT5 > // Type of the right-hand side matrix operand
1098  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1099  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1100  {
1101  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1102  }
1104 #endif
1105  //**********************************************************************************************
1106 
1107  //**Assignment to sparse matrices***************************************************************
1120  template< typename MT // Type of the target sparse matrix
1121  , bool SO > // Storage order of the target sparse matrix
1122  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1123  assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1124  {
1126 
1127  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1128 
1135 
1136  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1137  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1138 
1139  const TmpType tmp( serial( rhs ) );
1140  assign( ~lhs, tmp );
1141  }
1143  //**********************************************************************************************
1144 
1145  //**Restructuring assignment to column-major matrices*******************************************
1160  template< typename MT > // Type of the target matrix
1161  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1162  assign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
1163  {
1165 
1167 
1168  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1169  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1170 
1171  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1172  assign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1173  else if( IsSymmetric<MT1>::value )
1174  assign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1175  else
1176  assign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1177  }
1179  //**********************************************************************************************
1180 
1181  //**Addition assignment to dense matrices*******************************************************
1194  template< typename MT // Type of the target dense matrix
1195  , bool SO > // Storage order of the target dense matrix
1196  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1197  addAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1198  {
1200 
1201  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1202  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1203 
1204  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1205  return;
1206  }
1207 
1208  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1209  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1210 
1211  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1212  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1213  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1214  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1215  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1216  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1217 
1218  DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1219  }
1221  //**********************************************************************************************
1222 
1223  //**Addition assignment to dense matrices (kernel selection)************************************
1234  template< typename MT3 // Type of the left-hand side target matrix
1235  , typename MT4 // Type of the left-hand side matrix operand
1236  , typename MT5 > // Type of the right-hand side matrix operand
1237  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1238  {
1239  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1240  DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1241  else
1242  DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1243  }
1245  //**********************************************************************************************
1246 
1247  //**Default addition assignment to dense matrices***********************************************
1261  template< typename MT3 // Type of the left-hand side target matrix
1262  , typename MT4 // Type of the left-hand side matrix operand
1263  , typename MT5 > // Type of the right-hand side matrix operand
1264  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1265  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1266  {
1267  const size_t M( A.rows() );
1268  const size_t N( B.columns() );
1269  const size_t K( A.columns() );
1270 
1271  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1272  const size_t end( N & size_t(-2) );
1273 
1274  for( size_t i=0UL; i<M; ++i ) {
1275  for( size_t k=0UL; k<K; ++k ) {
1276  for( size_t j=0UL; j<end; j+=2UL ) {
1277  C(i,j ) += A(i,k) * B(k,j );
1278  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1279  }
1280  if( end < N ) {
1281  C(i,end) += A(i,k) * B(k,end);
1282  }
1283  }
1284  }
1285  }
1287  //**********************************************************************************************
1288 
1289  //**Vectorized default addition assignment to row-major dense matrices**************************
1303  template< typename MT3 // Type of the left-hand side target matrix
1304  , typename MT4 // Type of the left-hand side matrix operand
1305  , typename MT5 > // Type of the right-hand side matrix operand
1306  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1307  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1308  {
1309  typedef IntrinsicTrait<ElementType> IT;
1310 
1311  const size_t M( A.rows() );
1312  const size_t N( B.columns() );
1313  const size_t K( A.columns() );
1314 
1315  size_t j( 0UL );
1316 
1317  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1318  for( size_t i=0UL; i<M; ++i ) {
1319  IntrinsicType xmm1( (~C).load(i,j ) );
1320  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1321  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1322  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1323  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1324  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1325  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1326  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1327  for( size_t k=0UL; k<K; ++k ) {
1328  const IntrinsicType a1( set( A(i,k) ) );
1329  xmm1 = xmm1 + a1 * B.load(k,j );
1330  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1331  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1332  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1333  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1334  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1335  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1336  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1337  }
1338  (~C).store( i, j , xmm1 );
1339  (~C).store( i, j+IT::size , xmm2 );
1340  (~C).store( i, j+IT::size*2UL, xmm3 );
1341  (~C).store( i, j+IT::size*3UL, xmm4 );
1342  (~C).store( i, j+IT::size*4UL, xmm5 );
1343  (~C).store( i, j+IT::size*5UL, xmm6 );
1344  (~C).store( i, j+IT::size*6UL, xmm7 );
1345  (~C).store( i, j+IT::size*7UL, xmm8 );
1346  }
1347  }
1348  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1349  size_t i( 0UL );
1350  for( ; (i+2UL) <= M; i+=2UL ) {
1351  IntrinsicType xmm1( (~C).load(i ,j ) );
1352  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1353  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1354  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1355  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1356  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1357  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1358  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1359  for( size_t k=0UL; k<K; ++k ) {
1360  const IntrinsicType a1( set( A(i ,k) ) );
1361  const IntrinsicType a2( set( A(i+1UL,k) ) );
1362  const IntrinsicType b1( B.load(k,j ) );
1363  const IntrinsicType b2( B.load(k,j+IT::size ) );
1364  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1365  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1366  xmm1 = xmm1 + a1 * b1;
1367  xmm2 = xmm2 + a1 * b2;
1368  xmm3 = xmm3 + a1 * b3;
1369  xmm4 = xmm4 + a1 * b4;
1370  xmm5 = xmm5 + a2 * b1;
1371  xmm6 = xmm6 + a2 * b2;
1372  xmm7 = xmm7 + a2 * b3;
1373  xmm8 = xmm8 + a2 * b4;
1374  }
1375  (~C).store( i , j , xmm1 );
1376  (~C).store( i , j+IT::size , xmm2 );
1377  (~C).store( i , j+IT::size*2UL, xmm3 );
1378  (~C).store( i , j+IT::size*3UL, xmm4 );
1379  (~C).store( i+1UL, j , xmm5 );
1380  (~C).store( i+1UL, j+IT::size , xmm6 );
1381  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1382  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1383  }
1384  if( i < M ) {
1385  IntrinsicType xmm1( (~C).load(i,j ) );
1386  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1387  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1388  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1389  for( size_t k=0UL; k<K; ++k ) {
1390  const IntrinsicType a1( set( A(i,k) ) );
1391  xmm1 = xmm1 + a1 * B.load(k,j );
1392  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1393  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1394  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1395  }
1396  (~C).store( i, j , xmm1 );
1397  (~C).store( i, j+IT::size , xmm2 );
1398  (~C).store( i, j+IT::size*2UL, xmm3 );
1399  (~C).store( i, j+IT::size*3UL, xmm4 );
1400  }
1401  }
1402  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1403  size_t i( 0UL );
1404  for( ; (i+2UL) <= M; i+=2UL ) {
1405  IntrinsicType xmm1( (~C).load(i ,j ) );
1406  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1407  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1408  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1409  for( size_t k=0UL; k<K; ++k ) {
1410  const IntrinsicType a1( set( A(i ,k) ) );
1411  const IntrinsicType a2( set( A(i+1UL,k) ) );
1412  const IntrinsicType b1( B.load(k,j ) );
1413  const IntrinsicType b2( B.load(k,j+IT::size) );
1414  xmm1 = xmm1 + a1 * b1;
1415  xmm2 = xmm2 + a1 * b2;
1416  xmm3 = xmm3 + a2 * b1;
1417  xmm4 = xmm4 + a2 * b2;
1418  }
1419  (~C).store( i , j , xmm1 );
1420  (~C).store( i , j+IT::size, xmm2 );
1421  (~C).store( i+1UL, j , xmm3 );
1422  (~C).store( i+1UL, j+IT::size, xmm4 );
1423  }
1424  if( i < M ) {
1425  IntrinsicType xmm1( (~C).load(i,j ) );
1426  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1427  for( size_t k=0UL; k<K; ++k ) {
1428  const IntrinsicType a1( set( A(i,k) ) );
1429  xmm1 = xmm1 + a1 * B.load(k,j );
1430  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1431  }
1432  (~C).store( i, j , xmm1 );
1433  (~C).store( i, j+IT::size, xmm2 );
1434  }
1435  }
1436  if( j < N ) {
1437  size_t i( 0UL );
1438  for( ; (i+2UL) <= M; i+=2UL ) {
1439  IntrinsicType xmm1( (~C).load(i ,j) );
1440  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1441  for( size_t k=0UL; k<K; ++k ) {
1442  const IntrinsicType b1( B.load(k,j) );
1443  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1444  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1445  }
1446  (~C).store( i , j, xmm1 );
1447  (~C).store( i+1UL, j, xmm2 );
1448  }
1449  if( i < M ) {
1450  IntrinsicType xmm1( (~C).load(i,j) );
1451  for( size_t k=0UL; k<K; ++k ) {
1452  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1453  }
1454  (~C).store( i, j, xmm1 );
1455  }
1456  }
1457  }
1459  //**********************************************************************************************
1460 
1461  //**Vectorized default addition assignment to column-major dense matrices***********************
1475  template< typename MT3 // Type of the left-hand side target matrix
1476  , typename MT4 // Type of the left-hand side matrix operand
1477  , typename MT5 > // Type of the right-hand side matrix operand
1478  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1479  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1480  {
1483 
1484  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1485  const typename MT4::OppositeType tmp( serial( A ) );
1486  addAssign( ~C, tmp * B );
1487  }
1488  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1489  const typename MT5::OppositeType tmp( serial( B ) );
1490  addAssign( ~C, A * tmp );
1491  }
1492  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1493  const typename MT4::OppositeType tmp( serial( A ) );
1494  addAssign( ~C, tmp * B );
1495  }
1496  else {
1497  const typename MT5::OppositeType tmp( serial( B ) );
1498  addAssign( ~C, A * tmp );
1499  }
1500  }
1502  //**********************************************************************************************
1503 
1504  //**BLAS-based addition assignment to dense matrices (default)**********************************
1518  template< typename MT3 // Type of the left-hand side target matrix
1519  , typename MT4 // Type of the left-hand side matrix operand
1520  , typename MT5 > // Type of the right-hand side matrix operand
1521  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1522  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1523  {
1524  selectDefaultAddAssignKernel( C, A, B );
1525  }
1527  //**********************************************************************************************
1528 
1529  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1530 #if BLAZE_BLAS_MODE
1531 
1544  template< typename MT3 // Type of the left-hand side target matrix
1545  , typename MT4 // Type of the left-hand side matrix operand
1546  , typename MT5 > // Type of the right-hand side matrix operand
1547  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1548  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1549  {
1550  sgemm( C, A, B, 1.0F, 1.0F );
1551  }
1553 #endif
1554  //**********************************************************************************************
1555 
1556  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1557 #if BLAZE_BLAS_MODE
1558 
1571  template< typename MT3 // Type of the left-hand side target matrix
1572  , typename MT4 // Type of the left-hand side matrix operand
1573  , typename MT5 > // Type of the right-hand side matrix operand
1574  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1575  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1576  {
1577  dgemm( C, A, B, 1.0, 1.0 );
1578  }
1580 #endif
1581  //**********************************************************************************************
1582 
1583  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1584 #if BLAZE_BLAS_MODE
1585 
1598  template< typename MT3 // Type of the left-hand side target matrix
1599  , typename MT4 // Type of the left-hand side matrix operand
1600  , typename MT5 > // Type of the right-hand side matrix operand
1601  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1602  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1603  {
1604  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1605  }
1607 #endif
1608  //**********************************************************************************************
1609 
1610  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1611 #if BLAZE_BLAS_MODE
1612 
1625  template< typename MT3 // Type of the left-hand side target matrix
1626  , typename MT4 // Type of the left-hand side matrix operand
1627  , typename MT5 > // Type of the right-hand side matrix operand
1628  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1629  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1630  {
1631  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1632  }
1634 #endif
1635  //**********************************************************************************************
1636 
1637  //**Restructuring addition assignment to column-major matrices**********************************
1652  template< typename MT > // Type of the target matrix
1653  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1654  addAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
1655  {
1657 
1659 
1660  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1661  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1662 
1663  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1664  addAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1665  else if( IsSymmetric<MT1>::value )
1666  addAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1667  else
1668  addAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1669  }
1671  //**********************************************************************************************
1672 
1673  //**Addition assignment to sparse matrices******************************************************
1674  // No special implementation for the addition assignment to sparse matrices.
1675  //**********************************************************************************************
1676 
1677  //**Subtraction assignment to dense matrices****************************************************
1690  template< typename MT // Type of the target dense matrix
1691  , bool SO > // Storage order of the target dense matrix
1692  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1693  subAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1694  {
1696 
1697  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1698  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1699 
1700  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1701  return;
1702  }
1703 
1704  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1705  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1706 
1707  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1708  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1709  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1710  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1711  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1712  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1713 
1714  DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1715  }
1717  //**********************************************************************************************
1718 
1719  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1730  template< typename MT3 // Type of the left-hand side target matrix
1731  , typename MT4 // Type of the left-hand side matrix operand
1732  , typename MT5 > // Type of the right-hand side matrix operand
1733  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1734  {
1735  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1736  DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1737  else
1738  DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1739  }
1741  //**********************************************************************************************
1742 
1743  //**Default subtraction assignment to dense matrices********************************************
1757  template< typename MT3 // Type of the left-hand side target matrix
1758  , typename MT4 // Type of the left-hand side matrix operand
1759  , typename MT5 > // Type of the right-hand side matrix operand
1760  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1761  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1762  {
1763  const size_t M( A.rows() );
1764  const size_t N( B.columns() );
1765  const size_t K( A.columns() );
1766 
1767  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1768  const size_t end( N & size_t(-2) );
1769 
1770  for( size_t i=0UL; i<M; ++i ) {
1771  for( size_t k=0UL; k<K; ++k ) {
1772  for( size_t j=0UL; j<end; j+=2UL ) {
1773  C(i,j ) -= A(i,k) * B(k,j );
1774  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1775  }
1776  if( end < N ) {
1777  C(i,end) -= A(i,k) * B(k,end);
1778  }
1779  }
1780  }
1781  }
1783  //**********************************************************************************************
1784 
1785  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1799  template< typename MT3 // Type of the left-hand side target matrix
1800  , typename MT4 // Type of the left-hand side matrix operand
1801  , typename MT5 > // Type of the right-hand side matrix operand
1802  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1803  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1804  {
1805  typedef IntrinsicTrait<ElementType> IT;
1806 
1807  const size_t M( A.rows() );
1808  const size_t N( B.columns() );
1809  const size_t K( A.columns() );
1810 
1811  size_t j( 0UL );
1812 
1813  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1814  for( size_t i=0UL; i<M; ++i ) {
1815  IntrinsicType xmm1( (~C).load(i,j ) );
1816  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1817  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1818  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1819  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1820  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1821  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1822  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1823  for( size_t k=0UL; k<K; ++k ) {
1824  const IntrinsicType a1( set( A(i,k) ) );
1825  xmm1 = xmm1 - a1 * B.load(k,j );
1826  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1827  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1828  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1829  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1830  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1831  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1832  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1833  }
1834  (~C).store( i, j , xmm1 );
1835  (~C).store( i, j+IT::size , xmm2 );
1836  (~C).store( i, j+IT::size*2UL, xmm3 );
1837  (~C).store( i, j+IT::size*3UL, xmm4 );
1838  (~C).store( i, j+IT::size*4UL, xmm5 );
1839  (~C).store( i, j+IT::size*5UL, xmm6 );
1840  (~C).store( i, j+IT::size*6UL, xmm7 );
1841  (~C).store( i, j+IT::size*7UL, xmm8 );
1842  }
1843  }
1844  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1845  size_t i( 0UL );
1846  for( ; (i+2UL) <= M; i+=2UL ) {
1847  IntrinsicType xmm1( (~C).load(i ,j ) );
1848  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1849  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1850  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1851  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1852  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1853  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1854  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1855  for( size_t k=0UL; k<K; ++k ) {
1856  const IntrinsicType a1( set( A(i ,k) ) );
1857  const IntrinsicType a2( set( A(i+1UL,k) ) );
1858  const IntrinsicType b1( B.load(k,j ) );
1859  const IntrinsicType b2( B.load(k,j+IT::size ) );
1860  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1861  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1862  xmm1 = xmm1 - a1 * b1;
1863  xmm2 = xmm2 - a1 * b2;
1864  xmm3 = xmm3 - a1 * b3;
1865  xmm4 = xmm4 - a1 * b4;
1866  xmm5 = xmm5 - a2 * b1;
1867  xmm6 = xmm6 - a2 * b2;
1868  xmm7 = xmm7 - a2 * b3;
1869  xmm8 = xmm8 - a2 * b4;
1870  }
1871  (~C).store( i , j , xmm1 );
1872  (~C).store( i , j+IT::size , xmm2 );
1873  (~C).store( i , j+IT::size*2UL, xmm3 );
1874  (~C).store( i , j+IT::size*3UL, xmm4 );
1875  (~C).store( i+1UL, j , xmm5 );
1876  (~C).store( i+1UL, j+IT::size , xmm6 );
1877  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1878  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1879  }
1880  if( i < M ) {
1881  IntrinsicType xmm1( (~C).load(i,j ) );
1882  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1883  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1884  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1885  for( size_t k=0UL; k<K; ++k ) {
1886  const IntrinsicType a1( set( A(i,k) ) );
1887  xmm1 = xmm1 - a1 * B.load(k,j );
1888  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1889  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1890  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1891  }
1892  (~C).store( i, j , xmm1 );
1893  (~C).store( i, j+IT::size , xmm2 );
1894  (~C).store( i, j+IT::size*2UL, xmm3 );
1895  (~C).store( i, j+IT::size*3UL, xmm4 );
1896  }
1897  }
1898  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1899  size_t i( 0UL );
1900  for( ; (i+2UL) <= M; i+=2UL ) {
1901  IntrinsicType xmm1( (~C).load(i ,j ) );
1902  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1903  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1904  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1905  for( size_t k=0UL; k<K; ++k ) {
1906  const IntrinsicType a1( set( A(i ,k) ) );
1907  const IntrinsicType a2( set( A(i+1UL,k) ) );
1908  const IntrinsicType b1( B.load(k,j ) );
1909  const IntrinsicType b2( B.load(k,j+IT::size) );
1910  xmm1 = xmm1 - a1 * b1;
1911  xmm2 = xmm2 - a1 * b2;
1912  xmm3 = xmm3 - a2 * b1;
1913  xmm4 = xmm4 - a2 * b2;
1914  }
1915  (~C).store( i , j , xmm1 );
1916  (~C).store( i , j+IT::size, xmm2 );
1917  (~C).store( i+1UL, j , xmm3 );
1918  (~C).store( i+1UL, j+IT::size, xmm4 );
1919  }
1920  if( i < M ) {
1921  IntrinsicType xmm1( (~C).load(i,j ) );
1922  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1923  for( size_t k=0UL; k<K; ++k ) {
1924  const IntrinsicType a1( set( A(i,k) ) );
1925  xmm1 = xmm1 - a1 * B.load(k,j );
1926  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1927  }
1928  (~C).store( i, j , xmm1 );
1929  (~C).store( i, j+IT::size, xmm2 );
1930  }
1931  }
1932  if( j < N ) {
1933  size_t i( 0UL );
1934  for( ; (i+2UL) <= M; i+=2UL ) {
1935  IntrinsicType xmm1( (~C).load(i ,j) );
1936  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1937  for( size_t k=0UL; k<K; ++k ) {
1938  const IntrinsicType b1( B.load(k,j) );
1939  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1940  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1941  }
1942  (~C).store( i , j, xmm1 );
1943  (~C).store( i+1UL, j, xmm2 );
1944  }
1945  if( i < M ) {
1946  IntrinsicType xmm1( (~C).load(i,j) );
1947  for( size_t k=0UL; k<K; ++k ) {
1948  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
1949  }
1950  (~C).store( i, j, xmm1 );
1951  }
1952  }
1953  }
1955  //**********************************************************************************************
1956 
1957  //**Vectorized default subtraction assignment to column-major dense matrices********************
1971  template< typename MT3 // Type of the left-hand side target matrix
1972  , typename MT4 // Type of the left-hand side matrix operand
1973  , typename MT5 > // Type of the right-hand side matrix operand
1974  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1975  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1976  {
1979 
1980  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1981  const typename MT4::OppositeType tmp( serial( A ) );
1982  subAssign( ~C, tmp * B );
1983  }
1984  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1985  const typename MT5::OppositeType tmp( serial( B ) );
1986  subAssign( ~C, A * tmp );
1987  }
1988  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1989  const typename MT4::OppositeType tmp( serial( A ) );
1990  subAssign( ~C, tmp * B );
1991  }
1992  else {
1993  const typename MT5::OppositeType tmp( serial( B ) );
1994  subAssign( ~C, A * tmp );
1995  }
1996  }
1998  //**********************************************************************************************
1999 
2000  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
2014  template< typename MT3 // Type of the left-hand side target matrix
2015  , typename MT4 // Type of the left-hand side matrix operand
2016  , typename MT5 > // Type of the right-hand side matrix operand
2017  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2018  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2019  {
2020  selectDefaultSubAssignKernel( C, A, B );
2021  }
2023  //**********************************************************************************************
2024 
2025  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
2026 #if BLAZE_BLAS_MODE
2027 
2040  template< typename MT3 // Type of the left-hand side target matrix
2041  , typename MT4 // Type of the left-hand side matrix operand
2042  , typename MT5 > // Type of the right-hand side matrix operand
2043  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2044  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2045  {
2046  sgemm( C, A, B, -1.0F, 1.0F );
2047  }
2049 #endif
2050  //**********************************************************************************************
2051 
2052  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2053 #if BLAZE_BLAS_MODE
2054 
2067  template< typename MT3 // Type of the left-hand side target matrix
2068  , typename MT4 // Type of the left-hand side matrix operand
2069  , typename MT5 > // Type of the right-hand side matrix operand
2070  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2071  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2072  {
2073  dgemm( C, A, B, -1.0, 1.0 );
2074  }
2076 #endif
2077  //**********************************************************************************************
2078 
2079  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2080 #if BLAZE_BLAS_MODE
2081 
2094  template< typename MT3 // Type of the left-hand side target matrix
2095  , typename MT4 // Type of the left-hand side matrix operand
2096  , typename MT5 > // Type of the right-hand side matrix operand
2097  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2098  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2099  {
2100  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2101  }
2103 #endif
2104  //**********************************************************************************************
2105 
2106  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2107 #if BLAZE_BLAS_MODE
2108 
2121  template< typename MT3 // Type of the left-hand side target matrix
2122  , typename MT4 // Type of the left-hand side matrix operand
2123  , typename MT5 > // Type of the right-hand side matrix operand
2124  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2125  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2126  {
2127  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2128  }
2130 #endif
2131  //**********************************************************************************************
2132 
2133  //**Restructuring subtraction assignment to column-major matrices*******************************
2148  template< typename MT > // Type of the target matrix
2149  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2150  subAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2151  {
2153 
2155 
2156  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2157  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2158 
2159  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2160  subAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2161  else if( IsSymmetric<MT1>::value )
2162  subAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2163  else
2164  subAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2165  }
2167  //**********************************************************************************************
2168 
2169  //**Subtraction assignment to sparse matrices***************************************************
2170  // No special implementation for the subtraction assignment to sparse matrices.
2171  //**********************************************************************************************
2172 
2173  //**Multiplication assignment to dense matrices*************************************************
2174  // No special implementation for the multiplication assignment to dense matrices.
2175  //**********************************************************************************************
2176 
2177  //**Multiplication assignment to sparse matrices************************************************
2178  // No special implementation for the multiplication assignment to sparse matrices.
2179  //**********************************************************************************************
2180 
2181  //**SMP assignment to dense matrices************************************************************
2196  template< typename MT // Type of the target dense matrix
2197  , bool SO > // Storage order of the target dense matrix
2198  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2199  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2200  {
2202 
2203  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2204  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2205 
2206  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2207  return;
2208  }
2209  else if( rhs.lhs_.columns() == 0UL ) {
2210  reset( ~lhs );
2211  return;
2212  }
2213 
2214  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2215  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2216 
2217  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2218  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2219  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2220  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2221  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2222  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2223 
2224  smpAssign( ~lhs, A * B );
2225  }
2227  //**********************************************************************************************
2228 
2229  //**SMP assignment to sparse matrices***********************************************************
2244  template< typename MT // Type of the target sparse matrix
2245  , bool SO > // Storage order of the target sparse matrix
2246  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2247  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2248  {
2250 
2251  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2252 
2259 
2260  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2261  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2262 
2263  const TmpType tmp( rhs );
2264  smpAssign( ~lhs, tmp );
2265  }
2267  //**********************************************************************************************
2268 
2269  //**Restructuring SMP assignment to column-major matrices***************************************
2284  template< typename MT > // Type of the target matrix
2285  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2286  smpAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2287  {
2289 
2291 
2292  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2293  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2294 
2295  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2296  smpAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2297  else if( IsSymmetric<MT1>::value )
2298  smpAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2299  else
2300  smpAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2301  }
2303  //**********************************************************************************************
2304 
2305  //**SMP addition assignment to dense matrices***************************************************
2321  template< typename MT // Type of the target dense matrix
2322  , bool SO > // Storage order of the target dense matrix
2323  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2324  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2325  {
2327 
2328  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2329  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2330 
2331  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2332  return;
2333  }
2334 
2335  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2336  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2337 
2338  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2339  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2340  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2341  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2342  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2343  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2344 
2345  smpAddAssign( ~lhs, A * B );
2346  }
2348  //**********************************************************************************************
2349 
2350  //**Restructuring SMP addition assignment to column-major matrices******************************
2365  template< typename MT > // Type of the target matrix
2366  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2367  smpAddAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2368  {
2370 
2372 
2373  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2374  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2375 
2376  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2377  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2378  else if( IsSymmetric<MT1>::value )
2379  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2380  else
2381  smpAddAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2382  }
2384  //**********************************************************************************************
2385 
2386  //**SMP addition assignment to sparse matrices**************************************************
2387  // No special implementation for the SMP addition assignment to sparse matrices.
2388  //**********************************************************************************************
2389 
2390  //**SMP subtraction assignment to dense matrices************************************************
2406  template< typename MT // Type of the target dense matrix
2407  , bool SO > // Storage order of the target dense matrix
2408  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2409  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2410  {
2412 
2413  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2414  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2415 
2416  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2417  return;
2418  }
2419 
2420  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2421  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2422 
2423  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2424  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2425  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2426  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2427  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2428  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2429 
2430  smpSubAssign( ~lhs, A * B );
2431  }
2433  //**********************************************************************************************
2434 
2435  //**Restructuring SMP subtraction assignment to column-major matrices***************************
2450  template< typename MT > // Type of the target matrix
2451  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2452  smpSubAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2453  {
2455 
2457 
2458  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2459  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2460 
2461  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2462  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2463  else if( IsSymmetric<MT1>::value )
2464  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2465  else
2466  smpSubAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2467  }
2469  //**********************************************************************************************
2470 
2471  //**SMP subtraction assignment to sparse matrices***********************************************
2472  // No special implementation for the SMP subtraction assignment to sparse matrices.
2473  //**********************************************************************************************
2474 
2475  //**SMP multiplication assignment to dense matrices*********************************************
2476  // No special implementation for the SMP multiplication assignment to dense matrices.
2477  //**********************************************************************************************
2478 
2479  //**SMP multiplication assignment to sparse matrices********************************************
2480  // No special implementation for the SMP multiplication assignment to sparse matrices.
2481  //**********************************************************************************************
2482 
2483  //**Compile time checks*************************************************************************
2491  //**********************************************************************************************
2492 };
2493 //*************************************************************************************************
2494 
2495 
2496 
2497 
2498 //=================================================================================================
2499 //
2500 // DMATSCALARMULTEXPR SPECIALIZATION
2501 //
2502 //=================================================================================================
2503 
2504 //*************************************************************************************************
2512 template< typename MT1 // Type of the left-hand side dense matrix
2513  , typename MT2 // Type of the right-hand side dense matrix
2514  , typename ST > // Type of the right-hand side scalar value
2515 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
2516  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2517  , private MatScalarMultExpr
2518  , private Computation
2519 {
2520  private:
2521  //**Type definitions****************************************************************************
// Shorthand names for the wrapped multiplication expression and the types derived from
// its two operand types MT1 (left) and MT2 (right).
// MMM: the dense matrix/dense matrix multiplication expression that is scaled.
2522  typedef DMatDMatMultExpr<MT1,MT2> MMM;
// RES: result type of the multiplication expression.
2523  typedef typename MMM::ResultType RES;
// RT1/RT2: result types of the left/right matrix operand.
2524  typedef typename MT1::ResultType RT1;
2525  typedef typename MT2::ResultType RT2;
// ET1/ET2: element types of the left/right result type.
2526  typedef typename RT1::ElementType ET1;
2527  typedef typename RT2::ElementType ET2;
// CT1/CT2: composite types of the left/right matrix operand.
2528  typedef typename MT1::CompositeType CT1;
2529  typedef typename MT2::CompositeType CT2;
2530  //**********************************************************************************************
2531 
2532  //**********************************************************************************************
// Compile-time flag for the left operand: non-zero if MT1 is a computation or requires
// evaluation. Used by the LT typedef to choose between `const RT1` and `CT1`.
2534  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2535  //**********************************************************************************************
2536 
2537  //**********************************************************************************************
// Compile-time flag for the right operand: non-zero if MT2 is a computation or requires
// evaluation. Used by the RT typedef to choose between `const RT2` and `CT2`.
2539  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2540  //**********************************************************************************************
2541 
2542  //**********************************************************************************************
2544 
2549  template< typename T1, typename T2, typename T3 >
2550  struct CanExploitSymmetry {
2551  enum { value = IsColumnMajorMatrix<T1>::value &&
2552  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
2553  };
2554  //**********************************************************************************************
2555 
2556  //**********************************************************************************************
2558 
2561  template< typename T1, typename T2, typename T3 >
2562  struct IsEvaluationRequired {
2563  enum { value = ( evaluateLeft || evaluateRight ) &&
2564  !CanExploitSymmetry<T1,T2,T3>::value };
2565  };
2566  //**********************************************************************************************
2567 
2568  //**********************************************************************************************
2570 
2573  template< typename T1, typename T2, typename T3, typename T4 >
2574  struct UseSinglePrecisionKernel {
2575  enum { value = BLAZE_BLAS_MODE &&
2576  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2577  IsFloat<typename T1::ElementType>::value &&
2578  IsFloat<typename T2::ElementType>::value &&
2579  IsFloat<typename T3::ElementType>::value &&
2580  !IsComplex<T4>::value };
2581  };
2582  //**********************************************************************************************
2583 
2584  //**********************************************************************************************
2586 
2589  template< typename T1, typename T2, typename T3, typename T4 >
2590  struct UseDoublePrecisionKernel {
2591  enum { value = BLAZE_BLAS_MODE &&
2592  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2593  IsDouble<typename T1::ElementType>::value &&
2594  IsDouble<typename T2::ElementType>::value &&
2595  IsDouble<typename T3::ElementType>::value &&
2596  !IsComplex<T4>::value };
2597  };
2598  //**********************************************************************************************
2599 
2600  //**********************************************************************************************
2602 
2605  template< typename T1, typename T2, typename T3 >
2606  struct UseSinglePrecisionComplexKernel {
2607  typedef complex<float> Type;
2608  enum { value = BLAZE_BLAS_MODE &&
2609  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2610  IsSame<typename T1::ElementType,Type>::value &&
2611  IsSame<typename T2::ElementType,Type>::value &&
2612  IsSame<typename T3::ElementType,Type>::value };
2613  };
2614  //**********************************************************************************************
2615 
2616  //**********************************************************************************************
2618 
2621  template< typename T1, typename T2, typename T3 >
2622  struct UseDoublePrecisionComplexKernel {
2623  typedef complex<double> Type;
2624  enum { value = BLAZE_BLAS_MODE &&
2625  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2626  IsSame<typename T1::ElementType,Type>::value &&
2627  IsSame<typename T2::ElementType,Type>::value &&
2628  IsSame<typename T3::ElementType,Type>::value };
2629  };
2630  //**********************************************************************************************
2631 
2632  //**********************************************************************************************
2634 
2636  template< typename T1, typename T2, typename T3, typename T4 >
2637  struct UseDefaultKernel {
2638  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2639  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2640  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2641  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2642  };
2643  //**********************************************************************************************
2644 
2645  //**********************************************************************************************
2647 
2649  template< typename T1, typename T2, typename T3, typename T4 >
2650  struct UseVectorizedDefaultKernel {
2651  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2652  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2653  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2654  IsSame<typename T1::ElementType,T4>::value &&
2655  IntrinsicTrait<typename T1::ElementType>::addition &&
2656  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2657  IntrinsicTrait<typename T1::ElementType>::multiplication };
2658  };
2659  //**********************************************************************************************
2660 
2661  public:
2662  //**Type definitions****************************************************************************
// This: type of this DMatScalarMultExpr specialization.
2663  typedef DMatScalarMultExpr<MMM,ST,false> This;
// ResultType: multiplication trait result of the product's result type and the scalar type.
2664  typedef typename MultTrait<RES,ST>::Type ResultType;
// OppositeType / TransposeType / ElementType: nested types taken from ResultType.
2665  typedef typename ResultType::OppositeType OppositeType;
2666  typedef typename ResultType::TransposeType TransposeType;
2667  typedef typename ResultType::ElementType ElementType;
// IntrinsicType: intrinsic type associated with ElementType via IntrinsicTrait.
2668  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// ReturnType: type returned by operator() (a const element value).
2669  typedef const ElementType ReturnType;
// CompositeType: const ResultType.
2670  typedef const ResultType CompositeType;
2671 
// LeftOperand: the wrapped matrix multiplication expression (stored in matrix_).
2673  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2674 
// RightOperand: the scalar type (stored in scalar_).
2676  typedef ST RightOperand;
2677 
// LT: `const RT1` if evaluateLeft is set, `CT1` otherwise.
2679  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2680 
// RT: `const RT2` if evaluateRight is set, `CT2` otherwise.
2682  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2683  //**********************************************************************************************
2684 
2685  //**Compilation flags***************************************************************************
// vectorizable: non-zero if both operand types are vectorizable, the two element types and
// the scalar type are identical, and IntrinsicTrait<ET1> reports addition and multiplication.
2687  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2688  IsSame<ET1,ET2>::value &&
2689  IsSame<ET1,ST>::value &&
2690  IntrinsicTrait<ET1>::addition &&
2691  IntrinsicTrait<ET1>::multiplication };
2692 
// smpAssignable: non-zero if neither operand needs evaluation and both operand types are
// themselves flagged as smpAssignable.
2694  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2695  !evaluateRight && MT2::smpAssignable };
2696  //**********************************************************************************************
2697 
2698  //**Constructor*********************************************************************************
2704  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2705  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2706  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2707  {}
2708  //**********************************************************************************************
2709 
2710  //**Access operator*****************************************************************************
2717  inline ReturnType operator()( size_t i, size_t j ) const {
2718  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2719  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2720  return matrix_(i,j) * scalar_;
2721  }
2722  //**********************************************************************************************
2723 
2724  //**Rows function*******************************************************************************
2729  inline size_t rows() const {
2730  return matrix_.rows();
2731  }
2732  //**********************************************************************************************
2733 
2734  //**Columns function****************************************************************************
2739  inline size_t columns() const {
2740  return matrix_.columns();
2741  }
2742  //**********************************************************************************************
2743 
2744  //**Left operand access*************************************************************************
2749  inline LeftOperand leftOperand() const {
2750  return matrix_;
2751  }
2752  //**********************************************************************************************
2753 
2754  //**Right operand access************************************************************************
2759  inline RightOperand rightOperand() const {
2760  return scalar_;
2761  }
2762  //**********************************************************************************************
2763 
2764  //**********************************************************************************************
2770  template< typename T >
2771  inline bool canAlias( const T* alias ) const {
2772  return matrix_.canAlias( alias );
2773  }
2774  //**********************************************************************************************
2775 
2776  //**********************************************************************************************
2782  template< typename T >
2783  inline bool isAliased( const T* alias ) const {
2784  return matrix_.isAliased( alias );
2785  }
2786  //**********************************************************************************************
2787 
2788  //**********************************************************************************************
2793  inline bool isAligned() const {
2794  return matrix_.isAligned();
2795  }
2796  //**********************************************************************************************
2797 
2798  //**********************************************************************************************
2803  inline bool canSMPAssign() const {
2804  typename MMM::LeftOperand A( matrix_.leftOperand() );
2805  return ( !BLAZE_BLAS_IS_PARALLEL ||
2806  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
2807  ( A.rows() > SMP_DMATDMATMULT_THRESHOLD );
2808  }
2809  //**********************************************************************************************
2810 
2811  private:
2812  //**Member variables****************************************************************************
// Left-hand side dense matrix multiplication expression that is scaled.
2813  LeftOperand matrix_;
// Right-hand side scalar factor of the scaling.
2814  RightOperand scalar_;
2815  //**********************************************************************************************
2816 
2817  //**BLAS kernel (single precision)**************************************************************
2818 #if BLAZE_BLAS_MODE
2819 
2833  template< typename MT3 // Type of the left-hand side target matrix
2834  , typename MT4 // Type of the left-hand side matrix operand
2835  , typename MT5 > // Type of the right-hand side matrix operand
2836  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
2837  {
2838  using boost::numeric_cast;
2839 
2843 
2844  const int M ( numeric_cast<int>( A.rows() ) );
2845  const int N ( numeric_cast<int>( B.columns() ) );
2846  const int K ( numeric_cast<int>( A.columns() ) );
2847  const int lda( numeric_cast<int>( A.spacing() ) );
2848  const int ldb( numeric_cast<int>( B.spacing() ) );
2849  const int ldc( numeric_cast<int>( C.spacing() ) );
2850 
2851  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2852  cblas_ssymm( CblasRowMajor, CblasLeft, CblasLower,
2853  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2854  }
2855  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2856  cblas_ssymm( CblasRowMajor, CblasRight, CblasLower,
2857  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2858  }
2859  else {
2860  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2861  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2862  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2863  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2864  }
2865  }
2866 #endif
2867  //**********************************************************************************************
2868 
2869  //**BLAS kernel (double precision)**************************************************************
2870 #if BLAZE_BLAS_MODE
2871 
2885  template< typename MT3 // Type of the left-hand side target matrix
2886  , typename MT4 // Type of the left-hand side matrix operand
2887  , typename MT5 > // Type of the right-hand side matrix operand
2888  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
2889  {
2890  using boost::numeric_cast;
2891 
2895 
2896  const int M ( numeric_cast<int>( A.rows() ) );
2897  const int N ( numeric_cast<int>( B.columns() ) );
2898  const int K ( numeric_cast<int>( A.columns() ) );
2899  const int lda( numeric_cast<int>( A.spacing() ) );
2900  const int ldb( numeric_cast<int>( B.spacing() ) );
2901  const int ldc( numeric_cast<int>( C.spacing() ) );
2902 
2903  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2904  cblas_dsymm( CblasRowMajor, CblasLeft, CblasLower,
2905  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2906  }
2907  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2908  cblas_dsymm( CblasRowMajor, CblasRight, CblasLower,
2909  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2910  }
2911  else {
2912  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2913  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2914  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2915  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2916  }
2917  }
2918 #endif
2919  //**********************************************************************************************
2920 
2921  //**BLAS kernel (single precision complex)******************************************************
2922 #if BLAZE_BLAS_MODE
2923 
2937  template< typename MT3 // Type of the left-hand side target matrix
2938  , typename MT4 // Type of the left-hand side matrix operand
2939  , typename MT5 > // Type of the right-hand side matrix operand
2940  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
2941  complex<float> alpha, complex<float> beta )
2942  {
2943  using boost::numeric_cast;
2944 
2948  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2949  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2950  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2951 
2952  const int M ( numeric_cast<int>( A.rows() ) );
2953  const int N ( numeric_cast<int>( B.columns() ) );
2954  const int K ( numeric_cast<int>( A.columns() ) );
2955  const int lda( numeric_cast<int>( A.spacing() ) );
2956  const int ldb( numeric_cast<int>( B.spacing() ) );
2957  const int ldc( numeric_cast<int>( C.spacing() ) );
2958 
2959  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2960  cblas_csymm( CblasRowMajor, CblasLeft, CblasLower,
2961  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2962  }
2963  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2964  cblas_csymm( CblasRowMajor, CblasRight, CblasLower,
2965  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
2966  }
2967  else {
2968  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2969  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2970  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2971  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2972  }
2973  }
2974 #endif
2975  //**********************************************************************************************
2976 
2977  //**BLAS kernel (double precision complex)******************************************************
2978 #if BLAZE_BLAS_MODE
2979 
2993  template< typename MT3 // Type of the left-hand side target matrix
2994  , typename MT4 // Type of the left-hand side matrix operand
2995  , typename MT5 > // Type of the right-hand side matrix operand
2996  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
2997  complex<double> alpha, complex<double> beta )
2998  {
2999  using boost::numeric_cast;
3000 
3004  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3005  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3006  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3007 
3008  const int M ( numeric_cast<int>( A.rows() ) );
3009  const int N ( numeric_cast<int>( B.columns() ) );
3010  const int K ( numeric_cast<int>( A.columns() ) );
3011  const int lda( numeric_cast<int>( A.spacing() ) );
3012  const int ldb( numeric_cast<int>( B.spacing() ) );
3013  const int ldc( numeric_cast<int>( C.spacing() ) );
3014 
3015  if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
3016  cblas_zsymm( CblasRowMajor, CblasLeft, CblasLower,
3017  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3018  }
3019  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
3020  cblas_zsymm( CblasRowMajor, CblasRight, CblasLower,
3021  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
3022  }
3023  else {
3024  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3025  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3026  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3027  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3028  }
3029  }
3030 #endif
3031  //**********************************************************************************************
3032 
3033  //**Assignment to dense matrices****************************************************************
// Assignment of a scaled dense matrix-dense matrix product to a dense matrix.
//
// lhs: the target dense matrix (either storage order, SO).
// rhs: the scaled multiplication expression; the two matrix operands are taken from
//      rhs.matrix_ and the scaling factor from rhs.scalar_.
//
// The overload is removed via DisableIf when CanExploitSymmetry<MT,MT1,MT2> applies.
// An empty target is left untouched; if the inner dimension (left.columns()) is zero the
// target is reset. Otherwise both operands are evaluated into LT/RT objects and the work
// is handed to selectAssignKernel() together with the scalar.
3045  template< typename MT // Type of the target dense matrix
3046  , bool SO > // Storage order of the target dense matrix
3047  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3048  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3049  {
3051 
3052  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3053  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3054 
3055  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3056  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3057 
// Nothing to do for an empty target; a zero inner dimension yields a reset target.
3058  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3059  return;
3060  }
3061  else if( left.columns() == 0UL ) {
3062  reset( ~lhs );
3063  return;
3064  }
3065 
3066  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3067  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3068 
3069  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3070  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3071  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3072  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3073  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3074  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3075 
3076  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
3077  }
3078  //**********************************************************************************************
3079 
3080  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = A * B * scalar.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
//
// Targets with fewer than DMATDMATMULT_THRESHOLD elements (rows * columns) are handled by
// selectDefaultAssignKernel(); all other targets are forwarded to selectBlasAssignKernel().
3091  template< typename MT3 // Type of the left-hand side target matrix
3092  , typename MT4 // Type of the left-hand side matrix operand
3093  , typename MT5 // Type of the right-hand side matrix operand
3094  , typename ST2 > // Type of the scalar value
3095  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3096  {
3097  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3098  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
3099  else
3100  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
3101  }
3102  //**********************************************************************************************
3103 
3104  //**Default assignment to dense matrices********************************************************
// Default (non-vectorized) assignment kernel computing C = ( A * B ) * scalar.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not apply.
//
// Each row i of C is produced in three passes: initialization with the k=0 term,
// accumulation of the terms k=1..K-1, and a final multiplication of the row by scalar.
// NOTE: the initialization pass reads A(i,0) and B(0,j) unconditionally, i.e. it requires
// K >= 1; the dense assign() entry point returns before reaching the kernels when
// left.columns() == 0.
3118  template< typename MT3 // Type of the left-hand side target matrix
3119  , typename MT4 // Type of the left-hand side matrix operand
3120  , typename MT5 // Type of the right-hand side matrix operand
3121  , typename ST2 > // Type of the scalar value
3122  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3123  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3124  {
3125  const size_t M( A.rows() );
3126  const size_t N( B.columns() );
3127  const size_t K( A.columns() );
3128 
3129  for( size_t i=0UL; i<M; ++i ) {
// Pass 1: overwrite row i of C with the first (k=0) product term.
3130  for( size_t j=0UL; j<N; ++j ) {
3131  C(i,j) = A(i,0UL) * B(0UL,j);
3132  }
// Pass 2: accumulate the remaining product terms.
3133  for( size_t k=1UL; k<K; ++k ) {
3134  for( size_t j=0UL; j<N; ++j ) {
3135  C(i,j) += A(i,k) * B(k,j);
3136  }
3137  }
// Pass 3: apply the scaling factor to the finished row.
3138  for( size_t j=0UL; j<N; ++j ) {
3139  C(i,j) *= scalar;
3140  }
3141  }
3142  }
3143  //**********************************************************************************************
3144 
3145  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default assignment kernel for row-major targets: C = ( A * B ) * scalar.
//
// C: the row-major dense target, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> applies.
//
// The columns of C are traversed in groups of 8, 4, 2 and finally 1 intrinsic vector(s)
// (IT::size elements each). Within the 4-, 2- and 1-vector groups two rows of C are computed
// per iteration, followed by a single remainder row if M is odd. Every result vector is
// multiplied by the broadcast scalar (factor) right before it is stored.
//
// NOTE(review): the accumulators (xmm1...) are default-constructed and immediately used in
// "xmm = xmm + ..."; this presumes IntrinsicType's default constructor yields zero - confirm.
// NOTE(review): loads and stores always operate on whole intrinsic vectors, also in the last
// column group where fewer than IT::size columns may remain; presumably the involved
// matrices are padded accordingly - confirm against the UseVectorizedDefaultKernel criteria.
3159  template< typename MT3 // Type of the left-hand side target matrix
3160  , typename MT4 // Type of the left-hand side matrix operand
3161  , typename MT5 // Type of the right-hand side matrix operand
3162  , typename ST2 > // Type of the scalar value
3163  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3164  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3165  {
3166  typedef IntrinsicTrait<ElementType> IT;
3167 
3168  const size_t M( A.rows() );
3169  const size_t N( B.columns() );
3170  const size_t K( A.columns() );
3171 
// Scaling factor broadcast into an intrinsic vector.
3172  const IntrinsicType factor( set( scalar ) );
3173 
3174  size_t j( 0UL );
3175 
// Column groups of eight intrinsic vectors, one row of C at a time.
3176  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3177  for( size_t i=0UL; i<M; ++i ) {
3178  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3179  for( size_t k=0UL; k<K; ++k ) {
3180  const IntrinsicType a1( set( A(i,k) ) );
3181  xmm1 = xmm1 + a1 * B.load(k,j );
3182  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3183  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3184  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3185  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3186  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3187  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3188  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3189  }
3190  (~C).store( i, j , xmm1 * factor );
3191  (~C).store( i, j+IT::size , xmm2 * factor );
3192  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
3193  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
3194  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
3195  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
3196  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
3197  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
3198  }
3199  }
// Column groups of four intrinsic vectors, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1).
3200  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3201  size_t i( 0UL );
3202  for( ; (i+2UL) <= M; i+=2UL ) {
3203  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3204  for( size_t k=0UL; k<K; ++k ) {
3205  const IntrinsicType a1( set( A(i ,k) ) );
3206  const IntrinsicType a2( set( A(i+1UL,k) ) );
3207  const IntrinsicType b1( B.load(k,j ) );
3208  const IntrinsicType b2( B.load(k,j+IT::size ) );
3209  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3210  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3211  xmm1 = xmm1 + a1 * b1;
3212  xmm2 = xmm2 + a1 * b2;
3213  xmm3 = xmm3 + a1 * b3;
3214  xmm4 = xmm4 + a1 * b4;
3215  xmm5 = xmm5 + a2 * b1;
3216  xmm6 = xmm6 + a2 * b2;
3217  xmm7 = xmm7 + a2 * b3;
3218  xmm8 = xmm8 + a2 * b4;
3219  }
3220  (~C).store( i , j , xmm1 * factor );
3221  (~C).store( i , j+IT::size , xmm2 * factor );
3222  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
3223  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
3224  (~C).store( i+1UL, j , xmm5 * factor );
3225  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
3226  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
3227  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
3228  }
// Remainder row for an odd number of rows.
3229  if( i < M ) {
3230  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3231  for( size_t k=0UL; k<K; ++k ) {
3232  const IntrinsicType a1( set( A(i,k) ) );
3233  xmm1 = xmm1 + a1 * B.load(k,j );
3234  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3235  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3236  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3237  }
3238  (~C).store( i, j , xmm1 * factor );
3239  (~C).store( i, j+IT::size , xmm2 * factor );
3240  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
3241  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
3242  }
3243  }
// Column groups of two intrinsic vectors, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1).
3244  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3245  size_t i( 0UL );
3246  for( ; (i+2UL) <= M; i+=2UL ) {
3247  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3248  for( size_t k=0UL; k<K; ++k ) {
3249  const IntrinsicType a1( set( A(i ,k) ) );
3250  const IntrinsicType a2( set( A(i+1UL,k) ) );
3251  const IntrinsicType b1( B.load(k,j ) );
3252  const IntrinsicType b2( B.load(k,j+IT::size) );
3253  xmm1 = xmm1 + a1 * b1;
3254  xmm2 = xmm2 + a1 * b2;
3255  xmm3 = xmm3 + a2 * b1;
3256  xmm4 = xmm4 + a2 * b2;
3257  }
3258  (~C).store( i , j , xmm1 * factor );
3259  (~C).store( i , j+IT::size, xmm2 * factor );
3260  (~C).store( i+1UL, j , xmm3 * factor );
3261  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
3262  }
// Remainder row for an odd number of rows.
3263  if( i < M ) {
3264  IntrinsicType xmm1, xmm2;
3265  for( size_t k=0UL; k<K; ++k ) {
3266  const IntrinsicType a1( set( A(i,k) ) );
3267  xmm1 = xmm1 + a1 * B.load(k,j );
3268  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3269  }
3270  (~C).store( i, j , xmm1 * factor );
3271  (~C).store( i, j+IT::size, xmm2 * factor );
3272  }
3273  }
// Last column group consisting of a single intrinsic vector, two rows of C at a time.
3274  if( j < N ) {
3275  size_t i( 0UL );
3276  for( ; (i+2UL) <= M; i+=2UL ) {
3277  IntrinsicType xmm1, xmm2;
3278  for( size_t k=0UL; k<K; ++k ) {
3279  const IntrinsicType b1( B.load(k,j) );
3280  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3281  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3282  }
3283  (~C).store( i , j, xmm1 * factor );
3284  (~C).store( i+1UL, j, xmm2 * factor );
3285  }
// Remainder row for an odd number of rows.
3286  if( i < M ) {
3287  IntrinsicType xmm1;
3288  for( size_t k=0UL; k<K; ++k ) {
3289  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3290  }
3291  (~C).store( i, j, xmm1 * factor );
3292  }
3293  }
3294  }
3295  //**********************************************************************************************
3296 
3297  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment kernel for column-major targets: C = A * B * scalar.
//
// C: the column-major dense target, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> applies.
//
// No computation is done here directly: exactly one operand is copied into a temporary of
// its OppositeType and the resulting expression is assigned to C via assign().
// Which operand is converted:
//  - A, if A is not resizable but B is;
//  - B, if A is resizable but B is not;
//  - otherwise the operand with fewer elements (A in case of a tie).
3311  template< typename MT3 // Type of the left-hand side target matrix
3312  , typename MT4 // Type of the left-hand side matrix operand
3313  , typename MT5 // Type of the right-hand side matrix operand
3314  , typename ST2 > // Type of the scalar value
3315  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3316  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3317  {
3320 
3321  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3322  const typename MT4::OppositeType tmp( serial( A ) );
3323  assign( ~C, tmp * B * scalar );
3324  }
3325  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3326  const typename MT5::OppositeType tmp( serial( B ) );
3327  assign( ~C, A * tmp * scalar );
3328  }
// Same resizability for both operands: convert the smaller one.
3329  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3330  const typename MT4::OppositeType tmp( serial( A ) );
3331  assign( ~C, tmp * B * scalar );
3332  }
3333  else {
3334  const typename MT5::OppositeType tmp( serial( B ) );
3335  assign( ~C, A * tmp * scalar );
3336  }
3337  }
3338  //**********************************************************************************************
3339 
3340  //**BLAS-based assignment to dense matrices (default)*******************************************
// BLAS-based assignment kernel, fallback overload.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDefaultKernel<MT3,MT4,MT5,ST2> applies; it performs no BLAS call and
// simply forwards all arguments to selectDefaultAssignKernel().
3354  template< typename MT3 // Type of the left-hand side target matrix
3355  , typename MT4 // Type of the left-hand side matrix operand
3356  , typename MT5 // Type of the right-hand side matrix operand
3357  , typename ST2 > // Type of the scalar value
3358  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3359  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3360  {
3361  selectDefaultAssignKernel( C, A, B, scalar );
3362  }
3363  //**********************************************************************************************
3364 
3365  //**BLAS-based assignment to dense matrices (single precision)**********************************
3366 #if BLAZE_BLAS_MODE
3367 
// BLAS-based assignment kernel for single precision operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> applies and forwards to sgemm()
// with scalar as fourth and 0.0F as fifth argument.
// NOTE(review): the arguments are presumably the BLAS alpha and beta factors, i.e. beta = 0
// discards the previous content of C - confirm against the sgemm() wrapper.
3380  template< typename MT3 // Type of the left-hand side target matrix
3381  , typename MT4 // Type of the left-hand side matrix operand
3382  , typename MT5 // Type of the right-hand side matrix operand
3383  , typename ST2 > // Type of the scalar value
3384  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3385  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3386  {
3387  sgemm( C, A, B, scalar, 0.0F );
3388  }
3389 #endif
3390  //**********************************************************************************************
3391 
3392  //**BLAS-based assignment to dense matrices (double precision)**********************************
3393 #if BLAZE_BLAS_MODE
3394 
// BLAS-based assignment kernel for double precision operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> applies and forwards to dgemm()
// with scalar as fourth and 0.0 as fifth argument.
// NOTE(review): the arguments are presumably the BLAS alpha and beta factors, i.e. beta = 0
// discards the previous content of C - confirm against the dgemm() wrapper.
3407  template< typename MT3 // Type of the left-hand side target matrix
3408  , typename MT4 // Type of the left-hand side matrix operand
3409  , typename MT5 // Type of the right-hand side matrix operand
3410  , typename ST2 > // Type of the scalar value
3411  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3412  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3413  {
3414  dgemm( C, A, B, scalar, 0.0 );
3415  }
3416 #endif
3417  //**********************************************************************************************
3418 
3419  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3420 #if BLAZE_BLAS_MODE
3421 
// BLAS-based assignment kernel for single precision complex operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of the selection criterion). The scalar is used as the real part of a complex<float>
// with zero imaginary part; the second factor passed to cgemm() is complex zero.
// NOTE(review): the two complex arguments are presumably the BLAS alpha and beta factors -
// confirm against the cgemm() wrapper.
3434  template< typename MT3 // Type of the left-hand side target matrix
3435  , typename MT4 // Type of the left-hand side matrix operand
3436  , typename MT5 // Type of the right-hand side matrix operand
3437  , typename ST2 > // Type of the scalar value
3438  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3439  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3440  {
3441  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3442  }
3443 #endif
3444  //**********************************************************************************************
3445 
3446  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3447 #if BLAZE_BLAS_MODE
3448 
// BLAS-based assignment kernel for double precision complex operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of the selection criterion). The scalar is used as the real part of a complex<double>
// with zero imaginary part; the second factor passed to zgemm() is complex zero.
// NOTE(review): the two complex arguments are presumably the BLAS alpha and beta factors -
// confirm against the zgemm() wrapper.
3461  template< typename MT3 // Type of the left-hand side target matrix
3462  , typename MT4 // Type of the left-hand side matrix operand
3463  , typename MT5 // Type of the right-hand side matrix operand
3464  , typename ST2 > // Type of the scalar value
3465  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3466  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3467  {
3468  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3469  }
3470 #endif
3471  //**********************************************************************************************
3472 
3473  //**Assignment to sparse matrices***************************************************************
// Assignment of a scaled dense matrix-dense matrix product to a sparse matrix.
//
// lhs: the target sparse matrix (either storage order, SO).
// rhs: the scaled multiplication expression to be assigned.
//
// The overload is removed via DisableIf when CanExploitSymmetry<MT,MT1,MT2> applies.
// The expression is first evaluated into a temporary of type TmpType, which is then
// assigned to the sparse target.
3485  template< typename MT // Type of the target sparse matrix
3486  , bool SO > // Storage order of the target sparse matrix
3487  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3488  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3489  {
3491 
// Temporary type chosen by the storage order of the target; presumably OppositeType for
// SO == true and ResultType otherwise - confirm against SelectType.
3492  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3493 
3500 
3501  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3502  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3503 
3504  const TmpType tmp( serial( rhs ) );
3505  assign( ~lhs, tmp );
3506  }
3507  //**********************************************************************************************
3508 
3509  //**Restructuring assignment to column-major matrices*******************************************
// Restructuring assignment of a scaled dense matrix-dense matrix product to a column-major
// matrix.
//
// lhs: the column-major target matrix.
// rhs: the scaled multiplication expression; operands from rhs.matrix_, factor from
//      rhs.scalar_.
//
// Enabled only when CanExploitSymmetry<MT,MT1,MT2> applies. The product is rebuilt with the
// symmetric operand(s) wrapped in trans() and the new expression is assigned to the target:
//  - MT1 and MT2 symmetric: trans( left ) * trans( right ) * scalar
//  - only MT1 symmetric:    trans( left ) * right * scalar
//  - otherwise:             left * trans( right ) * scalar
3523  template< typename MT > // Type of the target matrix
3524  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3525  assign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
3526  {
3528 
3530 
3531  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3532  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3533 
3534  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3535  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3536 
3537  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3538  assign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
3539  else if( IsSymmetric<MT1>::value )
3540  assign( ~lhs, trans( left ) * right * rhs.scalar_ );
3541  else
3542  assign( ~lhs, left * trans( right ) * rhs.scalar_ );
3543  }
3544  //**********************************************************************************************
3545 
3546  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of a scaled dense matrix-dense matrix product to a dense matrix.
//
// lhs: the target dense matrix (either storage order, SO).
// rhs: the scaled multiplication expression; operands from rhs.matrix_, factor from
//      rhs.scalar_.
//
// The overload is removed via DisableIf when CanExploitSymmetry<MT,MT1,MT2> applies.
// The function returns without touching the target if the target is empty or the inner
// dimension (left.columns()) is zero. Otherwise both operands are evaluated into LT/RT
// objects and the work is handed to selectAddAssignKernel().
3558  template< typename MT // Type of the target dense matrix
3559  , bool SO > // Storage order of the target dense matrix
3560  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3561  addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3562  {
3564 
3565  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3566  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3567 
3568  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3569  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3570 
// Early exit: in all three cases the target is left unchanged.
3571  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3572  return;
3573  }
3574 
3575  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3576  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3577 
3578  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3579  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3580  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3581  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3582  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3583  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3584 
3585  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3586  }
3587  //**********************************************************************************************
3588 
3589  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment C += A * B * scalar.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
//
// Targets with fewer than DMATDMATMULT_THRESHOLD elements (rows * columns) are handled by
// selectDefaultAddAssignKernel(); all other targets are forwarded to
// selectBlasAddAssignKernel().
3600  template< typename MT3 // Type of the left-hand side target matrix
3601  , typename MT4 // Type of the left-hand side matrix operand
3602  , typename MT5 // Type of the right-hand side matrix operand
3603  , typename ST2 > // Type of the scalar value
3604  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3605  {
3606  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3607  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3608  else
3609  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3610  }
3611  //**********************************************************************************************
3612 
3613  //**Default addition assignment to dense matrices***********************************************
// Default (non-vectorized) addition assignment kernel: C += A * B * scalar.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not apply.
//
// The scaled product is first evaluated into a temporary of type ResultType, which is
// then added to C via addAssign().
3627  template< typename MT3 // Type of the left-hand side target matrix
3628  , typename MT4 // Type of the left-hand side matrix operand
3629  , typename MT5 // Type of the right-hand side matrix operand
3630  , typename ST2 > // Type of the scalar value
3631  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3632  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3633  {
3634  const ResultType tmp( serial( A * B * scalar ) );
3635  addAssign( C, tmp );
3636  }
3637  //**********************************************************************************************
3638 
3639  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment kernel for row-major targets:
// C += ( A * B ) * scalar.
//
// C: the row-major dense target, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> applies.
//
// The columns of C are traversed in groups of 8, 4, 2 and finally 1 intrinsic vector(s)
// (IT::size elements each). Within the 4-, 2- and 1-vector groups two rows of C are computed
// per iteration, followed by a single remainder row if M is odd. Every accumulated vector
// is multiplied by the broadcast scalar (factor), added to the value loaded from C and
// stored back to the same position.
//
// NOTE(review): the accumulators (xmm1...) are default-constructed and immediately used in
// "xmm = xmm + ..."; this presumes IntrinsicType's default constructor yields zero - confirm.
// NOTE(review): loads and stores always operate on whole intrinsic vectors, also in the last
// column group where fewer than IT::size columns may remain; presumably the involved
// matrices are padded accordingly - confirm against the UseVectorizedDefaultKernel criteria.
3653  template< typename MT3 // Type of the left-hand side target matrix
3654  , typename MT4 // Type of the left-hand side matrix operand
3655  , typename MT5 // Type of the right-hand side matrix operand
3656  , typename ST2 > // Type of the scalar value
3657  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3658  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3659  {
3660  typedef IntrinsicTrait<ElementType> IT;
3661 
3662  const size_t M( A.rows() );
3663  const size_t N( B.columns() );
3664  const size_t K( A.columns() );
3665 
// Scaling factor broadcast into an intrinsic vector.
3666  const IntrinsicType factor( set( scalar ) );
3667 
3668  size_t j( 0UL );
3669 
// Column groups of eight intrinsic vectors, one row of C at a time.
3670  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3671  for( size_t i=0UL; i<M; ++i ) {
3672  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3673  for( size_t k=0UL; k<K; ++k ) {
3674  const IntrinsicType a1( set( A(i,k) ) );
3675  xmm1 = xmm1 + a1 * B.load(k,j );
3676  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3677  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3678  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3679  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3680  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3681  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3682  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3683  }
3684  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3685  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3686  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3687  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3688  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3689  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3690  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3691  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
3692  }
3693  }
// Column groups of four intrinsic vectors, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1).
3694  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3695  size_t i( 0UL );
3696  for( ; (i+2UL) <= M; i+=2UL ) {
3697  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3698  for( size_t k=0UL; k<K; ++k ) {
3699  const IntrinsicType a1( set( A(i ,k) ) );
3700  const IntrinsicType a2( set( A(i+1UL,k) ) );
3701  const IntrinsicType b1( B.load(k,j ) );
3702  const IntrinsicType b2( B.load(k,j+IT::size ) );
3703  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3704  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3705  xmm1 = xmm1 + a1 * b1;
3706  xmm2 = xmm2 + a1 * b2;
3707  xmm3 = xmm3 + a1 * b3;
3708  xmm4 = xmm4 + a1 * b4;
3709  xmm5 = xmm5 + a2 * b1;
3710  xmm6 = xmm6 + a2 * b2;
3711  xmm7 = xmm7 + a2 * b3;
3712  xmm8 = xmm8 + a2 * b4;
3713  }
3714  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3715  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3716  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3717  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3718  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3719  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3720  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3721  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
3722  }
// Remainder row for an odd number of rows.
3723  if( i < M ) {
3724  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3725  for( size_t k=0UL; k<K; ++k ) {
3726  const IntrinsicType a1( set( A(i,k) ) );
3727  xmm1 = xmm1 + a1 * B.load(k,j );
3728  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3729  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3730  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3731  }
3732  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3733  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3734  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3735  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3736  }
3737  }
// Column groups of two intrinsic vectors, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1).
3738  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3739  size_t i( 0UL );
3740  for( ; (i+2UL) <= M; i+=2UL ) {
3741  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3742  for( size_t k=0UL; k<K; ++k ) {
3743  const IntrinsicType a1( set( A(i ,k) ) );
3744  const IntrinsicType a2( set( A(i+1UL,k) ) );
3745  const IntrinsicType b1( B.load(k,j ) );
3746  const IntrinsicType b2( B.load(k,j+IT::size) );
3747  xmm1 = xmm1 + a1 * b1;
3748  xmm2 = xmm2 + a1 * b2;
3749  xmm3 = xmm3 + a2 * b1;
3750  xmm4 = xmm4 + a2 * b2;
3751  }
3752  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3753  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3754  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3755  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
3756  }
// Remainder row for an odd number of rows.
3757  if( i < M ) {
3758  IntrinsicType xmm1, xmm2;
3759  for( size_t k=0UL; k<K; ++k ) {
3760  const IntrinsicType a1( set( A(i,k) ) );
3761  xmm1 = xmm1 + a1 * B.load(k,j );
3762  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3763  }
3764  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3765  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
3766  }
3767  }
// Last column group consisting of a single intrinsic vector, two rows of C at a time.
3768  if( j < N ) {
3769  size_t i( 0UL );
3770  for( ; (i+2UL) <= M; i+=2UL ) {
3771  IntrinsicType xmm1, xmm2;
3772  for( size_t k=0UL; k<K; ++k ) {
3773  const IntrinsicType b1( B.load(k,j) );
3774  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3775  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3776  }
3777  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3778  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
3779  }
// Remainder row for an odd number of rows.
3780  if( i < M ) {
3781  IntrinsicType xmm1;
3782  for( size_t k=0UL; k<K; ++k ) {
3783  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3784  }
3785  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3786  }
3787  }
3788  }
3789  //**********************************************************************************************
3790 
3791  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment kernel for column-major targets:
// C += A * B * scalar.
//
// C: the column-major dense target, A / B: the evaluated operands, scalar: the scaling factor.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> applies.
//
// No computation is done here directly: exactly one operand is copied into a temporary of
// its OppositeType and the resulting expression is added to C via addAssign().
// Which operand is converted:
//  - A, if A is not resizable but B is;
//  - B, if A is resizable but B is not;
//  - otherwise the operand with fewer elements (A in case of a tie).
3805  template< typename MT3 // Type of the left-hand side target matrix
3806  , typename MT4 // Type of the left-hand side matrix operand
3807  , typename MT5 // Type of the right-hand side matrix operand
3808  , typename ST2 > // Type of the scalar value
3809  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3810  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3811  {
3814 
3815  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3816  const typename MT4::OppositeType tmp( serial( A ) );
3817  addAssign( ~C, tmp * B * scalar );
3818  }
3819  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3820  const typename MT5::OppositeType tmp( serial( B ) );
3821  addAssign( ~C, A * tmp * scalar );
3822  }
// Same resizability for both operands: convert the smaller one.
3823  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3824  const typename MT4::OppositeType tmp( serial( A ) );
3825  addAssign( ~C, tmp * B * scalar );
3826  }
3827  else {
3828  const typename MT5::OppositeType tmp( serial( B ) );
3829  addAssign( ~C, A * tmp * scalar );
3830  }
3831  }
3832  //**********************************************************************************************
3833 
3834  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS-based addition assignment kernel, fallback overload.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDefaultKernel<MT3,MT4,MT5,ST2> applies; it performs no BLAS call and
// simply forwards all arguments to selectDefaultAddAssignKernel().
3848  template< typename MT3 // Type of the left-hand side target matrix
3849  , typename MT4 // Type of the left-hand side matrix operand
3850  , typename MT5 // Type of the right-hand side matrix operand
3851  , typename ST2 > // Type of the scalar value
3852  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3853  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3854  {
3855  selectDefaultAddAssignKernel( C, A, B, scalar );
3856  }
3857  //**********************************************************************************************
3858 
3859  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3860 #if BLAZE_BLAS_MODE
3861 
// BLAS-based addition assignment kernel for single precision operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> applies and forwards to sgemm()
// with scalar as fourth and 1.0F as fifth argument.
// NOTE(review): the arguments are presumably the BLAS alpha and beta factors, i.e. beta = 1
// keeps the previous content of C and adds the product - confirm against the sgemm() wrapper.
3874  template< typename MT3 // Type of the left-hand side target matrix
3875  , typename MT4 // Type of the left-hand side matrix operand
3876  , typename MT5 // Type of the right-hand side matrix operand
3877  , typename ST2 > // Type of the scalar value
3878  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3879  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3880  {
3881  sgemm( C, A, B, scalar, 1.0F );
3882  }
3883 #endif
3884  //**********************************************************************************************
3885 
3886  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3887 #if BLAZE_BLAS_MODE
3888 
// BLAS-based addition assignment kernel for double precision operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> applies and forwards to dgemm()
// with scalar as fourth and 1.0 as fifth argument.
// NOTE(review): the arguments are presumably the BLAS alpha and beta factors, i.e. beta = 1
// keeps the previous content of C and adds the product - confirm against the dgemm() wrapper.
3901  template< typename MT3 // Type of the left-hand side target matrix
3902  , typename MT4 // Type of the left-hand side matrix operand
3903  , typename MT5 // Type of the right-hand side matrix operand
3904  , typename ST2 > // Type of the scalar value
3905  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3906  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3907  {
3908  dgemm( C, A, B, scalar, 1.0 );
3909  }
3910 #endif
3911  //**********************************************************************************************
3912 
3913  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3914 #if BLAZE_BLAS_MODE
3915 
// BLAS-based addition assignment kernel for single precision complex operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of the selection criterion). The scalar is used as the real part of a complex<float>
// with zero imaginary part; the second factor passed to cgemm() is complex one (1,0).
// NOTE(review): the two complex arguments are presumably the BLAS alpha and beta factors -
// confirm against the cgemm() wrapper.
3928  template< typename MT3 // Type of the left-hand side target matrix
3929  , typename MT4 // Type of the left-hand side matrix operand
3930  , typename MT5 // Type of the right-hand side matrix operand
3931  , typename ST2 > // Type of the scalar value
3932  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3933  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3934  {
3935  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3936  }
3937 #endif
3938  //**********************************************************************************************
3939 
3940  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3941 #if BLAZE_BLAS_MODE
3942 
// BLAS-based addition assignment kernel for double precision complex operands.
//
// C: the target matrix, A / B: the evaluated operands, scalar: the scaling factor.
// Selected when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of the selection criterion). The scalar is used as the real part of a complex<double>
// with zero imaginary part; the second factor passed to zgemm() is complex one (1,0).
// NOTE(review): the two complex arguments are presumably the BLAS alpha and beta factors -
// confirm against the zgemm() wrapper.
3955  template< typename MT3 // Type of the left-hand side target matrix
3956  , typename MT4 // Type of the left-hand side matrix operand
3957  , typename MT5 // Type of the right-hand side matrix operand
3958  , typename ST2 > // Type of the scalar value
3959  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3960  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3961  {
3962  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3963  }
3964 #endif
3965  //**********************************************************************************************
3966 
3967  //**Restructuring addition assignment to column-major matrices**********************************
// Restructuring addition assignment of a scaled dense matrix-dense matrix product to a
// column-major matrix.
//
// lhs: the column-major target matrix.
// rhs: the scaled multiplication expression; operands from rhs.matrix_, factor from
//      rhs.scalar_.
//
// Enabled only when CanExploitSymmetry<MT,MT1,MT2> applies. The product is rebuilt with the
// symmetric operand(s) wrapped in trans() and the new expression is added to the target:
//  - MT1 and MT2 symmetric: trans( left ) * trans( right ) * scalar
//  - only MT1 symmetric:    trans( left ) * right * scalar
//  - otherwise:             left * trans( right ) * scalar
3981  template< typename MT > // Type of the target matrix
3982  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3983  addAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
3984  {
3986 
3988 
3989  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3990  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3991 
3992  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3993  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3994 
3995  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3996  addAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
3997  else if( IsSymmetric<MT1>::value )
3998  addAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
3999  else
4000  addAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4001  }
4002  //**********************************************************************************************
4003 
4004  //**Addition assignment to sparse matrices******************************************************
4005  // No special implementation for the addition assignment to sparse matrices.
4006  //**********************************************************************************************
4007 
4008  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled dense matrix-dense matrix product to a dense matrix.
//
// lhs: the target dense matrix (either storage order, SO).
// rhs: the scaled multiplication expression; operands from rhs.matrix_, factor from
//      rhs.scalar_.
//
// The overload is removed via DisableIf when CanExploitSymmetry<MT,MT1,MT2> applies.
// The function returns without touching the target if the target is empty or the inner
// dimension (left.columns()) is zero. Otherwise both operands are evaluated into LT/RT
// objects and the work is handed to selectSubAssignKernel().
4020  template< typename MT // Type of the target dense matrix
4021  , bool SO > // Storage order of the target dense matrix
4022  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4023  subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4024  {
4026 
4027  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4028  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4029 
4030  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4031  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4032 
// Early exit: in all three cases the target is left unchanged.
4033  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4034  return;
4035  }
4036 
4037  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4038  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4039 
4040  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4041  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4042  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4043  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4044  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4045  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4046 
4047  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
4048  }
4049  //**********************************************************************************************
4050 
4051  //**Subtraction assignment to dense matrices (kernel selection)*********************************
4062  template< typename MT3 // Type of the left-hand side target matrix
4063  , typename MT4 // Type of the left-hand side matrix operand
4064  , typename MT5 // Type of the right-hand side matrix operand
4065  , typename ST2 > // Type of the scalar value
4066  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4067  {
4068  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
4069  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
4070  else
4071  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
4072  }
4073  //**********************************************************************************************
4074 
4075  //**Default subtraction assignment to dense matrices********************************************
4089  template< typename MT3 // Type of the left-hand side target matrix
4090  , typename MT4 // Type of the left-hand side matrix operand
4091  , typename MT5 // Type of the right-hand side matrix operand
4092  , typename ST2 > // Type of the scalar value
4093  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4094  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4095  {
4096  const ResultType tmp( serial( A * B * scalar ) );
4097  subAssign( C, tmp );
4098  }
4099  //**********************************************************************************************
4100 
4101  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default kernel for the subtraction assignment C -= (A*B)*scalar to a row-major
// dense target (selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds).
//
// The columns of C are traversed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements each. For every block the products are accumulated over k in local
// intrinsic variables, multiplied by the broadcast scalar and subtracted from C via load/store.
//
// \param C The row-major target matrix (updated in place).
// \param A The left-hand side operand, read element-wise via A(i,k).
// \param B The right-hand side operand, read via B.load(k,j).
// \param scalar The scaling factor applied to the product.
//
// NOTE(review): the accumulators are default-constructed and immediately read; this presumably
// relies on IntrinsicType's default constructor zero-initializing -- confirm.
// NOTE(review): the column loops only require the start of the last vector to lie below N, so
// loads/stores can reach past column N; presumably the operands are padded to a multiple of
// IT::size -- confirm.
4115  template< typename MT3 // Type of the left-hand side target matrix
4116  , typename MT4 // Type of the left-hand side matrix operand
4117  , typename MT5 // Type of the right-hand side matrix operand
4118  , typename ST2 > // Type of the scalar value
4119  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4120  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4121  {
4122  typedef IntrinsicTrait<ElementType> IT;
4123 
4124  const size_t M( A.rows() );
4125  const size_t N( B.columns() );
4126  const size_t K( A.columns() );
4127 
      // Scalar broadcast into an intrinsic vector; applied once per accumulator after the k loop
4128  const IntrinsicType factor( set( scalar ) );
4129 
      // Column index; shared by all block loops below so each continues where the previous stopped
4130  size_t j( 0UL );
4131 
      // Blocks of 8 intrinsic vectors per row, one row at a time
4132  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
4133  for( size_t i=0UL; i<M; ++i ) {
4134  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4135  for( size_t k=0UL; k<K; ++k ) {
4136  const IntrinsicType a1( set( A(i,k) ) );
4137  xmm1 = xmm1 + a1 * B.load(k,j );
4138  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
4139  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
4140  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
4141  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
4142  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
4143  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
4144  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
4145  }
4146  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4147  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
4148  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
4149  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
4150  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
4151  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
4152  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
4153  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
4154  }
4155  }
      // Blocks of 4 intrinsic vectors, two rows at a time (xmm1-4: row i, xmm5-8: row i+1)
4156  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
4157  size_t i( 0UL );
4158  for( ; (i+2UL) <= M; i+=2UL ) {
4159  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4160  for( size_t k=0UL; k<K; ++k ) {
4161  const IntrinsicType a1( set( A(i ,k) ) );
4162  const IntrinsicType a2( set( A(i+1UL,k) ) );
4163  const IntrinsicType b1( B.load(k,j ) );
4164  const IntrinsicType b2( B.load(k,j+IT::size ) );
4165  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
4166  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
4167  xmm1 = xmm1 + a1 * b1;
4168  xmm2 = xmm2 + a1 * b2;
4169  xmm3 = xmm3 + a1 * b3;
4170  xmm4 = xmm4 + a1 * b4;
4171  xmm5 = xmm5 + a2 * b1;
4172  xmm6 = xmm6 + a2 * b2;
4173  xmm7 = xmm7 + a2 * b3;
4174  xmm8 = xmm8 + a2 * b4;
4175  }
4176  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4177  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
4178  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
4179  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
4180  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
4181  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
4182  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
4183  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
4184  }
      // Remaining single row when M is odd
4185  if( i < M ) {
4186  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4187  for( size_t k=0UL; k<K; ++k ) {
4188  const IntrinsicType a1( set( A(i,k) ) );
4189  xmm1 = xmm1 + a1 * B.load(k,j );
4190  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
4191  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
4192  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
4193  }
4194  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4195  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
4196  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
4197  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
4198  }
4199  }
      // Blocks of 2 intrinsic vectors, two rows at a time (xmm1-2: row i, xmm3-4: row i+1)
4200  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
4201  size_t i( 0UL );
4202  for( ; (i+2UL) <= M; i+=2UL ) {
4203  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4204  for( size_t k=0UL; k<K; ++k ) {
4205  const IntrinsicType a1( set( A(i ,k) ) );
4206  const IntrinsicType a2( set( A(i+1UL,k) ) );
4207  const IntrinsicType b1( B.load(k,j ) );
4208  const IntrinsicType b2( B.load(k,j+IT::size) );
4209  xmm1 = xmm1 + a1 * b1;
4210  xmm2 = xmm2 + a1 * b2;
4211  xmm3 = xmm3 + a2 * b1;
4212  xmm4 = xmm4 + a2 * b2;
4213  }
4214  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4215  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
4216  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
4217  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
4218  }
      // Remaining single row when M is odd
4219  if( i < M ) {
4220  IntrinsicType xmm1, xmm2;
4221  for( size_t k=0UL; k<K; ++k ) {
4222  const IntrinsicType a1( set( A(i,k) ) );
4223  xmm1 = xmm1 + a1 * B.load(k,j );
4224  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
4225  }
4226  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
4227  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
4228  }
4229  }
      // Final single intrinsic vector of columns, two rows at a time
4230  if( j < N ) {
4231  size_t i( 0UL );
4232  for( ; (i+2UL) <= M; i+=2UL ) {
4233  IntrinsicType xmm1, xmm2;
4234  for( size_t k=0UL; k<K; ++k ) {
4235  const IntrinsicType b1( B.load(k,j) );
4236  xmm1 = xmm1 + set( A(i ,k) ) * b1;
4237  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
4238  }
4239  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
4240  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
4241  }
      // Remaining single row when M is odd
4242  if( i < M ) {
4243  IntrinsicType xmm1;
4244  for( size_t k=0UL; k<K; ++k ) {
4245  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
4246  }
4247  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
4248  }
4249  }
4250  }
4251  //**********************************************************************************************
4252 
4253  //**Vectorized default subtraction assignment to column-major dense matrices********************
4267  template< typename MT3 // Type of the left-hand side target matrix
4268  , typename MT4 // Type of the left-hand side matrix operand
4269  , typename MT5 // Type of the right-hand side matrix operand
4270  , typename ST2 > // Type of the scalar value
4271  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4272  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4273  {
4276 
4277  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
4278  const typename MT4::OppositeType tmp( serial( A ) );
4279  subAssign( ~C, tmp * B * scalar );
4280  }
4281  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
4282  const typename MT5::OppositeType tmp( serial( B ) );
4283  subAssign( ~C, A * tmp * scalar );
4284  }
4285  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
4286  const typename MT4::OppositeType tmp( serial( A ) );
4287  subAssign( ~C, tmp * B * scalar );
4288  }
4289  else {
4290  const typename MT5::OppositeType tmp( serial( B ) );
4291  subAssign( ~C, A * tmp * scalar );
4292  }
4293  }
4294  //**********************************************************************************************
4295 
4296  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
4310  template< typename MT3 // Type of the left-hand side target matrix
4311  , typename MT4 // Type of the left-hand side matrix operand
4312  , typename MT5 // Type of the right-hand side matrix operand
4313  , typename ST2 > // Type of the scalar value
4314  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4315  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4316  {
4317  selectDefaultSubAssignKernel( C, A, B, scalar );
4318  }
4319  //**********************************************************************************************
4320 
4321  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
4322 #if BLAZE_BLAS_MODE
4323 
// BLAS-based subtraction assignment kernel, selected when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. Forwards to sgemm() with the negated
// scalar as the factor for the product and 1.0F as the factor for the target.
// NOTE(review): presumably sgemm() follows the BLAS GEMM convention
// C = alpha*A*B + beta*C, which makes this C -= scalar*A*B -- confirm against the wrapper.
4336  template< typename MT3 // Type of the left-hand side target matrix
4337  , typename MT4 // Type of the left-hand side matrix operand
4338  , typename MT5 // Type of the right-hand side matrix operand
4339  , typename ST2 > // Type of the scalar value
4340  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4341  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4342  {
4343  sgemm( C, A, B, -scalar, 1.0F );
4344  }
4345 #endif
4346  //**********************************************************************************************
4347 
4348  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
4349 #if BLAZE_BLAS_MODE
4350 
// BLAS-based subtraction assignment kernel, selected when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. Forwards to dgemm() with the negated
// scalar as the factor for the product and 1.0 as the factor for the target.
// NOTE(review): presumably dgemm() follows the BLAS GEMM convention
// C = alpha*A*B + beta*C, which makes this C -= scalar*A*B -- confirm against the wrapper.
4363  template< typename MT3 // Type of the left-hand side target matrix
4364  , typename MT4 // Type of the left-hand side matrix operand
4365  , typename MT5 // Type of the right-hand side matrix operand
4366  , typename ST2 > // Type of the scalar value
4367  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4368  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4369  {
4370  dgemm( C, A, B, -scalar, 1.0 );
4371  }
4372 #endif
4373  //**********************************************************************************************
4374 
4375  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
4376 #if BLAZE_BLAS_MODE
4377 
4390  template< typename MT3 // Type of the left-hand side target matrix
4391  , typename MT4 // Type of the left-hand side matrix operand
4392  , typename MT5 // Type of the right-hand side matrix operand
4393  , typename ST2 > // Type of the scalar value
4394  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4395  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4396  {
4397  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4398  }
4399 #endif
4400  //**********************************************************************************************
4401 
4402  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4403 #if BLAZE_BLAS_MODE
4404 
4417  template< typename MT3 // Type of the left-hand side target matrix
4418  , typename MT4 // Type of the left-hand side matrix operand
4419  , typename MT5 // Type of the right-hand side matrix operand
4420  , typename ST2 > // Type of the scalar value
4421  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4422  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4423  {
4424  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4425  }
4426 #endif
4427  //**********************************************************************************************
4428 
4429  //**Restructuring subtraction assignment to column-major matrices*******************************
4443  template< typename MT > // Type of the target matrix
4444  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4445  subAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
4446  {
4448 
4450 
4451  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4452  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4453 
4454  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4455  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4456 
4457  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4458  subAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
4459  else if( IsSymmetric<MT1>::value )
4460  subAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
4461  else
4462  subAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4463  }
4464  //**********************************************************************************************
4465 
4466  //**Subtraction assignment to sparse matrices***************************************************
4467  // No special implementation for the subtraction assignment to sparse matrices.
4468  //**********************************************************************************************
4469 
4470  //**Multiplication assignment to dense matrices*************************************************
4471  // No special implementation for the multiplication assignment to dense matrices.
4472  //**********************************************************************************************
4473 
4474  //**Multiplication assignment to sparse matrices************************************************
4475  // No special implementation for the multiplication assignment to sparse matrices.
4476  //**********************************************************************************************
4477 
4478  //**SMP assignment to dense matrices************************************************************
4493  template< typename MT // Type of the target dense matrix
4494  , bool SO > // Storage order of the target dense matrix
4495  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4496  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4497  {
4499 
4500  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4501  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4502 
4503  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4504  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4505 
4506  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4507  return;
4508  }
4509  else if( left.columns() == 0UL ) {
4510  reset( ~lhs );
4511  return;
4512  }
4513 
4514  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4515  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4516 
4517  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4518  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4519  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4520  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4521  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4522  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4523 
4524  smpAssign( ~lhs, A * B * rhs.scalar_ );
4525  }
4526  //**********************************************************************************************
4527 
4528  //**SMP assignment to sparse matrices***********************************************************
4543  template< typename MT // Type of the target sparse matrix
4544  , bool SO > // Storage order of the target sparse matrix
4545  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4546  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4547  {
4549 
4550  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
4551 
4558 
4559  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4560  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4561 
4562  const TmpType tmp( rhs );
4563  smpAssign( ~lhs, tmp );
4564  }
4565  //**********************************************************************************************
4566 
4567  //**Restructuring SMP assignment to column-major matrices***************************************
4581  template< typename MT > // Type of the target matrix
4582  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4583  smpAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
4584  {
4586 
4588 
4589  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4590  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4591 
4592  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4593  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4594 
4595  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4596  smpAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
4597  else if( IsSymmetric<MT1>::value )
4598  smpAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
4599  else
4600  smpAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4601  }
4602  //**********************************************************************************************
4603 
4604  //**SMP addition assignment to dense matrices***************************************************
4619  template< typename MT // Type of the target dense matrix
4620  , bool SO > // Storage order of the target dense matrix
4621  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4622  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4623  {
4625 
4626  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4627  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4628 
4629  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4630  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4631 
4632  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4633  return;
4634  }
4635 
4636  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4637  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4638 
4639  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4640  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4641  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4642  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4643  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4644  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4645 
4646  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4647  }
4648  //**********************************************************************************************
4649 
4650  //**Restructuring SMP addition assignment to column-major matrices******************************
4664  template< typename MT > // Type of the target matrix
4665  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4666  smpAddAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
4667  {
4669 
4671 
4672  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4673  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4674 
4675  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4676  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4677 
4678  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4679  smpAddAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
4680  else if( IsSymmetric<MT1>::value )
4681  smpAddAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
4682  else
4683  smpAddAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4684  }
4685  //**********************************************************************************************
4686 
4687  //**SMP addition assignment to sparse matrices**************************************************
4688  // No special implementation for the SMP addition assignment to sparse matrices.
4689  //**********************************************************************************************
4690 
4691  //**SMP subtraction assignment to dense matrices************************************************
4706  template< typename MT // Type of the target dense matrix
4707  , bool SO > // Storage order of the target dense matrix
4708  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4709  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4710  {
4712 
4713  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4714  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4715 
4716  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4717  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4718 
4719  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4720  return;
4721  }
4722 
4723  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4724  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4725 
4726  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4727  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4728  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4729  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4730  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4731  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4732 
4733  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4734  }
4735  //**********************************************************************************************
4736 
4737  //**Restructuring SMP subtraction assignment to column-major matrices***************************
4751  template< typename MT > // Type of the target matrix
4752  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4753  smpSubAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
4754  {
4756 
4758 
4759  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4760  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4761 
4762  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4763  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4764 
4765  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4766  smpSubAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
4767  else if( IsSymmetric<MT1>::value )
4768  smpSubAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
4769  else
4770  smpSubAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
4771  }
4772  //**********************************************************************************************
4773 
4774  //**SMP subtraction assignment to sparse matrices***********************************************
4775  // No special implementation for the SMP subtraction assignment to sparse matrices.
4776  //**********************************************************************************************
4777 
4778  //**SMP multiplication assignment to dense matrices*********************************************
4779  // No special implementation for the SMP multiplication assignment to dense matrices.
4780  //**********************************************************************************************
4781 
4782  //**SMP multiplication assignment to sparse matrices********************************************
4783  // No special implementation for the SMP multiplication assignment to sparse matrices.
4784  //**********************************************************************************************
4785 
4786  //**Compile time checks*************************************************************************
4795  //**********************************************************************************************
4796 };
4798 //*************************************************************************************************
4799 
4800 
4801 
4802 
4803 //=================================================================================================
4804 //
4805 // GLOBAL BINARY ARITHMETIC OPERATORS
4806 //
4807 //=================================================================================================
4808 
4809 //*************************************************************************************************
4835 template< typename T1 // Type of the left-hand side dense matrix
4836  , typename T2 > // Type of the right-hand side dense matrix
4837 inline const DMatDMatMultExpr<T1,T2>
4839 {
4841 
4842  if( (~lhs).columns() != (~rhs).rows() )
4843  throw std::invalid_argument( "Matrix sizes do not match" );
4844 
4845  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4846 }
4847 //*************************************************************************************************
4848 
4849 
4850 
4851 
4852 //=================================================================================================
4853 //
4854 // ROWS SPECIALIZATIONS
4855 //
4856 //=================================================================================================
4857 
4858 //*************************************************************************************************
4860 template< typename MT1, typename MT2 >
4861 struct Rows< DMatDMatMultExpr<MT1,MT2> >
4862  : public Rows<MT1>
4863 {};
4865 //*************************************************************************************************
4866 
4867 
4868 
4869 
4870 //=================================================================================================
4871 //
4872 // COLUMNS SPECIALIZATIONS
4873 //
4874 //=================================================================================================
4875 
4876 //*************************************************************************************************
4878 template< typename MT1, typename MT2 >
4879 struct Columns< DMatDMatMultExpr<MT1,MT2> >
4880  : public Columns<MT2>
4881 {};
4883 //*************************************************************************************************
4884 
4885 
4886 
4887 
4888 //=================================================================================================
4889 //
4890 // ISLOWER SPECIALIZATIONS
4891 //
4892 //=================================================================================================
4893 
4894 //*************************************************************************************************
4896 template< typename MT1, typename MT2 >
4897 struct IsLower< DMatDMatMultExpr<MT1,MT2> >
4898  : public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
4899 {};
4901 //*************************************************************************************************
4902 
4903 
4904 
4905 
4906 //=================================================================================================
4907 //
4908 // ISUPPER SPECIALIZATIONS
4909 //
4910 //=================================================================================================
4911 
4912 //*************************************************************************************************
4914 template< typename MT1, typename MT2 >
4915 struct IsUpper< DMatDMatMultExpr<MT1,MT2> >
4916  : public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
4917 {};
4919 //*************************************************************************************************
4920 
4921 
4922 
4923 
4924 //=================================================================================================
4925 //
4926 // EXPRESSION TRAIT SPECIALIZATIONS
4927 //
4928 //=================================================================================================
4929 
4930 //*************************************************************************************************
4932 template< typename MT1, typename MT2, typename VT >
4933 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4934 {
4935  public:
4936  //**********************************************************************************************
4937  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4938  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4939  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4940  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4941  , INVALID_TYPE >::Type Type;
4942  //**********************************************************************************************
4943 };
4945 //*************************************************************************************************
4946 
4947 
4948 //*************************************************************************************************
// Partial specialization of DMatSVecMultExprTrait for a left-hand side operand of type
// DMatDMatMultExpr<MT1,MT2> and a right-hand side operand of type VT.
//
// The nested type 'Type' is chosen at compile time via SelectType:
//  - If MT1 and MT2 both satisfy IsDenseMatrix and IsRowMajorMatrix, and VT satisfies
//    IsSparseVector and IsColumnVector, 'Type' is built in two steps: the inner step applies
//    DMatSVecMultExprTrait to MT2 and VT, the outer step applies DMatDVecMultExprTrait (note:
//    the dense vector trait) to MT1 and the inner result.
//  - In every other case 'Type' is INVALID_TYPE.
4950 template< typename MT1, typename MT2, typename VT >
4951 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4952 {
4953  public:
4954  //**********************************************************************************************
4955  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4956  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4957  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4958  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4959  , INVALID_TYPE >::Type Type;
4960  //**********************************************************************************************
4961 };
4963 //*************************************************************************************************
4964 
4965 
4966 //*************************************************************************************************
// Partial specialization of TDVecDMatMultExprTrait for a left-hand side operand of type VT
// and a right-hand side operand of type DMatDMatMultExpr<MT1,MT2>.
//
// The nested type 'Type' is chosen at compile time via SelectType:
//  - If VT satisfies IsDenseVector and IsRowVector, and MT1 and MT2 both satisfy IsDenseMatrix
//    and IsRowMajorMatrix, 'Type' is built by nesting the trait: VT is combined with MT1
//    first, and that intermediate type is then combined with MT2.
//  - In every other case 'Type' is INVALID_TYPE.
4968 template< typename VT, typename MT1, typename MT2 >
4969 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4970 {
4971  public:
4972  //**********************************************************************************************
4973  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4974  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4975  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4976  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4977  , INVALID_TYPE >::Type Type;
4978  //**********************************************************************************************
4979 };
4981 //*************************************************************************************************
4982 
4983 
4984 //*************************************************************************************************
// Partial specialization of TSVecDMatMultExprTrait for a left-hand side operand of type VT
// and a right-hand side operand of type DMatDMatMultExpr<MT1,MT2>.
//
// The nested type 'Type' is chosen at compile time via SelectType:
//  - If VT satisfies IsSparseVector and IsRowVector, and MT1 and MT2 both satisfy
//    IsDenseMatrix and IsRowMajorMatrix, 'Type' is built in two steps: the inner step applies
//    TSVecDMatMultExprTrait to VT and MT1, the outer step applies TDVecDMatMultExprTrait
//    (note: the dense vector trait) to the inner result and MT2.
//  - In every other case 'Type' is INVALID_TYPE.
4986 template< typename VT, typename MT1, typename MT2 >
4987 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4988 {
4989  public:
4990  //**********************************************************************************************
4991  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4992  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4993  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4994  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4995  , INVALID_TYPE >::Type Type;
4996  //**********************************************************************************************
4997 };
4999 //*************************************************************************************************
5000 
5001 
5002 //*************************************************************************************************
// Partial specialization of SubmatrixExprTrait for DMatDMatMultExpr<MT1,MT2>.
//
// The nested type 'Type' is the MultExprTrait result for the two operand submatrix types:
// SubmatrixExprTrait is applied to 'const MT1' and to 'const MT2' separately, each with the
// same bool parameter AF, and the two resulting types are combined via MultExprTrait.
// Unlike the matrix/vector traits for this expression, there is no SelectType guard and no
// INVALID_TYPE fallback here.
// NOTE(review): AF is presumably the submatrix alignment flag - confirm against the
// SubmatrixExprTrait primary template.
5004 template< typename MT1, typename MT2, bool AF >
5005 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2>, AF >
5006 {
5007  public:
5008  //**********************************************************************************************
5009  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
5010  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
5011  //**********************************************************************************************
5012 };
5014 //*************************************************************************************************
5015 
5016 
5017 //*************************************************************************************************
// Partial specialization of RowExprTrait for DMatDMatMultExpr<MT1,MT2>.
//
// The nested type 'Type' is the MultExprTrait result of RowExprTrait<const MT1>::Type and
// MT2: only the left-hand side operand is passed through RowExprTrait, the right-hand side
// operand type MT2 is used unchanged.
5019 template< typename MT1, typename MT2 >
5020 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
5021 {
5022  public:
5023  //**********************************************************************************************
5024  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
5025  //**********************************************************************************************
5026 };
5028 //*************************************************************************************************
5029 
5030 
5031 //*************************************************************************************************
// Partial specialization of ColumnExprTrait for DMatDMatMultExpr<MT1,MT2>.
//
// The nested type 'Type' is the MultExprTrait result of MT1 and
// ColumnExprTrait<const MT2>::Type: only the right-hand side operand is passed through
// ColumnExprTrait, the left-hand side operand type MT1 is used unchanged.
5033 template< typename MT1, typename MT2 >
5034 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
5035 {
5036  public:
5037  //**********************************************************************************************
5038  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
5039  //**********************************************************************************************
5040 };
5042 //*************************************************************************************************
5043 
5044 } // namespace blaze
5045 
5046 #endif
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:134
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4838
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:131
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:292
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:364
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:279
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:283
Header file for the IsColumnMajorMatrix type trait.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:224
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:289
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications.The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:123
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2474
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:132
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:384
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
Header file for the IsDouble type trait.
Compile time check for row-major matrix types.This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:284
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:428
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold.This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold.This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:319
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:438
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:281
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:406
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:394
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:334
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:447
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:280
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:286
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:374
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:282
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:298
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:295
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:285
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:932
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:133
Header file for the IsUpper type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:130
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:225
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:448
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:418
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849