All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
53 #include <blaze/math/Intrinsics.h>
54 #include <blaze/math/shims/Reset.h>
82 #include <blaze/system/BLAS.h>
84 #include <blaze/util/Assert.h>
85 #include <blaze/util/Complex.h>
91 #include <blaze/util/DisableIf.h>
92 #include <blaze/util/EnableIf.h>
93 #include <blaze/util/InvalidType.h>
95 #include <blaze/util/SelectType.h>
96 #include <blaze/util/Types.h>
103 
104 
105 namespace blaze {
106 
107 //=================================================================================================
108 //
109 // CLASS DMATTDMATMULTEXPR
110 //
111 //=================================================================================================
112 
113 //*************************************************************************************************
120 template< typename MT1 // Type of the left-hand side dense matrix
121  , typename MT2 > // Type of the right-hand side dense matrix
122 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
123  , private MatMatMultExpr
124  , private Computation
125 {
126  private:
127  //**Type definitions****************************************************************************
128  typedef typename MT1::ResultType RT1;
129  typedef typename MT2::ResultType RT2;
130  typedef typename RT1::ElementType ET1;
131  typedef typename RT2::ElementType ET2;
132  typedef typename MT1::CompositeType CT1;
133  typedef typename MT2::CompositeType CT2;
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
139  //**********************************************************************************************
140 
141  //**********************************************************************************************
143  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
144  //**********************************************************************************************
145 
146  //**********************************************************************************************
148 
152  template< typename T1, typename T2, typename T3 >
153  struct IsEvaluationRequired {
154  enum { value = ( evaluateLeft || evaluateRight ) };
155  };
157  //**********************************************************************************************
158 
159  //**********************************************************************************************
161 
164  template< typename T1, typename T2, typename T3 >
165  struct UseSinglePrecisionKernel {
166  enum { value = BLAZE_BLAS_MODE &&
167  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
168  IsFloat<typename T1::ElementType>::value &&
169  IsFloat<typename T2::ElementType>::value &&
170  IsFloat<typename T3::ElementType>::value };
171  };
173  //**********************************************************************************************
174 
175  //**********************************************************************************************
177 
180  template< typename T1, typename T2, typename T3 >
181  struct UseDoublePrecisionKernel {
182  enum { value = BLAZE_BLAS_MODE &&
183  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
184  IsDouble<typename T1::ElementType>::value &&
185  IsDouble<typename T2::ElementType>::value &&
186  IsDouble<typename T3::ElementType>::value };
187  };
189  //**********************************************************************************************
190 
191  //**********************************************************************************************
193 
197  template< typename T1, typename T2, typename T3 >
198  struct UseSinglePrecisionComplexKernel {
199  typedef complex<float> Type;
200  enum { value = BLAZE_BLAS_MODE &&
201  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202  IsSame<typename T1::ElementType,Type>::value &&
203  IsSame<typename T2::ElementType,Type>::value &&
204  IsSame<typename T3::ElementType,Type>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
215  template< typename T1, typename T2, typename T3 >
216  struct UseDoublePrecisionComplexKernel {
217  typedef complex<double> Type;
218  enum { value = BLAZE_BLAS_MODE &&
219  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
220  IsSame<typename T1::ElementType,Type>::value &&
221  IsSame<typename T2::ElementType,Type>::value &&
222  IsSame<typename T3::ElementType,Type>::value };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
232  template< typename T1, typename T2, typename T3 >
233  struct UseDefaultKernel {
234  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
235  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
236  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
237  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
238  };
240  //**********************************************************************************************
241 
242  //**********************************************************************************************
244 
247  template< typename T1, typename T2, typename T3 >
248  struct UseVectorizedDefaultKernel {
249  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
250  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
251  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
252  IntrinsicTrait<typename T1::ElementType>::addition &&
253  IntrinsicTrait<typename T1::ElementType>::multiplication };
254  };
256  //**********************************************************************************************
257 
258  public:
259  //**Type definitions****************************************************************************
266  typedef const ElementType ReturnType;
267  typedef const ResultType CompositeType;
268 
270  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
271 
273  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
274 
277 
280  //**********************************************************************************************
281 
282  //**Compilation flags***************************************************************************
284  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
288 
290  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
291  !evaluateRight && MT2::smpAssignable };
292  //**********************************************************************************************
293 
294  //**Constructor*********************************************************************************
300  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
301  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
302  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
303  {
304  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
305  }
306  //**********************************************************************************************
307 
308  //**Access operator*****************************************************************************
315  inline ReturnType operator()( size_t i, size_t j ) const {
316  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
317  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
318 
319  ElementType tmp;
320 
321  if( lhs_.columns() != 0UL ) {
322  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
323  tmp = lhs_(i,0UL) * rhs_(0UL,j);
324  for( size_t k=1UL; k<end; k+=2UL ) {
325  tmp += lhs_(i,k ) * rhs_(k ,j);
326  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
327  }
328  if( end < lhs_.columns() ) {
329  tmp += lhs_(i,end) * rhs_(end,j);
330  }
331  }
332  else {
333  reset( tmp );
334  }
335 
336  return tmp;
337  }
338  //**********************************************************************************************
339 
340  //**Rows function*******************************************************************************
345  inline size_t rows() const {
346  return lhs_.rows();
347  }
348  //**********************************************************************************************
349 
350  //**Columns function****************************************************************************
355  inline size_t columns() const {
356  return rhs_.columns();
357  }
358  //**********************************************************************************************
359 
360  //**Left operand access*************************************************************************
365  inline LeftOperand leftOperand() const {
366  return lhs_;
367  }
368  //**********************************************************************************************
369 
370  //**Right operand access************************************************************************
375  inline RightOperand rightOperand() const {
376  return rhs_;
377  }
378  //**********************************************************************************************
379 
380  //**********************************************************************************************
386  template< typename T >
387  inline bool canAlias( const T* alias ) const {
388  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
389  }
390  //**********************************************************************************************
391 
392  //**********************************************************************************************
398  template< typename T >
399  inline bool isAliased( const T* alias ) const {
400  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
401  }
402  //**********************************************************************************************
403 
404  //**********************************************************************************************
409  inline bool isAligned() const {
410  return lhs_.isAligned() && rhs_.isAligned();
411  }
412  //**********************************************************************************************
413 
414  //**********************************************************************************************
419  inline bool canSMPAssign() const {
420  return ( !BLAZE_BLAS_IS_PARALLEL ||
421  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
423  }
424  //**********************************************************************************************
425 
426  private:
427  //**Member variables****************************************************************************
430  //**********************************************************************************************
431 
432  //**BLAS kernel (single precision)**************************************************************
433 #if BLAZE_BLAS_MODE
434 
449  template< typename MT3 // Type of the left-hand side target matrix
450  , typename MT4 // Type of the left-hand side matrix operand
451  , typename MT5 > // Type of the right-hand side matrix operand
452  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
453  {
454  using boost::numeric_cast;
455 
459 
460  const int M ( numeric_cast<int>( A.rows() ) );
461  const int N ( numeric_cast<int>( B.columns() ) );
462  const int K ( numeric_cast<int>( A.columns() ) );
463  const int lda( numeric_cast<int>( A.spacing() ) );
464  const int ldb( numeric_cast<int>( B.spacing() ) );
465  const int ldc( numeric_cast<int>( C.spacing() ) );
466 
468  cblas_ssymm( CblasColMajor, CblasLeft, CblasLower,
469  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
470  }
472  cblas_ssymm( CblasRowMajor, CblasRight, CblasUpper,
473  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
474  }
475  else {
476  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
477  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
478  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
479  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
480  }
481  }
483 #endif
484  //**********************************************************************************************
485 
486  //**BLAS kernel (double precision)**************************************************************
487 #if BLAZE_BLAS_MODE
488 
503  template< typename MT3 // Type of the left-hand side target matrix
504  , typename MT4 // Type of the left-hand side matrix operand
505  , typename MT5 > // Type of the right-hand side matrix operand
506  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
507  {
508  using boost::numeric_cast;
509 
513 
514  const int M ( numeric_cast<int>( A.rows() ) );
515  const int N ( numeric_cast<int>( B.columns() ) );
516  const int K ( numeric_cast<int>( A.columns() ) );
517  const int lda( numeric_cast<int>( A.spacing() ) );
518  const int ldb( numeric_cast<int>( B.spacing() ) );
519  const int ldc( numeric_cast<int>( C.spacing() ) );
520 
522  cblas_dsymm( CblasColMajor, CblasLeft, CblasLower,
523  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
524  }
525  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
526  cblas_dsymm( CblasRowMajor, CblasRight, CblasUpper,
527  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
528  }
529  else {
530  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
531  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
532  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
533  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
534  }
535  }
537 #endif
538  //**********************************************************************************************
539 
540  //**BLAS kernel (single precision complex)******************************************************
541 #if BLAZE_BLAS_MODE
542 
557  template< typename MT3 // Type of the left-hand side target matrix
558  , typename MT4 // Type of the left-hand side matrix operand
559  , typename MT5 > // Type of the right-hand side matrix operand
560  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
561  complex<float> alpha, complex<float> beta )
562  {
563  using boost::numeric_cast;
564 
568  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
571 
572  const int M ( numeric_cast<int>( A.rows() ) );
573  const int N ( numeric_cast<int>( B.columns() ) );
574  const int K ( numeric_cast<int>( A.columns() ) );
575  const int lda( numeric_cast<int>( A.spacing() ) );
576  const int ldb( numeric_cast<int>( B.spacing() ) );
577  const int ldc( numeric_cast<int>( C.spacing() ) );
578 
579  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
580  cblas_csymm( CblasColMajor, CblasLeft, CblasLower,
581  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
582  }
583  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
584  cblas_csymm( CblasRowMajor, CblasRight, CblasUpper,
585  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
586  }
587  else {
588  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
589  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
590  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
591  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
592  }
593  }
595 #endif
596  //**********************************************************************************************
597 
598  //**BLAS kernel (double precision complex)******************************************************
599 #if BLAZE_BLAS_MODE
600 
615  template< typename MT3 // Type of the left-hand side target matrix
616  , typename MT4 // Type of the left-hand side matrix operand
617  , typename MT5 > // Type of the right-hand side matrix operand
618  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
619  complex<double> alpha, complex<double> beta )
620  {
621  using boost::numeric_cast;
622 
626  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
627  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
628  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
629 
630  const int M ( numeric_cast<int>( A.rows() ) );
631  const int N ( numeric_cast<int>( B.columns() ) );
632  const int K ( numeric_cast<int>( A.columns() ) );
633  const int lda( numeric_cast<int>( A.spacing() ) );
634  const int ldb( numeric_cast<int>( B.spacing() ) );
635  const int ldc( numeric_cast<int>( C.spacing() ) );
636 
637  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
638  cblas_zsymm( CblasColMajor, CblasLeft, CblasLower,
639  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
640  }
641  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
642  cblas_zsymm( CblasRowMajor, CblasRight, CblasUpper,
643  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
644  }
645  else {
646  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
647  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
648  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
649  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
650  }
651  }
653 #endif
654  //**********************************************************************************************
655 
656  //**Assignment to dense matrices****************************************************************
669  template< typename MT // Type of the target dense matrix
670  , bool SO > // Storage order of the target dense matrix
671  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
672  {
674 
675  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
676  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
677 
678  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
679  return;
680  }
681  else if( rhs.lhs_.columns() == 0UL ) {
682  reset( ~lhs );
683  return;
684  }
685 
686  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
687  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
688 
689  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
690  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
691  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
692  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
693  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
694  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
695 
696  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
697  }
699  //**********************************************************************************************
700 
701  //**Assignment to dense matrices (kernel selection)*********************************************
712  template< typename MT3 // Type of the left-hand side target matrix
713  , typename MT4 // Type of the left-hand side matrix operand
714  , typename MT5 > // Type of the right-hand side matrix operand
715  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
716  {
717  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
718  DMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
719  else
720  DMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
721  }
723  //**********************************************************************************************
724 
725  //**Default assignment to dense matrices********************************************************
739  template< typename MT3 // Type of the left-hand side target matrix
740  , typename MT4 // Type of the left-hand side matrix operand
741  , typename MT5 > // Type of the right-hand side matrix operand
742  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
743  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
744  {
745  const size_t M( A.rows() );
746  const size_t N( B.columns() );
747  const size_t K( A.columns() );
748 
749  for( size_t i=0UL; i<M; ++i ) {
750  for( size_t j=0UL; j<N; ++j ) {
751  C(i,j) = A(i,0UL) * B(0UL,j);
752  }
753  for( size_t k=1UL; k<K; ++k ) {
754  for( size_t j=0UL; j<N; ++j ) {
755  C(i,j) += A(i,k) * B(k,j);
756  }
757  }
758  }
759  }
761  //**********************************************************************************************
762 
763  //**Vectorized default assignment to row-major dense matrices***********************************
777  template< typename MT3 // Type of the left-hand side target matrix
778  , typename MT4 // Type of the left-hand side matrix operand
779  , typename MT5 > // Type of the right-hand side matrix operand
780  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
781  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
782  {
783  typedef IntrinsicTrait<ElementType> IT;
784 
785  const size_t M( A.rows() );
786  const size_t N( B.columns() );
787  const size_t K( A.columns() );
788 
789  size_t i( 0UL );
790 
791  for( ; (i+2UL) <= M; i+=2UL ) {
792  size_t j( 0UL );
793  for( ; (j+4UL) <= N; j+=4UL ) {
794  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
795  for( size_t k=0UL; k<K; k+=IT::size ) {
796  const IntrinsicType a1( A.load(i ,k) );
797  const IntrinsicType a2( A.load(i+1UL,k) );
798  const IntrinsicType b1( B.load(k,j ) );
799  const IntrinsicType b2( B.load(k,j+1UL) );
800  const IntrinsicType b3( B.load(k,j+2UL) );
801  const IntrinsicType b4( B.load(k,j+3UL) );
802  xmm1 = xmm1 + a1 * b1;
803  xmm2 = xmm2 + a1 * b2;
804  xmm3 = xmm3 + a1 * b3;
805  xmm4 = xmm4 + a1 * b4;
806  xmm5 = xmm5 + a2 * b1;
807  xmm6 = xmm6 + a2 * b2;
808  xmm7 = xmm7 + a2 * b3;
809  xmm8 = xmm8 + a2 * b4;
810  }
811  (~C)(i ,j ) = sum( xmm1 );
812  (~C)(i ,j+1UL) = sum( xmm2 );
813  (~C)(i ,j+2UL) = sum( xmm3 );
814  (~C)(i ,j+3UL) = sum( xmm4 );
815  (~C)(i+1UL,j ) = sum( xmm5 );
816  (~C)(i+1UL,j+1UL) = sum( xmm6 );
817  (~C)(i+1UL,j+2UL) = sum( xmm7 );
818  (~C)(i+1UL,j+3UL) = sum( xmm8 );
819  }
820  for( ; (j+2UL) <= N; j+=2UL ) {
821  IntrinsicType xmm1, xmm2, xmm3, xmm4;
822  for( size_t k=0UL; k<K; k+=IT::size ) {
823  const IntrinsicType a1( A.load(i ,k) );
824  const IntrinsicType a2( A.load(i+1UL,k) );
825  const IntrinsicType b1( B.load(k,j ) );
826  const IntrinsicType b2( B.load(k,j+1UL) );
827  xmm1 = xmm1 + a1 * b1;
828  xmm2 = xmm2 + a1 * b2;
829  xmm3 = xmm3 + a2 * b1;
830  xmm4 = xmm4 + a2 * b2;
831  }
832  (~C)(i ,j ) = sum( xmm1 );
833  (~C)(i ,j+1UL) = sum( xmm2 );
834  (~C)(i+1UL,j ) = sum( xmm3 );
835  (~C)(i+1UL,j+1UL) = sum( xmm4 );
836  }
837  if( j < N ) {
838  IntrinsicType xmm1, xmm2;
839  for( size_t k=0UL; k<K; k+=IT::size ) {
840  const IntrinsicType b1( B.load(k,j) );
841  xmm1 = xmm1 + A.load(i ,k) * b1;
842  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
843  }
844  (~C)(i ,j) = sum( xmm1 );
845  (~C)(i+1UL,j) = sum( xmm2 );
846  }
847  }
848  if( i < M ) {
849  size_t j( 0UL );
850  for( ; (j+4UL) <= N; j+=4UL ) {
851  IntrinsicType xmm1, xmm2, xmm3, xmm4;
852  for( size_t k=0UL; k<K; k+=IT::size ) {
853  const IntrinsicType a1( A.load(i,k) );
854  xmm1 = xmm1 + a1 * B.load(k,j );
855  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
856  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
857  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
858  }
859  (~C)(i,j ) = sum( xmm1 );
860  (~C)(i,j+1UL) = sum( xmm2 );
861  (~C)(i,j+2UL) = sum( xmm3 );
862  (~C)(i,j+3UL) = sum( xmm4 );
863  }
864  for( ; (j+2UL) <= N; j+=2UL ) {
865  IntrinsicType xmm1, xmm2;
866  for( size_t k=0UL; k<K; k+=IT::size ) {
867  const IntrinsicType a1( A.load(i,k) );
868  xmm1 = xmm1 + a1 * B.load(k,j );
869  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
870  }
871  (~C)(i,j ) = sum( xmm1 );
872  (~C)(i,j+1UL) = sum( xmm2 );
873  }
874  if( j < N ) {
875  IntrinsicType xmm1, xmm2;
876  for( size_t k=0UL; k<K; k+=IT::size ) {
877  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
878  }
879  (~C)(i,j) = sum( xmm1 );
880  }
881  }
882  }
884  //**********************************************************************************************
885 
886  //**Vectorized default assignment to column-major dense matrices********************************
900  template< typename MT3 // Type of the left-hand side target matrix
901  , typename MT4 // Type of the left-hand side matrix operand
902  , typename MT5 > // Type of the right-hand side matrix operand
903  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
904  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
905  {
906  typedef IntrinsicTrait<ElementType> IT;
907 
908  const size_t M( A.rows() );
909  const size_t N( B.columns() );
910  const size_t K( A.columns() );
911 
912  size_t i( 0UL );
913 
914  for( ; (i+4UL) <= M; i+=4UL ) {
915  size_t j( 0UL );
916  for( ; (j+2UL) <= N; j+=2UL ) {
917  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
918  for( size_t k=0UL; k<K; k+=IT::size ) {
919  const IntrinsicType a1( A.load(i ,k) );
920  const IntrinsicType a2( A.load(i+1UL,k) );
921  const IntrinsicType a3( A.load(i+2UL,k) );
922  const IntrinsicType a4( A.load(i+3UL,k) );
923  const IntrinsicType b1( B.load(k,j ) );
924  const IntrinsicType b2( B.load(k,j+1UL) );
925  xmm1 = xmm1 + a1 * b1;
926  xmm2 = xmm2 + a1 * b2;
927  xmm3 = xmm3 + a2 * b1;
928  xmm4 = xmm4 + a2 * b2;
929  xmm5 = xmm5 + a3 * b1;
930  xmm6 = xmm6 + a3 * b2;
931  xmm7 = xmm7 + a4 * b1;
932  xmm8 = xmm8 + a4 * b2;
933  }
934  (~C)(i ,j ) = sum( xmm1 );
935  (~C)(i ,j+1UL) = sum( xmm2 );
936  (~C)(i+1UL,j ) = sum( xmm3 );
937  (~C)(i+1UL,j+1UL) = sum( xmm4 );
938  (~C)(i+2UL,j ) = sum( xmm5 );
939  (~C)(i+2UL,j+1UL) = sum( xmm6 );
940  (~C)(i+3UL,j ) = sum( xmm7 );
941  (~C)(i+3UL,j+1UL) = sum( xmm8 );
942  }
943  if( j < N ) {
944  IntrinsicType xmm1, xmm2, xmm3, xmm4;
945  for( size_t k=0UL; k<K; k+=IT::size ) {
946  const IntrinsicType b1( B.load(k,j) );
947  xmm1 = xmm1 + A.load(i ,k) * b1;
948  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
949  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
950  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
951  }
952  (~C)(i ,j) = sum( xmm1 );
953  (~C)(i+1UL,j) = sum( xmm2 );
954  (~C)(i+2UL,j) = sum( xmm3 );
955  (~C)(i+3UL,j) = sum( xmm4 );
956  }
957  }
958  for( ; (i+2UL) <= M; i+=2UL ) {
959  size_t j( 0UL );
960  for( ; (j+2UL) <= N; j+=2UL ) {
961  IntrinsicType xmm1, xmm2, xmm3, xmm4;
962  for( size_t k=0UL; k<K; k+=IT::size ) {
963  const IntrinsicType a1( A.load(i ,k) );
964  const IntrinsicType a2( A.load(i+1UL,k) );
965  const IntrinsicType b1( B.load(k,j ) );
966  const IntrinsicType b2( B.load(k,j+1UL) );
967  xmm1 = xmm1 + a1 * b1;
968  xmm2 = xmm2 + a1 * b2;
969  xmm3 = xmm3 + a2 * b1;
970  xmm4 = xmm4 + a2 * b2;
971  }
972  (~C)(i ,j ) = sum( xmm1 );
973  (~C)(i ,j+1UL) = sum( xmm2 );
974  (~C)(i+1UL,j ) = sum( xmm3 );
975  (~C)(i+1UL,j+1UL) = sum( xmm4 );
976  }
977  if( j < N ) {
978  IntrinsicType xmm1, xmm2;
979  for( size_t k=0UL; k<K; k+=IT::size ) {
980  const IntrinsicType b1( B.load(k,j) );
981  xmm1 = xmm1 + A.load(i ,k) * b1;
982  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
983  }
984  (~C)(i ,j) = sum( xmm1 );
985  (~C)(i+1UL,j) = sum( xmm2 );
986  }
987  }
988  if( i < M ) {
989  size_t j( 0UL );
990  for( ; (j+2UL) <= N; j+=2UL ) {
991  IntrinsicType xmm1, xmm2;
992  for( size_t k=0UL; k<K; k+=IT::size ) {
993  const IntrinsicType a1( A.load(i,k) );
994  xmm1 = xmm1 + a1 * B.load(k,j );
995  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
996  }
997  (~C)(i,j ) = sum( xmm1 );
998  (~C)(i,j+1UL) = sum( xmm2 );
999  }
1000  if( j < N ) {
1001  IntrinsicType xmm1, xmm2;
1002  for( size_t k=0UL; k<K; k+=IT::size ) {
1003  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1004  }
1005  (~C)(i,j) = sum( xmm1 );
1006  }
1007  }
1008  }
1010  //**********************************************************************************************
1011 
1012  //**Default assignment to dense matrices********************************************************
1026  template< typename MT3 // Type of the left-hand side target matrix
1027  , typename MT4 // Type of the left-hand side matrix operand
1028  , typename MT5 > // Type of the right-hand side matrix operand
1029  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1030  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1031  {
1032  selectDefaultAssignKernel( C, A, B );
1033  }
1035  //**********************************************************************************************
1036 
1037  //**BLAS-based assignment to dense matrices (single precision)**********************************
1038 #if BLAZE_BLAS_MODE
1039 
1052  template< typename MT3 // Type of the left-hand side target matrix
1053  , typename MT4 // Type of the left-hand side matrix operand
1054  , typename MT5 > // Type of the right-hand side matrix operand
1055  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1056  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1057  {
1058  sgemm( C, A, B, 1.0F, 0.0F );
1059  }
1061 #endif
1062  //**********************************************************************************************
1063 
1064  //**BLAS-based assignment to dense matrices (double precision)**********************************
1065 #if BLAZE_BLAS_MODE
1066 
1079  template< typename MT3 // Type of the left-hand side target matrix
1080  , typename MT4 // Type of the left-hand side matrix operand
1081  , typename MT5 > // Type of the right-hand side matrix operand
1082  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1083  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1084  {
1085  dgemm( C, A, B, 1.0, 0.0 );
1086  }
1088 #endif
1089  //**********************************************************************************************
1090 
1091  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1092 #if BLAZE_BLAS_MODE
1093 
1106  template< typename MT3 // Type of the left-hand side target matrix
1107  , typename MT4 // Type of the left-hand side matrix operand
1108  , typename MT5 > // Type of the right-hand side matrix operand
1109  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1110  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1111  {
1112  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1113  }
1115 #endif
1116  //**********************************************************************************************
1117 
1118  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1119 #if BLAZE_BLAS_MODE
1120 
1133  template< typename MT3 // Type of the left-hand side target matrix
1134  , typename MT4 // Type of the left-hand side matrix operand
1135  , typename MT5 > // Type of the right-hand side matrix operand
1136  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1137  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1138  {
1139  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1140  }
1142 #endif
1143  //**********************************************************************************************
1144 
1145  //**Assignment to sparse matrices***************************************************************
1158  template< typename MT // Type of the target sparse matrix
1159  , bool SO > // Storage order of the target sparse matrix
1160  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1161  {
1163 
1164  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1165 
1172 
1173  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1174  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1175 
1176  const TmpType tmp( serial( rhs ) );
1177  assign( ~lhs, tmp );
1178  }
1180  //**********************************************************************************************
1181 
1182  //**Addition assignment to dense matrices*******************************************************
1195  template< typename MT // Type of the target dense matrix
1196  , bool SO > // Storage order of the target dense matrix
1197  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1198  {
1200 
1201  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1202  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1203 
1204  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1205  return;
1206  }
1207 
1208  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1209  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1210 
1211  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1212  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1213  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1214  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1215  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1216  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1217 
1218  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1219  }
1221  //**********************************************************************************************
1222 
1223  //**Addition assignment to dense matrices (kernel selection)************************************
1234  template< typename MT3 // Type of the left-hand side target matrix
1235  , typename MT4 // Type of the left-hand side matrix operand
1236  , typename MT5 > // Type of the right-hand side matrix operand
1237  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1238  {
1239  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
1240  DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1241  else
1242  DMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1243  }
1245  //**********************************************************************************************
1246 
1247  //**Default addition assignment to dense matrices***********************************************
1261  template< typename MT3 // Type of the left-hand side target matrix
1262  , typename MT4 // Type of the left-hand side matrix operand
1263  , typename MT5 > // Type of the right-hand side matrix operand
1264  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1265  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1266  {
1267  const size_t M( A.rows() );
1268  const size_t N( B.columns() );
1269  const size_t K( A.columns() );
1270 
1271  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1272  const size_t end( N & size_t(-2) );
1273 
1274  for( size_t i=0UL; i<M; ++i ) {
1275  for( size_t k=0UL; k<K; ++k ) {
1276  for( size_t j=0UL; j<end; j+=2UL ) {
1277  C(i,j ) += A(i,k) * B(k,j );
1278  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1279  }
1280  if( end < N ) {
1281  C(i,end) += A(i,k) * B(k,end);
1282  }
1283  }
1284  }
1285  }
1287  //**********************************************************************************************
1288 
1289  //**Vectorized default addition assignment to row-major dense matrices**************************
1303  template< typename MT3 // Type of the left-hand side target matrix
1304  , typename MT4 // Type of the left-hand side matrix operand
1305  , typename MT5 > // Type of the right-hand side matrix operand
1306  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1307  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1308  {
1309  typedef IntrinsicTrait<ElementType> IT;
1310 
1311  const size_t M( A.rows() );
1312  const size_t N( B.columns() );
1313  const size_t K( A.columns() );
1314 
1315  size_t i( 0UL );
1316 
1317  for( ; (i+2UL) <= M; i+=2UL ) {
1318  size_t j( 0UL );
1319  for( ; (j+4UL) <= N; j+=4UL ) {
1320  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1321  for( size_t k=0UL; k<K; k+=IT::size ) {
1322  const IntrinsicType a1( A.load(i ,k) );
1323  const IntrinsicType a2( A.load(i+1UL,k) );
1324  const IntrinsicType b1( B.load(k,j ) );
1325  const IntrinsicType b2( B.load(k,j+1UL) );
1326  const IntrinsicType b3( B.load(k,j+2UL) );
1327  const IntrinsicType b4( B.load(k,j+3UL) );
1328  xmm1 = xmm1 + a1 * b1;
1329  xmm2 = xmm2 + a1 * b2;
1330  xmm3 = xmm3 + a1 * b3;
1331  xmm4 = xmm4 + a1 * b4;
1332  xmm5 = xmm5 + a2 * b1;
1333  xmm6 = xmm6 + a2 * b2;
1334  xmm7 = xmm7 + a2 * b3;
1335  xmm8 = xmm8 + a2 * b4;
1336  }
1337  (~C)(i ,j ) += sum( xmm1 );
1338  (~C)(i ,j+1UL) += sum( xmm2 );
1339  (~C)(i ,j+2UL) += sum( xmm3 );
1340  (~C)(i ,j+3UL) += sum( xmm4 );
1341  (~C)(i+1UL,j ) += sum( xmm5 );
1342  (~C)(i+1UL,j+1UL) += sum( xmm6 );
1343  (~C)(i+1UL,j+2UL) += sum( xmm7 );
1344  (~C)(i+1UL,j+3UL) += sum( xmm8 );
1345  }
1346  for( ; (j+2UL) <= N; j+=2UL ) {
1347  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1348  for( size_t k=0UL; k<K; k+=IT::size ) {
1349  const IntrinsicType a1( A.load(i ,k) );
1350  const IntrinsicType a2( A.load(i+1UL,k) );
1351  const IntrinsicType b1( B.load(k,j ) );
1352  const IntrinsicType b2( B.load(k,j+1UL) );
1353  xmm1 = xmm1 + a1 * b1;
1354  xmm2 = xmm2 + a1 * b2;
1355  xmm3 = xmm3 + a2 * b1;
1356  xmm4 = xmm4 + a2 * b2;
1357  }
1358  (~C)(i ,j ) += sum( xmm1 );
1359  (~C)(i ,j+1UL) += sum( xmm2 );
1360  (~C)(i+1UL,j ) += sum( xmm3 );
1361  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1362  }
1363  if( j < N ) {
1364  IntrinsicType xmm1, xmm2;
1365  for( size_t k=0UL; k<K; k+=IT::size ) {
1366  const IntrinsicType b1( B.load(k,j) );
1367  xmm1 = xmm1 + A.load(i ,k) * b1;
1368  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1369  }
1370  (~C)(i ,j) += sum( xmm1 );
1371  (~C)(i+1UL,j) += sum( xmm2 );
1372  }
1373  }
1374  if( i < M ) {
1375  size_t j( 0UL );
1376  for( ; (j+4UL) <= N; j+=4UL ) {
1377  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1378  for( size_t k=0UL; k<K; k+=IT::size ) {
1379  const IntrinsicType a1( A.load(i,k) );
1380  xmm1 = xmm1 + a1 * B.load(k,j );
1381  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1382  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1383  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1384  }
1385  (~C)(i,j ) += sum( xmm1 );
1386  (~C)(i,j+1UL) += sum( xmm2 );
1387  (~C)(i,j+2UL) += sum( xmm3 );
1388  (~C)(i,j+3UL) += sum( xmm4 );
1389  }
1390  for( ; (j+2UL) <= N; j+=2UL ) {
1391  IntrinsicType xmm1, xmm2;
1392  for( size_t k=0UL; k<K; k+=IT::size ) {
1393  const IntrinsicType a1( A.load(i,k) );
1394  xmm1 = xmm1 + a1 * B.load(k,j );
1395  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1396  }
1397  (~C)(i,j ) += sum( xmm1 );
1398  (~C)(i,j+1UL) += sum( xmm2 );
1399  }
1400  if( j < N ) {
1401  IntrinsicType xmm1, xmm2;
1402  for( size_t k=0UL; k<K; k+=IT::size ) {
1403  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1404  }
1405  (~C)(i,j) += sum( xmm1 );
1406  }
1407  }
1408  }
1410  //**********************************************************************************************
1411 
1412  //**Vectorized default addition assignment to column-major dense matrices***********************
1426  template< typename MT3 // Type of the left-hand side target matrix
1427  , typename MT4 // Type of the left-hand side matrix operand
1428  , typename MT5 > // Type of the right-hand side matrix operand
1429  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1430  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1431  {
1432  typedef IntrinsicTrait<ElementType> IT;
1433 
1434  const size_t M( A.rows() );
1435  const size_t N( B.columns() );
1436  const size_t K( A.columns() );
1437 
1438  size_t i( 0UL );
1439 
1440  for( ; (i+4UL) <= M; i+=4UL ) {
1441  size_t j( 0UL );
1442  for( ; (j+2UL) <= N; j+=2UL ) {
1443  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1444  for( size_t k=0UL; k<K; k+=IT::size ) {
1445  const IntrinsicType a1( A.load(i ,k) );
1446  const IntrinsicType a2( A.load(i+1UL,k) );
1447  const IntrinsicType a3( A.load(i+2UL,k) );
1448  const IntrinsicType a4( A.load(i+3UL,k) );
1449  const IntrinsicType b1( B.load(k,j ) );
1450  const IntrinsicType b2( B.load(k,j+1UL) );
1451  xmm1 = xmm1 + a1 * b1;
1452  xmm2 = xmm2 + a1 * b2;
1453  xmm3 = xmm3 + a2 * b1;
1454  xmm4 = xmm4 + a2 * b2;
1455  xmm5 = xmm5 + a3 * b1;
1456  xmm6 = xmm6 + a3 * b2;
1457  xmm7 = xmm7 + a4 * b1;
1458  xmm8 = xmm8 + a4 * b2;
1459  }
1460  (~C)(i ,j ) += sum( xmm1 );
1461  (~C)(i ,j+1UL) += sum( xmm2 );
1462  (~C)(i+1UL,j ) += sum( xmm3 );
1463  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1464  (~C)(i+2UL,j ) += sum( xmm5 );
1465  (~C)(i+2UL,j+1UL) += sum( xmm6 );
1466  (~C)(i+3UL,j ) += sum( xmm7 );
1467  (~C)(i+3UL,j+1UL) += sum( xmm8 );
1468  }
1469  if( j < N ) {
1470  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1471  for( size_t k=0UL; k<K; k+=IT::size ) {
1472  const IntrinsicType b1( B.load(k,j) );
1473  xmm1 = xmm1 + A.load(i ,k) * b1;
1474  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1475  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1476  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1477  }
1478  (~C)(i ,j) += sum( xmm1 );
1479  (~C)(i+1UL,j) += sum( xmm2 );
1480  (~C)(i+2UL,j) += sum( xmm3 );
1481  (~C)(i+3UL,j) += sum( xmm4 );
1482  }
1483  }
1484  for( ; (i+2UL) <= M; i+=2UL ) {
1485  size_t j( 0UL );
1486  for( ; (j+2UL) <= N; j+=2UL ) {
1487  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1488  for( size_t k=0UL; k<K; k+=IT::size ) {
1489  const IntrinsicType a1( A.load(i ,k) );
1490  const IntrinsicType a2( A.load(i+1UL,k) );
1491  const IntrinsicType b1( B.load(k,j ) );
1492  const IntrinsicType b2( B.load(k,j+1UL) );
1493  xmm1 = xmm1 + a1 * b1;
1494  xmm2 = xmm2 + a1 * b2;
1495  xmm3 = xmm3 + a2 * b1;
1496  xmm4 = xmm4 + a2 * b2;
1497  }
1498  (~C)(i ,j ) += sum( xmm1 );
1499  (~C)(i ,j+1UL) += sum( xmm2 );
1500  (~C)(i+1UL,j ) += sum( xmm3 );
1501  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1502  }
1503  if( j < N ) {
1504  IntrinsicType xmm1, xmm2;
1505  for( size_t k=0UL; k<K; k+=IT::size ) {
1506  const IntrinsicType b1( B.load(k,j) );
1507  xmm1 = xmm1 + A.load(i ,k) * b1;
1508  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1509  }
1510  (~C)(i ,j) += sum( xmm1 );
1511  (~C)(i+1UL,j) += sum( xmm2 );
1512  }
1513  }
1514  if( i < M ) {
1515  size_t j( 0UL );
1516  for( ; (j+2UL) <= N; j+=2UL ) {
1517  IntrinsicType xmm1, xmm2;
1518  for( size_t k=0UL; k<K; k+=IT::size ) {
1519  const IntrinsicType a1( A.load(i,k) );
1520  xmm1 = xmm1 + a1 * B.load(k,j );
1521  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1522  }
1523  (~C)(i,j ) += sum( xmm1 );
1524  (~C)(i,j+1UL) += sum( xmm2 );
1525  }
1526  if( j < N ) {
1527  IntrinsicType xmm1, xmm2;
1528  for( size_t k=0UL; k<K; k+=IT::size ) {
1529  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1530  }
1531  (~C)(i,j) += sum( xmm1 );
1532  }
1533  }
1534  }
1536  //**********************************************************************************************
1537 
1538  //**Default addition assignment to dense matrices***********************************************
1552  template< typename MT3 // Type of the left-hand side target matrix
1553  , typename MT4 // Type of the left-hand side matrix operand
1554  , typename MT5 > // Type of the right-hand side matrix operand
1555  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1556  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1557  {
1558  selectDefaultAddAssignKernel( C, A, B );
1559  }
1561  //**********************************************************************************************
1562 
1563  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1564 #if BLAZE_BLAS_MODE
1565 
1578  template< typename MT3 // Type of the left-hand side target matrix
1579  , typename MT4 // Type of the left-hand side matrix operand
1580  , typename MT5 > // Type of the right-hand side matrix operand
1581  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1582  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1583  {
1584  sgemm( C, A, B, 1.0F, 1.0F );
1585  }
1587 #endif
1588  //**********************************************************************************************
1589 
1590  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1591 #if BLAZE_BLAS_MODE
1592 
1605  template< typename MT3 // Type of the left-hand side target matrix
1606  , typename MT4 // Type of the left-hand side matrix operand
1607  , typename MT5 > // Type of the right-hand side matrix operand
1608  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1609  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1610  {
1611  dgemm( C, A, B, 1.0, 1.0 );
1612  }
1614 #endif
1615  //**********************************************************************************************
1616 
1617  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1618 #if BLAZE_BLAS_MODE
1619 
1632  template< typename MT3 // Type of the left-hand side target matrix
1633  , typename MT4 // Type of the left-hand side matrix operand
1634  , typename MT5 > // Type of the right-hand side matrix operand
1635  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1636  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1637  {
1638  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1639  }
1641 #endif
1642  //**********************************************************************************************
1643 
1644  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1645 #if BLAZE_BLAS_MODE
1646 
1659  template< typename MT3 // Type of the left-hand side target matrix
1660  , typename MT4 // Type of the left-hand side matrix operand
1661  , typename MT5 > // Type of the right-hand side matrix operand
1662  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1663  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1664  {
1665  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1666  }
1668 #endif
1669  //**********************************************************************************************
1670 
1671  //**Addition assignment to sparse matrices******************************************************
1672  // No special implementation for the addition assignment to sparse matrices.
1673  //**********************************************************************************************
1674 
1675  //**Subtraction assignment to dense matrices****************************************************
1688  template< typename MT // Type of the target dense matrix
1689  , bool SO > // Storage order of the target dense matrix
1690  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1691  {
1693 
1694  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1695  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1696 
1697  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1698  return;
1699  }
1700 
1701  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1702  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1703 
1704  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1705  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1706  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1707  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1708  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1709  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1710 
1711  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1712  }
1714  //**********************************************************************************************
1715 
1716  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1727  template< typename MT3 // Type of the left-hand side target matrix
1728  , typename MT4 // Type of the left-hand side matrix operand
1729  , typename MT5 > // Type of the right-hand side matrix operand
1730  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1731  {
1732  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
1733  DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1734  else
1735  DMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1736  }
1738  //**********************************************************************************************
1739 
1740  //**Default subtraction assignment to dense matrices********************************************
1754  template< typename MT3 // Type of the left-hand side target matrix
1755  , typename MT4 // Type of the left-hand side matrix operand
1756  , typename MT5 > // Type of the right-hand side matrix operand
1757  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1758  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1759  {
1760  const size_t M( A.rows() );
1761  const size_t N( B.columns() );
1762  const size_t K( A.columns() );
1763 
1764  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1765  const size_t end( N & size_t(-2) );
1766 
1767  for( size_t i=0UL; i<M; ++i ) {
1768  for( size_t k=0UL; k<K; ++k ) {
1769  for( size_t j=0UL; j<end; j+=2UL ) {
1770  C(i,j ) -= A(i,k) * B(k,j );
1771  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1772  }
1773  if( end < N ) {
1774  C(i,end) -= A(i,k) * B(k,end);
1775  }
1776  }
1777  }
1778  }
1780  //**********************************************************************************************
1781 
1782  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1796  template< typename MT3 // Type of the left-hand side target matrix
1797  , typename MT4 // Type of the left-hand side matrix operand
1798  , typename MT5 > // Type of the right-hand side matrix operand
1799  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1800  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1801  {
1802  typedef IntrinsicTrait<ElementType> IT;
1803 
1804  const size_t M( A.rows() );
1805  const size_t N( B.columns() );
1806  const size_t K( A.columns() );
1807 
1808  size_t i( 0UL );
1809 
1810  for( ; (i+2UL) <= M; i+=2UL ) {
1811  size_t j( 0UL );
1812  for( ; (j+4UL) <= N; j+=4UL ) {
1813  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1814  for( size_t k=0UL; k<K; k+=IT::size ) {
1815  const IntrinsicType a1( A.load(i ,k) );
1816  const IntrinsicType a2( A.load(i+1UL,k) );
1817  const IntrinsicType b1( B.load(k,j ) );
1818  const IntrinsicType b2( B.load(k,j+1UL) );
1819  const IntrinsicType b3( B.load(k,j+2UL) );
1820  const IntrinsicType b4( B.load(k,j+3UL) );
1821  xmm1 = xmm1 + a1 * b1;
1822  xmm2 = xmm2 + a1 * b2;
1823  xmm3 = xmm3 + a1 * b3;
1824  xmm4 = xmm4 + a1 * b4;
1825  xmm5 = xmm5 + a2 * b1;
1826  xmm6 = xmm6 + a2 * b2;
1827  xmm7 = xmm7 + a2 * b3;
1828  xmm8 = xmm8 + a2 * b4;
1829  }
1830  (~C)(i ,j ) -= sum( xmm1 );
1831  (~C)(i ,j+1UL) -= sum( xmm2 );
1832  (~C)(i ,j+2UL) -= sum( xmm3 );
1833  (~C)(i ,j+3UL) -= sum( xmm4 );
1834  (~C)(i+1UL,j ) -= sum( xmm5 );
1835  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
1836  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
1837  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
1838  }
1839  for( ; (j+2UL) <= N; j+=2UL ) {
1840  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1841  for( size_t k=0UL; k<K; k+=IT::size ) {
1842  const IntrinsicType a1( A.load(i ,k) );
1843  const IntrinsicType a2( A.load(i+1UL,k) );
1844  const IntrinsicType b1( B.load(k,j ) );
1845  const IntrinsicType b2( B.load(k,j+1UL) );
1846  xmm1 = xmm1 + a1 * b1;
1847  xmm2 = xmm2 + a1 * b2;
1848  xmm3 = xmm3 + a2 * b1;
1849  xmm4 = xmm4 + a2 * b2;
1850  }
1851  (~C)(i ,j ) -= sum( xmm1 );
1852  (~C)(i ,j+1UL) -= sum( xmm2 );
1853  (~C)(i+1UL,j ) -= sum( xmm3 );
1854  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1855  }
1856  if( j < N ) {
1857  IntrinsicType xmm1, xmm2;
1858  for( size_t k=0UL; k<K; k+=IT::size ) {
1859  const IntrinsicType b1( B.load(k,j) );
1860  xmm1 = xmm1 + A.load(i ,k) * b1;
1861  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1862  }
1863  (~C)(i ,j) -= sum( xmm1 );
1864  (~C)(i+1UL,j) -= sum( xmm2 );
1865  }
1866  }
1867  if( i < M ) {
1868  size_t j( 0UL );
1869  for( ; (j+4UL) <= N; j+=4UL ) {
1870  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1871  for( size_t k=0UL; k<K; k+=IT::size ) {
1872  const IntrinsicType a1( A.load(i,k) );
1873  xmm1 = xmm1 + a1 * B.load(k,j );
1874  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1875  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1876  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1877  }
1878  (~C)(i,j ) -= sum( xmm1 );
1879  (~C)(i,j+1UL) -= sum( xmm2 );
1880  (~C)(i,j+2UL) -= sum( xmm3 );
1881  (~C)(i,j+3UL) -= sum( xmm4 );
1882  }
1883  for( ; (j+2UL) <= N; j+=2UL ) {
1884  IntrinsicType xmm1, xmm2;
1885  for( size_t k=0UL; k<K; k+=IT::size ) {
1886  const IntrinsicType a1( A.load(i,k) );
1887  xmm1 = xmm1 + a1 * B.load(k,j );
1888  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1889  }
1890  (~C)(i,j ) -= sum( xmm1 );
1891  (~C)(i,j+1UL) -= sum( xmm2 );
1892  }
1893  if( j < N ) {
1894  IntrinsicType xmm1, xmm2;
1895  for( size_t k=0UL; k<K; k+=IT::size ) {
1896  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1897  }
1898  (~C)(i,j) -= sum( xmm1 );
1899  }
1900  }
1901  }
1903  //**********************************************************************************************
1904 
1905  //**Vectorized default subtraction assignment to column-major dense matrices********************
1919  template< typename MT3 // Type of the left-hand side target matrix
1920  , typename MT4 // Type of the left-hand side matrix operand
1921  , typename MT5 > // Type of the right-hand side matrix operand
1922  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1923  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1924  {
1925  typedef IntrinsicTrait<ElementType> IT;
1926 
1927  const size_t M( A.rows() );
1928  const size_t N( B.columns() );
1929  const size_t K( A.columns() );
1930 
1931  size_t i( 0UL );
1932 
1933  for( ; (i+4UL) <= M; i+=4UL ) {
1934  size_t j( 0UL );
1935  for( ; (j+2UL) <= N; j+=2UL ) {
1936  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1937  for( size_t k=0UL; k<K; k+=IT::size ) {
1938  const IntrinsicType a1( A.load(i ,k) );
1939  const IntrinsicType a2( A.load(i+1UL,k) );
1940  const IntrinsicType a3( A.load(i+2UL,k) );
1941  const IntrinsicType a4( A.load(i+3UL,k) );
1942  const IntrinsicType b1( B.load(k,j ) );
1943  const IntrinsicType b2( B.load(k,j+1UL) );
1944  xmm1 = xmm1 + a1 * b1;
1945  xmm2 = xmm2 + a1 * b2;
1946  xmm3 = xmm3 + a2 * b1;
1947  xmm4 = xmm4 + a2 * b2;
1948  xmm5 = xmm5 + a3 * b1;
1949  xmm6 = xmm6 + a3 * b2;
1950  xmm7 = xmm7 + a4 * b1;
1951  xmm8 = xmm8 + a4 * b2;
1952  }
1953  (~C)(i ,j ) -= sum( xmm1 );
1954  (~C)(i ,j+1UL) -= sum( xmm2 );
1955  (~C)(i+1UL,j ) -= sum( xmm3 );
1956  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1957  (~C)(i+2UL,j ) -= sum( xmm5 );
1958  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
1959  (~C)(i+3UL,j ) -= sum( xmm7 );
1960  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
1961  }
1962  if( j < N ) {
1963  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1964  for( size_t k=0UL; k<K; k+=IT::size ) {
1965  const IntrinsicType b1( B.load(k,j) );
1966  xmm1 = xmm1 + A.load(i ,k) * b1;
1967  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1968  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1969  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1970  }
1971  (~C)(i ,j) -= sum( xmm1 );
1972  (~C)(i+1UL,j) -= sum( xmm2 );
1973  (~C)(i+2UL,j) -= sum( xmm3 );
1974  (~C)(i+3UL,j) -= sum( xmm4 );
1975  }
1976  }
1977  for( ; (i+2UL) <= M; i+=2UL ) {
1978  size_t j( 0UL );
1979  for( ; (j+2UL) <= N; j+=2UL ) {
1980  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1981  for( size_t k=0UL; k<K; k+=IT::size ) {
1982  const IntrinsicType a1( A.load(i ,k) );
1983  const IntrinsicType a2( A.load(i+1UL,k) );
1984  const IntrinsicType b1( B.load(k,j ) );
1985  const IntrinsicType b2( B.load(k,j+1UL) );
1986  xmm1 = xmm1 + a1 * b1;
1987  xmm2 = xmm2 + a1 * b2;
1988  xmm3 = xmm3 + a2 * b1;
1989  xmm4 = xmm4 + a2 * b2;
1990  }
1991  (~C)(i ,j ) -= sum( xmm1 );
1992  (~C)(i ,j+1UL) -= sum( xmm2 );
1993  (~C)(i+1UL,j ) -= sum( xmm3 );
1994  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1995  }
1996  if( j < N ) {
1997  IntrinsicType xmm1, xmm2;
1998  for( size_t k=0UL; k<K; k+=IT::size ) {
1999  const IntrinsicType b1( B.load(k,j) );
2000  xmm1 = xmm1 + A.load(i ,k) * b1;
2001  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2002  }
2003  (~C)(i ,j) -= sum( xmm1 );
2004  (~C)(i+1UL,j) -= sum( xmm2 );
2005  }
2006  }
2007  if( i < M ) {
2008  size_t j( 0UL );
2009  for( ; (j+2UL) <= N; j+=2UL ) {
2010  IntrinsicType xmm1, xmm2;
2011  for( size_t k=0UL; k<K; k+=IT::size ) {
2012  const IntrinsicType a1( A.load(i,k) );
2013  xmm1 = xmm1 + a1 * B.load(k,j );
2014  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2015  }
2016  (~C)(i,j ) -= sum( xmm1 );
2017  (~C)(i,j+1UL) -= sum( xmm2 );
2018  }
2019  if( j < N ) {
2020  IntrinsicType xmm1, xmm2;
2021  for( size_t k=0UL; k<K; k+=IT::size ) {
2022  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2023  }
2024  (~C)(i,j) -= sum( xmm1 );
2025  }
2026  }
2027  }
2029  //**********************************************************************************************
2030 
2031  //**Default subtraction assignment to dense matrices********************************************
2045  template< typename MT3 // Type of the left-hand side target matrix
2046  , typename MT4 // Type of the left-hand side matrix operand
2047  , typename MT5 > // Type of the right-hand side matrix operand
2048  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2049  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2050  {
2051  selectDefaultSubAssignKernel( C, A, B );
2052  }
2054  //**********************************************************************************************
2055 
2056  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
2057 #if BLAZE_BLAS_MODE
2058 
2071  template< typename MT3 // Type of the left-hand side target matrix
2072  , typename MT4 // Type of the left-hand side matrix operand
2073  , typename MT5 > // Type of the right-hand side matrix operand
2074  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2075  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2076  {
2077  sgemm( C, A, B, -1.0F, 1.0F );
2078  }
2080 #endif
2081  //**********************************************************************************************
2082 
2083  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2084 #if BLAZE_BLAS_MODE
2085 
2098  template< typename MT3 // Type of the left-hand side target matrix
2099  , typename MT4 // Type of the left-hand side matrix operand
2100  , typename MT5 > // Type of the right-hand side matrix operand
2101  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2102  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2103  {
2104  dgemm( C, A, B, -1.0, 1.0 );
2105  }
2107 #endif
2108  //**********************************************************************************************
2109 
2110  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2111 #if BLAZE_BLAS_MODE
2112 
2125  template< typename MT3 // Type of the left-hand side target matrix
2126  , typename MT4 // Type of the left-hand side matrix operand
2127  , typename MT5 > // Type of the right-hand side matrix operand
2128  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2129  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2130  {
2131  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2132  }
2134 #endif
2135  //**********************************************************************************************
2136 
2137  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2138 #if BLAZE_BLAS_MODE
2139 
2152  template< typename MT3 // Type of the left-hand side target matrix
2153  , typename MT4 // Type of the left-hand side matrix operand
2154  , typename MT5 > // Type of the right-hand side matrix operand
2155  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2156  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2157  {
2158  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2159  }
2161 #endif
2162  //**********************************************************************************************
2163 
2164  //**Subtraction assignment to sparse matrices***************************************************
2165  // No special implementation for the subtraction assignment to sparse matrices.
2166  //**********************************************************************************************
2167 
2168  //**Multiplication assignment to dense matrices*************************************************
2169  // No special implementation for the multiplication assignment to dense matrices.
2170  //**********************************************************************************************
2171 
2172  //**Multiplication assignment to sparse matrices************************************************
2173  // No special implementation for the multiplication assignment to sparse matrices.
2174  //**********************************************************************************************
2175 
2176  //**SMP assignment to dense matrices************************************************************
2191  template< typename MT // Type of the target dense matrix
2192  , bool SO > // Storage order of the target dense matrix
2193  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2194  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2195  {
2197 
2198  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2199  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2200 
2201  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2202  return;
2203  }
2204  else if( rhs.lhs_.columns() == 0UL ) {
2205  reset( ~lhs );
2206  return;
2207  }
2208 
2209  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2210  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2211 
2212  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2213  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2214  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2215  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2216  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2217  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2218 
2219  smpAssign( ~lhs, A * B );
2220  }
2222  //**********************************************************************************************
2223 
2224  //**SMP assignment to sparse matrices***********************************************************
2239  template< typename MT // Type of the target sparse matrix
2240  , bool SO > // Storage order of the target sparse matrix
2241  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2242  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2243  {
2245 
2246  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2247 
2254 
2255  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2256  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2257 
2258  const TmpType tmp( rhs );
2259  smpAssign( ~lhs, tmp );
2260  }
2262  //**********************************************************************************************
2263 
2264  //**SMP addition assignment to dense matrices***************************************************
2280  template< typename MT // Type of the target dense matrix
2281  , bool SO > // Storage order of the target dense matrix
2282  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2283  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2284  {
2286 
2287  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2288  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2289 
2290  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2291  return;
2292  }
2293 
2294  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2295  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2296 
2297  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2298  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2299  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2300  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2301  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2302  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2303 
2304  smpAddAssign( ~lhs, A * B );
2305  }
2307  //**********************************************************************************************
2308 
2309  //**SMP addition assignment to sparse matrices**************************************************
2310  // No special implementation for the SMP addition assignment to sparse matrices.
2311  //**********************************************************************************************
2312 
2313  //**SMP subtraction assignment to dense matrices************************************************
2329  template< typename MT // Type of the target dense matrix
2330  , bool SO > // Storage order of the target dense matrix
2331  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2332  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2333  {
2335 
2336  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2337  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2338 
2339  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2340  return;
2341  }
2342 
2343  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2344  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2345 
2346  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2347  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2348  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2349  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2350  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2351  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2352 
2353  smpSubAssign( ~lhs, A * B );
2354  }
2356  //**********************************************************************************************
2357 
2358  //**SMP subtraction assignment to sparse matrices***********************************************
2359  // No special implementation for the SMP subtraction assignment to sparse matrices.
2360  //**********************************************************************************************
2361 
2362  //**SMP multiplication assignment to dense matrices*********************************************
2363  // No special implementation for the SMP multiplication assignment to dense matrices.
2364  //**********************************************************************************************
2365 
2366  //**SMP multiplication assignment to sparse matrices********************************************
2367  // No special implementation for the SMP multiplication assignment to sparse matrices.
2368  //**********************************************************************************************
2369 
2370  //**Compile time checks*************************************************************************
2378  //**********************************************************************************************
2379 };
2380 //*************************************************************************************************
2381 
2382 
2383 
2384 
2385 //=================================================================================================
2386 //
2387 // DMATSCALARMULTEXPR SPECIALIZATION
2388 //
2389 //=================================================================================================
2390 
2391 //*************************************************************************************************
2399 template< typename MT1 // Type of the left-hand side dense matrix
2400  , typename MT2 // Type of the right-hand side dense matrix
2401  , typename ST > // Type of the right-hand side scalar value
2402 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
2403  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2404  , private MatScalarMultExpr
2405  , private Computation
2406 {
2407  private:
2408  //**Type definitions****************************************************************************
2409  typedef DMatTDMatMultExpr<MT1,MT2> MMM; //!< Type of the wrapped dense matrix multiplication expression.
2410  typedef typename MMM::ResultType RES; //!< Result type of the multiplication expression.
2411  typedef typename MT1::ResultType RT1; //!< Result type of the left-hand side dense matrix operand.
2412  typedef typename MT2::ResultType RT2; //!< Result type of the right-hand side dense matrix operand.
2413  typedef typename RT1::ElementType ET1; //!< Element type of the left-hand side result type.
2414  typedef typename RT2::ElementType ET2; //!< Element type of the right-hand side result type.
2415  typedef typename MT1::CompositeType CT1; //!< Composite type of the left-hand side dense matrix operand.
2416  typedef typename MT2::CompositeType CT2; //!< Composite type of the right-hand side dense matrix operand.
2417  //**********************************************************************************************
2418 
2419  //**********************************************************************************************
2421  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2422  //**********************************************************************************************
2423 
2424  //**********************************************************************************************
2426  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2427  //**********************************************************************************************
2428 
2429  //**********************************************************************************************
2431 
2434  template< typename T1, typename T2, typename T3 >
2435  struct IsEvaluationRequired {
2436  enum { value = ( evaluateLeft || evaluateRight ) };
2437  };
2438  //**********************************************************************************************
2439 
2440  //**********************************************************************************************
2442 
2445  template< typename T1, typename T2, typename T3, typename T4 >
2446  struct UseSinglePrecisionKernel {
2447  enum { value = BLAZE_BLAS_MODE &&
2448  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2449  IsFloat<typename T1::ElementType>::value &&
2450  IsFloat<typename T2::ElementType>::value &&
2451  IsFloat<typename T3::ElementType>::value &&
2452  !IsComplex<T4>::value };
2453  };
2454  //**********************************************************************************************
2455 
2456  //**********************************************************************************************
2458 
2461  template< typename T1, typename T2, typename T3, typename T4 >
2462  struct UseDoublePrecisionKernel {
2463  enum { value = BLAZE_BLAS_MODE &&
2464  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2465  IsDouble<typename T1::ElementType>::value &&
2466  IsDouble<typename T2::ElementType>::value &&
2467  IsDouble<typename T3::ElementType>::value &&
2468  !IsComplex<T4>::value };
2469  };
2470  //**********************************************************************************************
2471 
2472  //**********************************************************************************************
2474 
2477  template< typename T1, typename T2, typename T3 >
2478  struct UseSinglePrecisionComplexKernel {
2479  typedef complex<float> Type;
2480  enum { value = BLAZE_BLAS_MODE &&
2481  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2482  IsSame<typename T1::ElementType,Type>::value &&
2483  IsSame<typename T2::ElementType,Type>::value &&
2484  IsSame<typename T3::ElementType,Type>::value };
2485  };
2486  //**********************************************************************************************
2487 
2488  //**********************************************************************************************
2490 
2493  template< typename T1, typename T2, typename T3 >
2494  struct UseDoublePrecisionComplexKernel {
2495  typedef complex<double> Type;
2496  enum { value = BLAZE_BLAS_MODE &&
2497  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2498  IsSame<typename T1::ElementType,Type>::value &&
2499  IsSame<typename T2::ElementType,Type>::value &&
2500  IsSame<typename T3::ElementType,Type>::value };
2501  };
2502  //**********************************************************************************************
2503 
2504  //**********************************************************************************************
2506 
2508  template< typename T1, typename T2, typename T3, typename T4 >
2509  struct UseDefaultKernel {
2510  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2511  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2512  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2513  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2514  };
2515  //**********************************************************************************************
2516 
2517  //**********************************************************************************************
2519 
2521  template< typename T1, typename T2, typename T3, typename T4 >
2522  struct UseVectorizedDefaultKernel {
2523  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2524  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2525  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2526  IsSame<typename T1::ElementType,T4>::value &&
2527  IntrinsicTrait<typename T1::ElementType>::addition &&
2528  IntrinsicTrait<typename T1::ElementType>::multiplication };
2529  };
2530  //**********************************************************************************************
2531 
2532  public:
2533  //**Type definitions****************************************************************************
2534  typedef DMatScalarMultExpr<MMM,ST,false> This; //!< Type of this DMatScalarMultExpr instance.
2535  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type, obtained from MultTrait for RES and ST.
2536  typedef typename ResultType::OppositeType OppositeType; //!< OppositeType of the result type.
2537  typedef typename ResultType::TransposeType TransposeType; //!< TransposeType of the result type.
2538  typedef typename ResultType::ElementType ElementType; //!< Element type of the result type.
2539  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type associated with ElementType via IntrinsicTrait.
2540  typedef const ElementType ReturnType; //!< Return type of the access operator.
2541  typedef const ResultType CompositeType; //!< Const-qualified result type exposed as CompositeType.
2542 
2544  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand; //!< Type of the left-hand side operand (the multiplication expression).
2545 
2547  typedef ST RightOperand; //!< Type of the right-hand side operand (the scalar).
2548 
2550  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT; //!< Type the left matrix operand is bound to in the assignment functions (const RT1 or CT1, selected by evaluateLeft).
2551 
2553  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT; //!< Type the right matrix operand is bound to in the assignment functions (const RT2 or CT2, selected by evaluateRight).
2554  //**********************************************************************************************
2555 
2556  //**Compilation flags***************************************************************************
2558  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2559  IsSame<ET1,ET2>::value &&
2560  IsSame<ET1,ST>::value &&
2561  IntrinsicTrait<ET1>::addition &&
2562  IntrinsicTrait<ET1>::multiplication };
2563 
2565  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2566  !evaluateRight && MT2::smpAssignable };
2567  //**********************************************************************************************
2568 
2569  //**Constructor*********************************************************************************
2575  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2576  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2577  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2578  {}
2579  //**********************************************************************************************
2580 
2581  //**Access operator*****************************************************************************
2588  inline ReturnType operator()( size_t i, size_t j ) const {
2589  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2590  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2591  return matrix_(i,j) * scalar_;
2592  }
2593  //**********************************************************************************************
2594 
2595  //**Rows function*******************************************************************************
2600  inline size_t rows() const {
2601  return matrix_.rows();
2602  }
2603  //**********************************************************************************************
2604 
2605  //**Columns function****************************************************************************
2610  inline size_t columns() const {
2611  return matrix_.columns();
2612  }
2613  //**********************************************************************************************
2614 
2615  //**Left operand access*************************************************************************
2620  inline LeftOperand leftOperand() const {
2621  return matrix_;
2622  }
2623  //**********************************************************************************************
2624 
2625  //**Right operand access************************************************************************
2630  inline RightOperand rightOperand() const {
2631  return scalar_;
2632  }
2633  //**********************************************************************************************
2634 
2635  //**********************************************************************************************
2641  template< typename T >
2642  inline bool canAlias( const T* alias ) const {
2643  return matrix_.canAlias( alias );
2644  }
2645  //**********************************************************************************************
2646 
2647  //**********************************************************************************************
2653  template< typename T >
2654  inline bool isAliased( const T* alias ) const {
2655  return matrix_.isAliased( alias );
2656  }
2657  //**********************************************************************************************
2658 
2659  //**********************************************************************************************
2664  inline bool isAligned() const {
2665  return matrix_.isAligned();
2666  }
2667  //**********************************************************************************************
2668 
2669  //**********************************************************************************************
2674  inline bool canSMPAssign() const {
2675  typename MMM::LeftOperand A( matrix_.leftOperand() );
2676  return ( !BLAZE_BLAS_IS_PARALLEL ||
2677  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
2678  ( A.rows() > SMP_DMATTDMATMULT_THRESHOLD );
2679  }
2680  //**********************************************************************************************
2681 
2682  private:
2683  //**Member variables****************************************************************************
2684  LeftOperand matrix_; //!< Left-hand side dense matrix multiplication expression.
2685  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
2686  //**********************************************************************************************
2687 
2688  //**BLAS kernel (single precision)**************************************************************
2689 #if BLAZE_BLAS_MODE
2690 
2704  template< typename MT3 // Type of the left-hand side target matrix
2705  , typename MT4 // Type of the left-hand side matrix operand
2706  , typename MT5 > // Type of the right-hand side matrix operand
2707  static inline void sgemm( MT3& C, const MT4& A, const MT5& B, float alpha, float beta )
2708  {
2709  using boost::numeric_cast;
2710 
2714 
2715  const int M ( numeric_cast<int>( A.rows() ) );
2716  const int N ( numeric_cast<int>( B.columns() ) );
2717  const int K ( numeric_cast<int>( A.columns() ) );
2718  const int lda( numeric_cast<int>( A.spacing() ) );
2719  const int ldb( numeric_cast<int>( B.spacing() ) );
2720  const int ldc( numeric_cast<int>( C.spacing() ) );
2721 
2722  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2723  cblas_ssymm( CblasColMajor, CblasLeft, CblasLower,
2724  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2725  }
2726  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2727  cblas_ssymm( CblasRowMajor, CblasRight, CblasUpper,
2728  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2729  }
2730  else {
2731  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2732  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2733  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2734  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2735  }
2736  }
2737 #endif
2738  //**********************************************************************************************
2739 
2740  //**BLAS kernel (double precision)**************************************************************
2741 #if BLAZE_BLAS_MODE
2742 
2756  template< typename MT3 // Type of the left-hand side target matrix
2757  , typename MT4 // Type of the left-hand side matrix operand
2758  , typename MT5 > // Type of the right-hand side matrix operand
2759  static inline void dgemm( MT3& C, const MT4& A, const MT5& B, double alpha, double beta )
2760  {
2761  using boost::numeric_cast;
2762 
2766 
2767  const int M ( numeric_cast<int>( A.rows() ) );
2768  const int N ( numeric_cast<int>( B.columns() ) );
2769  const int K ( numeric_cast<int>( A.columns() ) );
2770  const int lda( numeric_cast<int>( A.spacing() ) );
2771  const int ldb( numeric_cast<int>( B.spacing() ) );
2772  const int ldc( numeric_cast<int>( C.spacing() ) );
2773 
2774  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2775  cblas_dsymm( CblasColMajor, CblasLeft, CblasLower,
2776  M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2777  }
2778  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2779  cblas_dsymm( CblasRowMajor, CblasRight, CblasUpper,
2780  M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2781  }
2782  else {
2783  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2784  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2785  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2786  M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2787  }
2788  }
2789 #endif
2790  //**********************************************************************************************
2791 
2792  //**BLAS kernel (single precision complex)******************************************************
2793 #if BLAZE_BLAS_MODE
2794 
2808  template< typename MT3 // Type of the left-hand side target matrix
2809  , typename MT4 // Type of the left-hand side matrix operand
2810  , typename MT5 > // Type of the right-hand side matrix operand
2811  static inline void cgemm( MT3& C, const MT4& A, const MT5& B,
2812  complex<float> alpha, complex<float> beta )
2813  {
2814  using boost::numeric_cast;
2815 
2819  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2820  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2821  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2822 
2823  const int M ( numeric_cast<int>( A.rows() ) );
2824  const int N ( numeric_cast<int>( B.columns() ) );
2825  const int K ( numeric_cast<int>( A.columns() ) );
2826  const int lda( numeric_cast<int>( A.spacing() ) );
2827  const int ldb( numeric_cast<int>( B.spacing() ) );
2828  const int ldc( numeric_cast<int>( C.spacing() ) );
2829 
2830  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2831  cblas_csymm( CblasColMajor, CblasLeft, CblasLower,
2832  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2833  }
2834  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2835  cblas_csymm( CblasRowMajor, CblasRight, CblasUpper,
2836  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
2837  }
2838  else {
2839  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2840  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2841  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2842  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2843  }
2844  }
2845 #endif
2846  //**********************************************************************************************
2847 
2848  //**BLAS kernel (double precision complex)******************************************************
2849 #if BLAZE_BLAS_MODE
2850 
2864  template< typename MT3 // Type of the left-hand side target matrix
2865  , typename MT4 // Type of the left-hand side matrix operand
2866  , typename MT5 > // Type of the right-hand side matrix operand
2867  static inline void zgemm( MT3& C, const MT4& A, const MT5& B,
2868  complex<double> alpha, complex<double> beta )
2869  {
2870  using boost::numeric_cast;
2871 
2875  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2876  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2877  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2878 
2879  const int M ( numeric_cast<int>( A.rows() ) );
2880  const int N ( numeric_cast<int>( B.columns() ) );
2881  const int K ( numeric_cast<int>( A.columns() ) );
2882  const int lda( numeric_cast<int>( A.spacing() ) );
2883  const int ldb( numeric_cast<int>( B.spacing() ) );
2884  const int ldc( numeric_cast<int>( C.spacing() ) );
2885 
2886  if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2887  cblas_zsymm( CblasColMajor, CblasLeft, CblasLower,
2888  M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2889  }
2890  else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2891  cblas_zsymm( CblasRowMajor, CblasRight, CblasUpper,
2892  M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
2893  }
2894  else {
2895  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2896  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2897  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2898  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2899  }
2900  }
2901 #endif
2902  //**********************************************************************************************
2903 
2904  //**Assignment to dense matrices****************************************************************
2916  template< typename MT // Type of the target dense matrix
2917  , bool SO > // Storage order of the target dense matrix
2918  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2919  {
2921 
2922  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2923  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2924 
2925  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2926  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2927 
2928  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2929  return;
2930  }
2931  else if( left.columns() == 0UL ) {
2932  reset( ~lhs );
2933  return;
2934  }
2935 
2936  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2937  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2938 
2939  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2940  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2941  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2942  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2943  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2944  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2945 
2946  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2947  }
2948  //**********************************************************************************************
2949 
2950  //**Assignment to dense matrices (kernel selection)*********************************************
2961  template< typename MT3 // Type of the left-hand side target matrix
2962  , typename MT4 // Type of the left-hand side matrix operand
2963  , typename MT5 // Type of the right-hand side matrix operand
2964  , typename ST2 > // Type of the scalar value
2965  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2966  {
2967  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
2968  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2969  else
2970  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2971  }
2972  //**********************************************************************************************
2973 
2974  //**Default assignment to dense matrices********************************************************
2988  template< typename MT3 // Type of the left-hand side target matrix
2989  , typename MT4 // Type of the left-hand side matrix operand
2990  , typename MT5 // Type of the right-hand side matrix operand
2991  , typename ST2 > // Type of the scalar value
2992  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2993  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2994  {
2995  for( size_t i=0UL; i<A.rows(); ++i ) {
2996  for( size_t k=0UL; k<B.columns(); ++k ) {
2997  C(i,k) = A(i,0UL) * B(0UL,k);
2998  }
2999  for( size_t j=1UL; j<A.columns(); ++j ) {
3000  for( size_t k=0UL; k<B.columns(); ++k ) {
3001  C(i,k) += A(i,j) * B(j,k);
3002  }
3003  }
3004  for( size_t k=0UL; k<B.columns(); ++k ) {
3005  C(i,k) *= scalar;
3006  }
3007  }
3008  }
3009  //**********************************************************************************************
3010 
3011  //**Vectorized default assignment to row-major dense matrices***********************************
3025  template< typename MT3 // Type of the left-hand side target matrix
3026  , typename MT4 // Type of the left-hand side matrix operand
3027  , typename MT5 // Type of the right-hand side matrix operand
3028  , typename ST2 > // Type of the scalar value
3029  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3030  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3031  {
3032  typedef IntrinsicTrait<ElementType> IT;
3033 
3034  const size_t M( A.rows() );
3035  const size_t N( B.columns() );
3036  const size_t K( A.columns() );
3037 
3038  size_t i( 0UL );
3039 
3040  for( ; (i+2UL) <= M; i+=2UL ) {
3041  size_t j( 0UL );
3042  for( ; (j+4UL) <= N; j+=4UL ) {
3043  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3044  for( size_t k=0UL; k<K; k+=IT::size ) {
3045  const IntrinsicType a1( A.load(i ,k) );
3046  const IntrinsicType a2( A.load(i+1UL,k) );
3047  const IntrinsicType b1( B.load(k,j ) );
3048  const IntrinsicType b2( B.load(k,j+1UL) );
3049  const IntrinsicType b3( B.load(k,j+2UL) );
3050  const IntrinsicType b4( B.load(k,j+3UL) );
3051  xmm1 = xmm1 + a1 * b1;
3052  xmm2 = xmm2 + a1 * b2;
3053  xmm3 = xmm3 + a1 * b3;
3054  xmm4 = xmm4 + a1 * b4;
3055  xmm5 = xmm5 + a2 * b1;
3056  xmm6 = xmm6 + a2 * b2;
3057  xmm7 = xmm7 + a2 * b3;
3058  xmm8 = xmm8 + a2 * b4;
3059  }
3060  (~C)(i ,j ) = sum( xmm1 ) * scalar;
3061  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
3062  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
3063  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
3064  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
3065  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
3066  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
3067  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
3068  }
3069  for( ; (j+2UL) <= N; j+=2UL ) {
3070  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3071  for( size_t k=0UL; k<K; k+=IT::size ) {
3072  const IntrinsicType a1( A.load(i ,k) );
3073  const IntrinsicType a2( A.load(i+1UL,k) );
3074  const IntrinsicType b1( B.load(k,j ) );
3075  const IntrinsicType b2( B.load(k,j+1UL) );
3076  xmm1 = xmm1 + a1 * b1;
3077  xmm2 = xmm2 + a1 * b2;
3078  xmm3 = xmm3 + a2 * b1;
3079  xmm4 = xmm4 + a2 * b2;
3080  }
3081  (~C)(i ,j ) = sum( xmm1 ) * scalar;
3082  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
3083  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
3084  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
3085  }
3086  if( j < N ) {
3087  IntrinsicType xmm1, xmm2;
3088  for( size_t k=0UL; k<K; k+=IT::size ) {
3089  const IntrinsicType b1( B.load(k,j) );
3090  xmm1 = xmm1 + A.load(i ,k) * b1;
3091  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3092  }
3093  (~C)(i ,j) = sum( xmm1 ) * scalar;
3094  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
3095  }
3096  }
3097  if( i < M ) {
3098  size_t j( 0UL );
3099  for( ; (j+4UL) <= N; j+=4UL ) {
3100  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3101  for( size_t k=0UL; k<K; k+=IT::size ) {
3102  const IntrinsicType a1( A.load(i,k) );
3103  xmm1 = xmm1 + a1 * B.load(k,j );
3104  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3105  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3106  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3107  }
3108  (~C)(i,j ) = sum( xmm1 ) * scalar;
3109  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
3110  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
3111  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
3112  }
3113  for( ; (j+2UL) <= N; j+=2UL ) {
3114  IntrinsicType xmm1, xmm2;
3115  for( size_t k=0UL; k<K; k+=IT::size ) {
3116  const IntrinsicType a1( A.load(i,k) );
3117  xmm1 = xmm1 + a1 * B.load(k,j );
3118  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3119  }
3120  (~C)(i,j ) = sum( xmm1 ) * scalar;
3121  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
3122  }
3123  if( j < N ) {
3124  IntrinsicType xmm1, xmm2;
3125  for( size_t k=0UL; k<K; k+=IT::size ) {
3126  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3127  }
3128  (~C)(i,j) = sum( xmm1 ) * scalar;
3129  }
3130  }
3131  }
3132  //**********************************************************************************************
3133 
3134  //**Vectorized default assignment to column-major dense matrices********************************
3148  template< typename MT3 // Type of the left-hand side target matrix
3149  , typename MT4 // Type of the left-hand side matrix operand
3150  , typename MT5 // Type of the right-hand side matrix operand
3151  , typename ST2 > // Type of the scalar value
3152  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3153  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3154  {
3155  typedef IntrinsicTrait<ElementType> IT;
3156 
3157  const size_t M( A.rows() );
3158  const size_t N( B.columns() );
3159  const size_t K( A.columns() );
3160 
3161  size_t i( 0UL );
3162 
3163  for( ; (i+4UL) <= M; i+=4UL ) {
3164  size_t j( 0UL );
3165  for( ; (j+2UL) <= N; j+=2UL ) {
3166  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3167  for( size_t k=0UL; k<K; k+=IT::size ) {
3168  const IntrinsicType a1( A.load(i ,k) );
3169  const IntrinsicType a2( A.load(i+1UL,k) );
3170  const IntrinsicType a3( A.load(i+2UL,k) );
3171  const IntrinsicType a4( A.load(i+3UL,k) );
3172  const IntrinsicType b1( B.load(k,j ) );
3173  const IntrinsicType b2( B.load(k,j+1UL) );
3174  xmm1 = xmm1 + a1 * b1;
3175  xmm2 = xmm2 + a1 * b2;
3176  xmm3 = xmm3 + a2 * b1;
3177  xmm4 = xmm4 + a2 * b2;
3178  xmm5 = xmm5 + a3 * b1;
3179  xmm6 = xmm6 + a3 * b2;
3180  xmm7 = xmm7 + a4 * b1;
3181  xmm8 = xmm8 + a4 * b2;
3182  }
3183  (~C)(i ,j ) = sum( xmm1 ) * scalar;
3184  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
3185  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
3186  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
3187  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
3188  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
3189  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
3190  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
3191  }
3192  if( j < N ) {
3193  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3194  for( size_t k=0UL; k<K; k+=IT::size ) {
3195  const IntrinsicType b1( B.load(k,j) );
3196  xmm1 = xmm1 + A.load(i ,k) * b1;
3197  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3198  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3199  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3200  }
3201  (~C)(i ,j) = sum( xmm1 ) * scalar;
3202  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
3203  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
3204  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
3205  }
3206  }
3207  for( ; (i+2UL) <= M; i+=2UL ) {
3208  size_t j( 0UL );
3209  for( ; (j+2UL) <= N; j+=2UL ) {
3210  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3211  for( size_t k=0UL; k<K; k+=IT::size ) {
3212  const IntrinsicType a1( A.load(i ,k) );
3213  const IntrinsicType a2( A.load(i+1UL,k) );
3214  const IntrinsicType b1( B.load(k,j ) );
3215  const IntrinsicType b2( B.load(k,j+1UL) );
3216  xmm1 = xmm1 + a1 * b1;
3217  xmm2 = xmm2 + a1 * b2;
3218  xmm3 = xmm3 + a2 * b1;
3219  xmm4 = xmm4 + a2 * b2;
3220  }
3221  (~C)(i ,j ) = sum( xmm1 ) * scalar;
3222  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
3223  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
3224  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
3225  }
3226  if( j < N ) {
3227  IntrinsicType xmm1, xmm2;
3228  for( size_t k=0UL; k<K; k+=IT::size ) {
3229  const IntrinsicType b1( B.load(k,j) );
3230  xmm1 = xmm1 + A.load(i ,k) * b1;
3231  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3232  }
3233  (~C)(i ,j) = sum( xmm1 ) * scalar;
3234  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
3235  }
3236  }
3237  if( i < M ) {
3238  size_t j( 0UL );
3239  for( ; (j+2UL) <= N; j+=2UL ) {
3240  IntrinsicType xmm1, xmm2;
3241  for( size_t k=0UL; k<K; k+=IT::size ) {
3242  const IntrinsicType a1( A.load(i,k) );
3243  xmm1 = xmm1 + a1 * B.load(k,j );
3244  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3245  }
3246  (~C)(i,j ) = sum( xmm1 ) * scalar;
3247  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
3248  }
3249  if( j < N ) {
3250  IntrinsicType xmm1, xmm2;
3251  for( size_t k=0UL; k<K; k+=IT::size ) {
3252  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3253  }
3254  (~C)(i,j) = sum( xmm1 ) * scalar;
3255  }
3256  }
3257  }
3258  //**********************************************************************************************
3259 
3260  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback overload of the BLAS assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds. It performs no BLAS call and simply forwards all
// arguments to selectDefaultAssignKernel().
3274  template< typename MT3 // Type of the left-hand side target matrix
3275  , typename MT4 // Type of the left-hand side matrix operand
3276  , typename MT5 // Type of the right-hand side matrix operand
3277  , typename ST2 > // Type of the scalar value
3278  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3279  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3280  {
3281  selectDefaultAssignKernel( C, A, B, scalar );
3282  }
3283  //**********************************************************************************************
3284 
3285  //**BLAS-based assignment to dense matrices (single precision)**********************************
3286 #if BLAZE_BLAS_MODE
3287 
// Single precision BLAS assignment kernel, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to sgemm(), passing scalar and 0.0F as the two trailing arguments.
// NOTE(review): these are presumably the GEMM alpha/beta factors, i.e. C = scalar*A*B with the
// previous content of C discarded -- confirm against the sgemm wrapper.
3300  template< typename MT3 // Type of the left-hand side target matrix
3301  , typename MT4 // Type of the left-hand side matrix operand
3302  , typename MT5 // Type of the right-hand side matrix operand
3303  , typename ST2 > // Type of the scalar value
3304  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3305  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3306  {
3307  sgemm( C, A, B, scalar, 0.0F );
3308  }
3309 #endif
3310  //**********************************************************************************************
3311 
3312  //**BLAS-based assignment to dense matrices (double precision)**********************************
3313 #if BLAZE_BLAS_MODE
3314 
// Double precision BLAS assignment kernel, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to dgemm(), passing scalar and 0.0 as the two trailing arguments.
// NOTE(review): these are presumably the GEMM alpha/beta factors, i.e. C = scalar*A*B with the
// previous content of C discarded -- confirm against the dgemm wrapper.
3327  template< typename MT3 // Type of the left-hand side target matrix
3328  , typename MT4 // Type of the left-hand side matrix operand
3329  , typename MT5 // Type of the right-hand side matrix operand
3330  , typename ST2 > // Type of the scalar value
3331  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3332  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3333  {
3334  dgemm( C, A, B, scalar, 0.0 );
3335  }
3336 #endif
3337  //**********************************************************************************************
3338 
3339  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3340 #if BLAZE_BLAS_MODE
3341 
// Single precision complex BLAS assignment kernel, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to cgemm(); scalar becomes the real part of a complex<float> with zero imaginary
// part, and the last argument is complex zero.
// NOTE(review): the enabling condition does not involve ST2, so scalar is presumably expected to
// be convertible to float here -- confirm. The trailing arguments are presumably GEMM alpha/beta.
3354  template< typename MT3 // Type of the left-hand side target matrix
3355  , typename MT4 // Type of the left-hand side matrix operand
3356  , typename MT5 // Type of the right-hand side matrix operand
3357  , typename ST2 > // Type of the scalar value
3358  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3359  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3360  {
3361  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
3362  }
3363 #endif
3364  //**********************************************************************************************
3365 
3366  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3367 #if BLAZE_BLAS_MODE
3368 
// Double precision complex BLAS assignment kernel, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to zgemm(); scalar becomes the real part of a complex<double> with zero imaginary
// part, and the last argument is complex zero.
// NOTE(review): the enabling condition does not involve ST2, so scalar is presumably expected to
// be convertible to double here -- confirm. The trailing arguments are presumably GEMM alpha/beta.
3381  template< typename MT3 // Type of the left-hand side target matrix
3382  , typename MT4 // Type of the left-hand side matrix operand
3383  , typename MT5 // Type of the right-hand side matrix operand
3384  , typename ST2 > // Type of the scalar value
3385  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3386  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3387  {
3388  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
3389  }
3390 #endif
3391  //**********************************************************************************************
3392 
3393  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled matrix product (rhs) to a sparse matrix (lhs).
// The expression is first evaluated serially into a temporary of type TmpType, which is then
// assigned to the sparse target; the dimensions of lhs and rhs are asserted to match beforehand.
3405  template< typename MT // Type of the target sparse matrix
3406  , bool SO > // Storage order of the target sparse matrix
3407  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3408  {
3410 
// TmpType is chosen from OppositeType/ResultType depending on SO.
// NOTE(review): presumably OppositeType is taken when SO is true -- confirm against SelectType.
3411  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3412 
3419 
3420  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3421  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3422 
3423  const TmpType tmp( serial( rhs ) ); // Serial evaluation of the whole expression
3424  assign( ~lhs, tmp ); // Assignment of the evaluated temporary to the sparse target
3425  }
3426  //**********************************************************************************************
3427 
3428  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled matrix product (rhs) to a dense matrix (lhs).
// Extracts both operands of the wrapped product, returns early if the result or the inner
// dimension is empty, evaluates the operands serially, and hands them together with the scalar
// to selectAddAssignKernel().
3440  template< typename MT // Type of the target dense matrix
3441  , bool SO > // Storage order of the target dense matrix
3442  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3443  {
3445 
3446  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3447  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3448 
3449  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3450  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3451 
// Nothing to add when the target has no elements or the inner dimension is zero.
3452  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3453  return;
3454  }
3455 
3456  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3457  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3458 
3459  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3460  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3461  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3462  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3463  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3464  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3465 
3466  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3467  }
3468  //**********************************************************************************************
3469 
3470  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel dispatch for the addition assignment C += (A * B) * scalar.
// Targets with fewer than DMATTDMATMULT_THRESHOLD elements (rows times columns) are handled by
// the default kernel, all other targets by the BLAS kernel.
3481  template< typename MT3 // Type of the left-hand side target matrix
3482  , typename MT4 // Type of the left-hand side matrix operand
3483  , typename MT5 // Type of the right-hand side matrix operand
3484  , typename ST2 > // Type of the scalar value
3485  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3486  {
3487  const bool belowThreshold( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD );
3488  if( belowThreshold ) { DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar ); }
3489  else
3490  { DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar ); }
3491  }
3492  //**********************************************************************************************
3493 
3494  //**Default addition assignment to dense matrices***********************************************
// Non-vectorized default kernel for the addition assignment, selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// The scaled product A * B * scalar is evaluated serially into a temporary of type ResultType,
// which is then added to C via addAssign().
3508  template< typename MT3 // Type of the left-hand side target matrix
3509  , typename MT4 // Type of the left-hand side matrix operand
3510  , typename MT5 // Type of the right-hand side matrix operand
3511  , typename ST2 > // Type of the scalar value
3512  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3513  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3514  {
3515  const ResultType tmp( serial( A * B * scalar ) ); // Serial evaluation of the scaled product
3516  addAssign( C, tmp ); // Element-wise addition of the temporary to the target
3517  }
3518  //**********************************************************************************************
3519 
3520  //**Vectorized default addition assignment to row-major dense matrices**************************
3534  template< typename MT3 // Type of the left-hand side target matrix
3535  , typename MT4 // Type of the left-hand side matrix operand
3536  , typename MT5 // Type of the right-hand side matrix operand
3537  , typename ST2 > // Type of the scalar value
3538  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3539  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3540  {
3541  typedef IntrinsicTrait<ElementType> IT;
3542 
3543  const size_t M( A.rows() );
3544  const size_t N( B.columns() );
3545  const size_t K( A.columns() );
3546 
3547  size_t i( 0UL );
3548 
3549  for( ; (i+2UL) <= M; i+=2UL ) {
3550  size_t j( 0UL );
3551  for( ; (j+4UL) <= N; j+=4UL ) {
3552  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3553  for( size_t k=0UL; k<K; k+=IT::size ) {
3554  const IntrinsicType a1( A.load(i ,k) );
3555  const IntrinsicType a2( A.load(i+1UL,k) );
3556  const IntrinsicType b1( B.load(k,j ) );
3557  const IntrinsicType b2( B.load(k,j+1UL) );
3558  const IntrinsicType b3( B.load(k,j+2UL) );
3559  const IntrinsicType b4( B.load(k,j+3UL) );
3560  xmm1 = xmm1 + a1 * b1;
3561  xmm2 = xmm2 + a1 * b2;
3562  xmm3 = xmm3 + a1 * b3;
3563  xmm4 = xmm4 + a1 * b4;
3564  xmm5 = xmm5 + a2 * b1;
3565  xmm6 = xmm6 + a2 * b2;
3566  xmm7 = xmm7 + a2 * b3;
3567  xmm8 = xmm8 + a2 * b4;
3568  }
3569  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3570  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3571  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
3572  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
3573  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
3574  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
3575  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
3576  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
3577  }
3578  for( ; (j+2UL) <= N; j+=2UL ) {
3579  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3580  for( size_t k=0UL; k<K; k+=IT::size ) {
3581  const IntrinsicType a1( A.load(i ,k) );
3582  const IntrinsicType a2( A.load(i+1UL,k) );
3583  const IntrinsicType b1( B.load(k,j ) );
3584  const IntrinsicType b2( B.load(k,j+1UL) );
3585  xmm1 = xmm1 + a1 * b1;
3586  xmm2 = xmm2 + a1 * b2;
3587  xmm3 = xmm3 + a2 * b1;
3588  xmm4 = xmm4 + a2 * b2;
3589  }
3590  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3591  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3592  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3593  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3594  }
3595  if( j < N ) {
3596  IntrinsicType xmm1, xmm2;
3597  for( size_t k=0UL; k<K; k+=IT::size ) {
3598  const IntrinsicType b1( B.load(k,j) );
3599  xmm1 = xmm1 + A.load(i ,k) * b1;
3600  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3601  }
3602  (~C)(i ,j) += sum( xmm1 ) * scalar;
3603  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3604  }
3605  }
3606  if( i < M ) {
3607  size_t j( 0UL );
3608  for( ; (j+4UL) <= N; j+=4UL ) {
3609  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3610  for( size_t k=0UL; k<K; k+=IT::size ) {
3611  const IntrinsicType a1( A.load(i,k) );
3612  xmm1 = xmm1 + a1 * B.load(k,j );
3613  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3614  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3615  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3616  }
3617  (~C)(i,j ) += sum( xmm1 ) * scalar;
3618  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3619  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
3620  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
3621  }
3622  for( ; (j+2UL) <= N; j+=2UL ) {
3623  IntrinsicType xmm1, xmm2;
3624  for( size_t k=0UL; k<K; k+=IT::size ) {
3625  const IntrinsicType a1( A.load(i,k) );
3626  xmm1 = xmm1 + a1 * B.load(k,j );
3627  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3628  }
3629  (~C)(i,j ) += sum( xmm1 ) * scalar;
3630  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3631  }
3632  if( j < N ) {
3633  IntrinsicType xmm1, xmm2;
3634  for( size_t k=0UL; k<K; k+=IT::size ) {
3635  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3636  }
3637  (~C)(i,j) += sum( xmm1 ) * scalar;
3638  }
3639  }
3640  }
3641  //**********************************************************************************************
3642 
3643  //**Vectorized default addition assignment to column-major dense matrices***********************
3657  template< typename MT3 // Type of the left-hand side target matrix
3658  , typename MT4 // Type of the left-hand side matrix operand
3659  , typename MT5 // Type of the right-hand side matrix operand
3660  , typename ST2 > // Type of the scalar value
3661  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3662  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3663  {
3664  typedef IntrinsicTrait<ElementType> IT;
3665 
3666  const size_t M( A.rows() );
3667  const size_t N( B.columns() );
3668  const size_t K( A.columns() );
3669 
3670  size_t i( 0UL );
3671 
3672  for( ; (i+4UL) <= M; i+=4UL ) {
3673  size_t j( 0UL );
3674  for( ; (j+2UL) <= N; j+=2UL ) {
3675  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3676  for( size_t k=0UL; k<K; k+=IT::size ) {
3677  const IntrinsicType a1( A.load(i ,k) );
3678  const IntrinsicType a2( A.load(i+1UL,k) );
3679  const IntrinsicType a3( A.load(i+2UL,k) );
3680  const IntrinsicType a4( A.load(i+3UL,k) );
3681  const IntrinsicType b1( B.load(k,j ) );
3682  const IntrinsicType b2( B.load(k,j+1UL) );
3683  xmm1 = xmm1 + a1 * b1;
3684  xmm2 = xmm2 + a1 * b2;
3685  xmm3 = xmm3 + a2 * b1;
3686  xmm4 = xmm4 + a2 * b2;
3687  xmm5 = xmm5 + a3 * b1;
3688  xmm6 = xmm6 + a3 * b2;
3689  xmm7 = xmm7 + a4 * b1;
3690  xmm8 = xmm8 + a4 * b2;
3691  }
3692  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3693  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3694  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3695  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3696  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
3697  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
3698  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
3699  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
3700  }
3701  if( j < N ) {
3702  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3703  for( size_t k=0UL; k<K; k+=IT::size ) {
3704  const IntrinsicType b1( B.load(k,j) );
3705  xmm1 = xmm1 + A.load(i ,k) * b1;
3706  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3707  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3708  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3709  }
3710  (~C)(i ,j) += sum( xmm1 ) * scalar;
3711  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3712  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
3713  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
3714  }
3715  }
3716  for( ; (i+2UL) <= M; i+=2UL ) {
3717  size_t j( 0UL );
3718  for( ; (j+2UL) <= N; j+=2UL ) {
3719  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3720  for( size_t k=0UL; k<K; k+=IT::size ) {
3721  const IntrinsicType a1( A.load(i ,k) );
3722  const IntrinsicType a2( A.load(i+1UL,k) );
3723  const IntrinsicType b1( B.load(k,j ) );
3724  const IntrinsicType b2( B.load(k,j+1UL) );
3725  xmm1 = xmm1 + a1 * b1;
3726  xmm2 = xmm2 + a1 * b2;
3727  xmm3 = xmm3 + a2 * b1;
3728  xmm4 = xmm4 + a2 * b2;
3729  }
3730  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3731  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3732  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3733  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3734  }
3735  if( j < N ) {
3736  IntrinsicType xmm1, xmm2;
3737  for( size_t k=0UL; k<K; k+=IT::size ) {
3738  const IntrinsicType b1( B.load(k,j) );
3739  xmm1 = xmm1 + A.load(i ,k) * b1;
3740  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3741  }
3742  (~C)(i ,j) += sum( xmm1 ) * scalar;
3743  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3744  }
3745  }
3746  if( i < M ) {
3747  size_t j( 0UL );
3748  for( ; (j+2UL) <= N; j+=2UL ) {
3749  IntrinsicType xmm1, xmm2;
3750  for( size_t k=0UL; k<K; k+=IT::size ) {
3751  const IntrinsicType a1( A.load(i,k) );
3752  xmm1 = xmm1 + a1 * B.load(k,j );
3753  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3754  }
3755  (~C)(i,j ) += sum( xmm1 ) * scalar;
3756  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3757  }
3758  if( j < N ) {
3759  IntrinsicType xmm1, xmm2;
3760  for( size_t k=0UL; k<K; k+=IT::size ) {
3761  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3762  }
3763  (~C)(i,j) += sum( xmm1 ) * scalar;
3764  }
3765  }
3766  }
3767  //**********************************************************************************************
3768 
3769  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback overload of the BLAS addition assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds. It performs no BLAS call and simply forwards all
// arguments to selectDefaultAddAssignKernel().
3783  template< typename MT3 // Type of the left-hand side target matrix
3784  , typename MT4 // Type of the left-hand side matrix operand
3785  , typename MT5 // Type of the right-hand side matrix operand
3786  , typename ST2 > // Type of the scalar value
3787  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3788  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3789  {
3790  selectDefaultAddAssignKernel( C, A, B, scalar );
3791  }
3792  //**********************************************************************************************
3793 
3794  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3795 #if BLAZE_BLAS_MODE
3796 
// Single precision BLAS addition assignment kernel, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to sgemm(), passing scalar and 1.0F as the two trailing arguments.
// NOTE(review): these are presumably the GEMM alpha/beta factors, i.e. C = scalar*A*B + C --
// confirm against the sgemm wrapper.
3809  template< typename MT3 // Type of the left-hand side target matrix
3810  , typename MT4 // Type of the left-hand side matrix operand
3811  , typename MT5 // Type of the right-hand side matrix operand
3812  , typename ST2 > // Type of the scalar value
3813  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3814  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3815  {
3816  sgemm( C, A, B, scalar, 1.0F );
3817  }
3818 #endif
3819  //**********************************************************************************************
3820 
3821  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3822 #if BLAZE_BLAS_MODE
3823 
// Double precision BLAS addition assignment kernel, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to dgemm(), passing scalar and 1.0 as the two trailing arguments.
// NOTE(review): these are presumably the GEMM alpha/beta factors, i.e. C = scalar*A*B + C --
// confirm against the dgemm wrapper.
3836  template< typename MT3 // Type of the left-hand side target matrix
3837  , typename MT4 // Type of the left-hand side matrix operand
3838  , typename MT5 // Type of the right-hand side matrix operand
3839  , typename ST2 > // Type of the scalar value
3840  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3841  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3842  {
3843  dgemm( C, A, B, scalar, 1.0 );
3844  }
3845 #endif
3846  //**********************************************************************************************
3847 
3848  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3849 #if BLAZE_BLAS_MODE
3850 
// Single precision complex BLAS addition assignment kernel, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to cgemm(); scalar becomes the real part of a complex<float> with zero imaginary
// part, and the last argument is the complex value (1,0).
// NOTE(review): the enabling condition does not involve ST2, so scalar is presumably expected to
// be convertible to float here -- confirm. The trailing arguments are presumably GEMM alpha/beta.
3863  template< typename MT3 // Type of the left-hand side target matrix
3864  , typename MT4 // Type of the left-hand side matrix operand
3865  , typename MT5 // Type of the right-hand side matrix operand
3866  , typename ST2 > // Type of the scalar value
3867  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3868  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3869  {
3870  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3871  }
3872 #endif
3873  //**********************************************************************************************
3874 
3875  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3876 #if BLAZE_BLAS_MODE
3877 
// Double precision complex BLAS addition assignment kernel, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE is set).
// Delegates to zgemm(); scalar becomes the real part of a complex<double> with zero imaginary
// part, and the last argument is the complex value (1,0).
// NOTE(review): the enabling condition does not involve ST2, so scalar is presumably expected to
// be convertible to double here -- confirm. The trailing arguments are presumably GEMM alpha/beta.
3890  template< typename MT3 // Type of the left-hand side target matrix
3891  , typename MT4 // Type of the left-hand side matrix operand
3892  , typename MT5 // Type of the right-hand side matrix operand
3893  , typename ST2 > // Type of the scalar value
3894  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3895  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3896  {
3897  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
3898  }
3899 #endif
3900  //**********************************************************************************************
3901 
3902  //**Addition assignment to sparse matrices******************************************************
3903  // No special implementation for the addition assignment to sparse matrices.
3904  //**********************************************************************************************
3905 
3906  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled matrix product (rhs) to a dense matrix (lhs).
// Extracts both operands of the wrapped product, returns early if the result or the inner
// dimension is empty, evaluates the operands serially, and hands them together with the scalar
// to selectSubAssignKernel().
3918  template< typename MT // Type of the target dense matrix
3919  , bool SO > // Storage order of the target dense matrix
3920  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3921  {
3923 
3924  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3925  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3926 
3927  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3928  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3929 
// Nothing to subtract when the target has no elements or the inner dimension is zero.
3930  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3931  return;
3932  }
3933 
3934  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3935  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3936 
3937  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3938  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3939  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3940  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3941  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3942  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3943 
3944  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3945  }
3946  //**********************************************************************************************
3947 
3948  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel dispatch for the subtraction assignment C -= (A * B) * scalar.
// Targets with fewer than DMATTDMATMULT_THRESHOLD elements (rows times columns) are handled by
// the default kernel, all other targets by the BLAS kernel.
3959  template< typename MT3 // Type of the left-hand side target matrix
3960  , typename MT4 // Type of the left-hand side matrix operand
3961  , typename MT5 // Type of the right-hand side matrix operand
3962  , typename ST2 > // Type of the scalar value
3963  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3964  {
3965  const bool belowThreshold( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD );
3966  if( belowThreshold ) { DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar ); }
3967  else
3968  { DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar ); }
3969  }
3970  //**********************************************************************************************
3971 
3972  //**Default subtraction assignment to dense matrices********************************************
// Non-vectorized default kernel for the subtraction assignment, selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// The scaled product A * B * scalar is evaluated serially into a temporary of type ResultType,
// which is then subtracted from C via subAssign().
3986  template< typename MT3 // Type of the left-hand side target matrix
3987  , typename MT4 // Type of the left-hand side matrix operand
3988  , typename MT5 // Type of the right-hand side matrix operand
3989  , typename ST2 > // Type of the scalar value
3990  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3991  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3992  {
3993  const ResultType tmp( serial( A * B * scalar ) ); // Serial evaluation of the scaled product
3994  subAssign( C, tmp ); // Element-wise subtraction of the temporary from the target
3995  }
3996  //**********************************************************************************************
3997 
3998  //**Vectorized default subtraction assignment to row-major dense matrices***********************
4012  template< typename MT3 // Type of the left-hand side target matrix
4013  , typename MT4 // Type of the left-hand side matrix operand
4014  , typename MT5 // Type of the right-hand side matrix operand
4015  , typename ST2 > // Type of the scalar value
4016  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4017  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4018  {
4019  typedef IntrinsicTrait<ElementType> IT;
4020 
4021  const size_t M( A.rows() );
4022  const size_t N( B.columns() );
4023  const size_t K( A.columns() );
4024 
4025  size_t i( 0UL );
4026 
4027  for( ; (i+2UL) <= M; i+=2UL ) {
4028  size_t j( 0UL );
4029  for( ; (j+4UL) <= N; j+=4UL ) {
4030  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4031  for( size_t k=0UL; k<K; k+=IT::size ) {
4032  const IntrinsicType a1( A.load(i ,k) );
4033  const IntrinsicType a2( A.load(i+1UL,k) );
4034  const IntrinsicType b1( B.load(k,j ) );
4035  const IntrinsicType b2( B.load(k,j+1UL) );
4036  const IntrinsicType b3( B.load(k,j+2UL) );
4037  const IntrinsicType b4( B.load(k,j+3UL) );
4038  xmm1 = xmm1 + a1 * b1;
4039  xmm2 = xmm2 + a1 * b2;
4040  xmm3 = xmm3 + a1 * b3;
4041  xmm4 = xmm4 + a1 * b4;
4042  xmm5 = xmm5 + a2 * b1;
4043  xmm6 = xmm6 + a2 * b2;
4044  xmm7 = xmm7 + a2 * b3;
4045  xmm8 = xmm8 + a2 * b4;
4046  }
4047  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4048  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4049  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
4050  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
4051  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
4052  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
4053  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
4054  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
4055  }
4056  for( ; (j+2UL) <= N; j+=2UL ) {
4057  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4058  for( size_t k=0UL; k<K; k+=IT::size ) {
4059  const IntrinsicType a1( A.load(i ,k) );
4060  const IntrinsicType a2( A.load(i+1UL,k) );
4061  const IntrinsicType b1( B.load(k,j ) );
4062  const IntrinsicType b2( B.load(k,j+1UL) );
4063  xmm1 = xmm1 + a1 * b1;
4064  xmm2 = xmm2 + a1 * b2;
4065  xmm3 = xmm3 + a2 * b1;
4066  xmm4 = xmm4 + a2 * b2;
4067  }
4068  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4069  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4070  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4071  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4072  }
4073  if( j < N ) {
4074  IntrinsicType xmm1, xmm2;
4075  for( size_t k=0UL; k<K; k+=IT::size ) {
4076  const IntrinsicType b1( B.load(k,j) );
4077  xmm1 = xmm1 + A.load(i ,k) * b1;
4078  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4079  }
4080  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4081  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4082  }
4083  }
4084  if( i < M ) {
4085  size_t j( 0UL );
4086  for( ; (j+4UL) <= N; j+=4UL ) {
4087  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4088  for( size_t k=0UL; k<K; k+=IT::size ) {
4089  const IntrinsicType a1( A.load(i,k) );
4090  xmm1 = xmm1 + a1 * B.load(k,j );
4091  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4092  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
4093  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
4094  }
4095  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4096  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4097  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
4098  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
4099  }
4100  for( ; (j+2UL) <= N; j+=2UL ) {
4101  IntrinsicType xmm1, xmm2;
4102  for( size_t k=0UL; k<K; k+=IT::size ) {
4103  const IntrinsicType a1( A.load(i,k) );
4104  xmm1 = xmm1 + a1 * B.load(k,j );
4105  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4106  }
4107  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4108  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4109  }
4110  if( j < N ) {
4111  IntrinsicType xmm1, xmm2;
4112  for( size_t k=0UL; k<K; k+=IT::size ) {
4113  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4114  }
4115  (~C)(i,j) -= sum( xmm1 ) * scalar;
4116  }
4117  }
4118  }
4119  //**********************************************************************************************
4120 
4121  //**Vectorized default subtraction assignment to column-major dense matrices********************
4135  template< typename MT3 // Type of the left-hand side target matrix
4136  , typename MT4 // Type of the left-hand side matrix operand
4137  , typename MT5 // Type of the right-hand side matrix operand
4138  , typename ST2 > // Type of the scalar value
4139  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4140  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4141  {
4142  typedef IntrinsicTrait<ElementType> IT;
4143 
4144  const size_t M( A.rows() );
4145  const size_t N( B.columns() );
4146  const size_t K( A.columns() );
4147 
4148  size_t i( 0UL );
4149 
4150  for( ; (i+4UL) <= M; i+=4UL ) {
4151  size_t j( 0UL );
4152  for( ; (j+2UL) <= N; j+=2UL ) {
4153  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4154  for( size_t k=0UL; k<K; k+=IT::size ) {
4155  const IntrinsicType a1( A.load(i ,k) );
4156  const IntrinsicType a2( A.load(i+1UL,k) );
4157  const IntrinsicType a3( A.load(i+2UL,k) );
4158  const IntrinsicType a4( A.load(i+3UL,k) );
4159  const IntrinsicType b1( B.load(k,j ) );
4160  const IntrinsicType b2( B.load(k,j+1UL) );
4161  xmm1 = xmm1 + a1 * b1;
4162  xmm2 = xmm2 + a1 * b2;
4163  xmm3 = xmm3 + a2 * b1;
4164  xmm4 = xmm4 + a2 * b2;
4165  xmm5 = xmm5 + a3 * b1;
4166  xmm6 = xmm6 + a3 * b2;
4167  xmm7 = xmm7 + a4 * b1;
4168  xmm8 = xmm8 + a4 * b2;
4169  }
4170  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4171  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4172  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4173  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4174  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
4175  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
4176  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
4177  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
4178  }
4179  if( j < N ) {
4180  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4181  for( size_t k=0UL; k<K; k+=IT::size ) {
4182  const IntrinsicType b1( B.load(k,j) );
4183  xmm1 = xmm1 + A.load(i ,k) * b1;
4184  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4185  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
4186  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
4187  }
4188  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4189  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4190  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
4191  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
4192  }
4193  }
4194  for( ; (i+2UL) <= M; i+=2UL ) {
4195  size_t j( 0UL );
4196  for( ; (j+2UL) <= N; j+=2UL ) {
4197  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4198  for( size_t k=0UL; k<K; k+=IT::size ) {
4199  const IntrinsicType a1( A.load(i ,k) );
4200  const IntrinsicType a2( A.load(i+1UL,k) );
4201  const IntrinsicType b1( B.load(k,j ) );
4202  const IntrinsicType b2( B.load(k,j+1UL) );
4203  xmm1 = xmm1 + a1 * b1;
4204  xmm2 = xmm2 + a1 * b2;
4205  xmm3 = xmm3 + a2 * b1;
4206  xmm4 = xmm4 + a2 * b2;
4207  }
4208  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4209  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4210  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4211  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4212  }
4213  if( j < N ) {
4214  IntrinsicType xmm1, xmm2;
4215  for( size_t k=0UL; k<K; k+=IT::size ) {
4216  const IntrinsicType b1( B.load(k,j) );
4217  xmm1 = xmm1 + A.load(i ,k) * b1;
4218  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4219  }
4220  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4221  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4222  }
4223  }
4224  if( i < M ) {
4225  size_t j( 0UL );
4226  for( ; (j+2UL) <= N; j+=2UL ) {
4227  IntrinsicType xmm1, xmm2;
4228  for( size_t k=0UL; k<K; k+=IT::size ) {
4229  const IntrinsicType a1( A.load(i,k) );
4230  xmm1 = xmm1 + a1 * B.load(k,j );
4231  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4232  }
4233  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4234  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4235  }
4236  if( j < N ) {
4237  IntrinsicType xmm1, xmm2;
4238  for( size_t k=0UL; k<K; k+=IT::size ) {
4239  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4240  }
4241  (~C)(i,j) -= sum( xmm1 ) * scalar;
4242  }
4243  }
4244  }
4245  //**********************************************************************************************
4246 
4247  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS-based subtraction assignment kernel. It is enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> is satisfied and performs no BLAS call itself.
4261  template< typename MT3 // Type of the left-hand side target matrix
4262  , typename MT4 // Type of the left-hand side matrix operand
4263  , typename MT5 // Type of the right-hand side matrix operand
4264  , typename ST2 > // Type of the scalar value
4265  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4266  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4267  {
      // Forward all arguments unchanged to the default (non-BLAS) kernel selection
4268  selectDefaultSubAssignKernel( C, A, B, scalar );
4269  }
4270  //**********************************************************************************************
4271 
4272  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
4273 #if BLAZE_BLAS_MODE
4274 
// BLAS-based subtraction assignment kernel, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. Delegates to sgemm().
4287  template< typename MT3 // Type of the left-hand side target matrix
4288  , typename MT4 // Type of the left-hand side matrix operand
4289  , typename MT5 // Type of the right-hand side matrix operand
4290  , typename ST2 > // Type of the scalar value
4291  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4292  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4293  {
      // The scalar is negated and 1.0F is passed last; presumably these are the gemm alpha and
      // beta factors, i.e. C = 1*C + (-scalar)*A*B -- confirm against the sgemm() wrapper.
4294  sgemm( C, A, B, -scalar, 1.0F );
4295  }
4296 #endif
4297  //**********************************************************************************************
4298 
4299  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
4300 #if BLAZE_BLAS_MODE
4301 
// BLAS-based subtraction assignment kernel, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. Delegates to dgemm().
4314  template< typename MT3 // Type of the left-hand side target matrix
4315  , typename MT4 // Type of the left-hand side matrix operand
4316  , typename MT5 // Type of the right-hand side matrix operand
4317  , typename ST2 > // Type of the scalar value
4318  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4319  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4320  {
      // The scalar is negated and 1.0 is passed last; presumably these are the gemm alpha and
      // beta factors, i.e. C = 1*C + (-scalar)*A*B -- confirm against the dgemm() wrapper.
4321  dgemm( C, A, B, -scalar, 1.0 );
4322  }
4323 #endif
4324  //**********************************************************************************************
4325 
4326  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
4327 #if BLAZE_BLAS_MODE
4328 
// BLAS-based subtraction assignment kernel, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is satisfied (the scalar type ST2 does not take
// part in the enabling condition). Delegates to cgemm().
4341  template< typename MT3 // Type of the left-hand side target matrix
4342  , typename MT4 // Type of the left-hand side matrix operand
4343  , typename MT5 // Type of the right-hand side matrix operand
4344  , typename ST2 > // Type of the scalar value
4345  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4346  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4347  {
      // The negated scalar becomes the real part of the first complex factor and (1,0) is passed
      // last; presumably these are the gemm alpha and beta factors -- confirm against cgemm().
4348  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
4349  }
4350 #endif
4351  //**********************************************************************************************
4352 
4353  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4354 #if BLAZE_BLAS_MODE
4355 
// BLAS-based subtraction assignment kernel, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is satisfied (the scalar type ST2 does not take
// part in the enabling condition). Delegates to zgemm().
4368  template< typename MT3 // Type of the left-hand side target matrix
4369  , typename MT4 // Type of the left-hand side matrix operand
4370  , typename MT5 // Type of the right-hand side matrix operand
4371  , typename ST2 > // Type of the scalar value
4372  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4373  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4374  {
      // The negated scalar becomes the real part of the first complex factor and (1,0) is passed
      // last; presumably these are the gemm alpha and beta factors -- confirm against zgemm().
4375  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
4376  }
4377 #endif
4378  //**********************************************************************************************
4379 
4380  //**Subtraction assignment to sparse matrices***************************************************
4381  // No special implementation for the subtraction assignment to sparse matrices.
4382  //**********************************************************************************************
4383 
4384  //**Multiplication assignment to dense matrices*************************************************
4385  // No special implementation for the multiplication assignment to dense matrices.
4386  //**********************************************************************************************
4387 
4388  //**Multiplication assignment to sparse matrices************************************************
4389  // No special implementation for the multiplication assignment to sparse matrices.
4390  //**********************************************************************************************
4391 
4392  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled dense matrix product to a dense matrix. The overload is enabled
// only when IsEvaluationRequired<MT,MT1,MT2> is satisfied. Both operands of the wrapped product
// are evaluated into LT/RT objects first and the assignment is then re-dispatched on
// A * B * scalar.
4407  template< typename MT // Type of the target dense matrix
4408  , bool SO > // Storage order of the target dense matrix
4409  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4410  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4411  {
4413 
4414  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4415  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4416 
      // Operands of the matrix product wrapped by the scaling expression
4417  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4418  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4419 
      // An empty target needs no work; an empty inner dimension only requires resetting the target
4420  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4421  return;
4422  }
4423  else if( left.columns() == 0UL ) {
4424  reset( ~lhs );
4425  return;
4426  }
4427 
4428  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4429  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4430 
4431  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4432  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4433  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4434  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4435  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4436  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4437 
      // Re-dispatch the SMP assignment on the evaluated operands
4438  smpAssign( ~lhs, A * B * rhs.scalar_ );
4439  }
4440  //**********************************************************************************************
4441 
4442  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a scaled dense matrix product to a sparse matrix. The overload is enabled
// only when IsEvaluationRequired<MT,MT1,MT2> is satisfied. The expression is first evaluated into
// a temporary matrix, which is then assigned to the sparse target.
4457  template< typename MT // Type of the target sparse matrix
4458  , bool SO > // Storage order of the target sparse matrix
4459  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4460  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4461  {
4463 
      // The temporary is OppositeType if SO is true and ResultType otherwise
4464  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
4465 
4472 
4473  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4474  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4475 
      // Evaluate the complete expression once, then assign the evaluated result
4476  const TmpType tmp( rhs );
4477  smpAssign( ~lhs, tmp );
4478  }
4479  //**********************************************************************************************
4480 
4481  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled dense matrix product to a dense matrix. The overload is
// enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied. Both operands of the wrapped
// product are evaluated into LT/RT objects first and the addition assignment is then
// re-dispatched on A * B * scalar.
4496  template< typename MT // Type of the target dense matrix
4497  , bool SO > // Storage order of the target dense matrix
4498  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4499  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4500  {
4502 
4503  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4504  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4505 
      // Operands of the matrix product wrapped by the scaling expression
4506  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4507  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4508 
      // Return early, leaving the target untouched, if the target or the inner dimension is empty
4509  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4510  return;
4511  }
4512 
4513  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4514  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4515 
4516  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4517  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4518  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4519  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4520  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4521  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4522 
      // Re-dispatch the SMP addition assignment on the evaluated operands
4523  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4524  }
4525  //**********************************************************************************************
4526 
4527  //**SMP addition assignment to sparse matrices**************************************************
4528  // No special implementation for the SMP addition assignment to sparse matrices.
4529  //**********************************************************************************************
4530 
4531  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled dense matrix product to a dense matrix. The overload is
// enabled only when IsEvaluationRequired<MT,MT1,MT2> is satisfied. Both operands of the wrapped
// product are evaluated into LT/RT objects first and the subtraction assignment is then
// re-dispatched on A * B * scalar.
4546  template< typename MT // Type of the target dense matrix
4547  , bool SO > // Storage order of the target dense matrix
4548  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4549  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4550  {
4552 
4553  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4554  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4555 
      // Operands of the matrix product wrapped by the scaling expression
4556  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4557  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4558 
      // Return early, leaving the target untouched, if the target or the inner dimension is empty
4559  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4560  return;
4561  }
4562 
4563  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4564  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4565 
4566  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4567  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4568  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4569  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4570  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4571  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4572 
      // Re-dispatch the SMP subtraction assignment on the evaluated operands
4573  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4574  }
4575  //**********************************************************************************************
4576 
4577  //**SMP subtraction assignment to sparse matrices***********************************************
4578  // No special implementation for the SMP subtraction assignment to sparse matrices.
4579  //**********************************************************************************************
4580 
4581  //**SMP multiplication assignment to dense matrices*********************************************
4582  // No special implementation for the SMP multiplication assignment to dense matrices.
4583  //**********************************************************************************************
4584 
4585  //**SMP multiplication assignment to sparse matrices********************************************
4586  // No special implementation for the SMP multiplication assignment to sparse matrices.
4587  //**********************************************************************************************
4588 
4589  //**Compile time checks*************************************************************************
4598  //**********************************************************************************************
4599 };
4601 //*************************************************************************************************
4602 
4603 
4604 
4605 
4606 //=================================================================================================
4607 //
4608 // GLOBAL BINARY ARITHMETIC OPERATORS
4609 //
4610 //=================================================================================================
4611 
4612 //*************************************************************************************************
// Multiplication operator for the product of a row-major dense matrix (lhs) and a column-major
// dense matrix (rhs). Returns a DMatTDMatMultExpr expression object holding both operands.
// Throws std::invalid_argument if the number of columns of lhs differs from the number of rows
// of rhs.
4641 template< typename T1 // Type of the left-hand side dense matrix
4642  , typename T2 > // Type of the right-hand side dense matrix
4643 inline const DMatTDMatMultExpr<T1,T2>
4644  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,true>& rhs )
4645 {
4647 
     // The inner dimensions of the two operands have to agree
4648  if( (~lhs).columns() != (~rhs).rows() )
4649  throw std::invalid_argument( "Matrix sizes do not match" );
4650 
4651  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4652 }
4653 //*************************************************************************************************
4654 
4655 
4656 
4657 
4658 //=================================================================================================
4659 //
4660 // ROWS SPECIALIZATIONS
4661 //
4662 //=================================================================================================
4663 
4664 //*************************************************************************************************
// Rows specialization for DMatTDMatMultExpr: derives from Rows<MT1>, i.e. the row information of
// the product is taken from the left-hand side operand type.
4666 template< typename MT1, typename MT2 >
4667 struct Rows< DMatTDMatMultExpr<MT1,MT2> >
4668  : public Rows<MT1>
4669 {};
4671 //*************************************************************************************************
4672 
4673 
4674 
4675 
4676 //=================================================================================================
4677 //
4678 // COLUMNS SPECIALIZATIONS
4679 //
4680 //=================================================================================================
4681 
4682 //*************************************************************************************************
// Columns specialization for DMatTDMatMultExpr: derives from Columns<MT2>, i.e. the column
// information of the product is taken from the right-hand side operand type.
4684 template< typename MT1, typename MT2 >
4685 struct Columns< DMatTDMatMultExpr<MT1,MT2> >
4686  : public Columns<MT2>
4687 {};
4689 //*************************************************************************************************
4690 
4691 
4692 
4693 
4694 //=================================================================================================
4695 //
4696 // ISLOWER SPECIALIZATIONS
4697 //
4698 //=================================================================================================
4699 
4700 //*************************************************************************************************
// IsLower specialization for DMatTDMatMultExpr: true only if both IsLower<MT1>::value and
// IsLower<MT2>::value are true.
4702 template< typename MT1, typename MT2 >
4703 struct IsLower< DMatTDMatMultExpr<MT1,MT2> >
4704  : public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
4705 {};
4707 //*************************************************************************************************
4708 
4709 
4710 
4711 
4712 //=================================================================================================
4713 //
4714 // ISUPPER SPECIALIZATIONS
4715 //
4716 //=================================================================================================
4717 
4718 //*************************************************************************************************
// IsUpper specialization for DMatTDMatMultExpr: true only if both IsUpper<MT1>::value and
// IsUpper<MT2>::value are true.
4720 template< typename MT1, typename MT2 >
4721 struct IsUpper< DMatTDMatMultExpr<MT1,MT2> >
4722  : public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
4723 {};
4725 //*************************************************************************************************
4726 
4727 
4728 
4729 
4730 //=================================================================================================
4731 //
4732 // EXPRESSION TRAIT SPECIALIZATIONS
4733 //
4734 //=================================================================================================
4735 
4736 //*************************************************************************************************
// Expression trait for multiplying a DMatTDMatMultExpr with a vector of type VT.
// If MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT is a dense column
// vector, Type is composed as MT1 * ( MT2 * VT ): the inner trait is applied to (MT2,VT) and the
// outer trait to MT1 and that result. Otherwise Type is INVALID_TYPE.
4738 template< typename MT1, typename MT2, typename VT >
4739 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4740 {
4741  public:
4742  //**********************************************************************************************
4743  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4744  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4745  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4746  , typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4747  , INVALID_TYPE >::Type Type;
4748  //**********************************************************************************************
4749 };
4751 //*************************************************************************************************
4752 
4753 
4754 //*************************************************************************************************
// Expression trait for multiplying a DMatTDMatMultExpr with a vector of type VT.
// If MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT is a sparse
// column vector, Type is composed as MT1 * ( MT2 * VT ): TDMatSVecMultExprTrait is applied to
// (MT2,VT) and DMatDVecMultExprTrait to MT1 and that result. Otherwise Type is INVALID_TYPE.
4756 template< typename MT1, typename MT2, typename VT >
4757 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4758 {
4759  public:
4760  //**********************************************************************************************
4761  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4762  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4763  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4764  , typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4765  , INVALID_TYPE >::Type Type;
4766  //**********************************************************************************************
4767 };
4769 //*************************************************************************************************
4770 
4771 
4772 //*************************************************************************************************
// Expression trait for multiplying a vector of type VT with a DMatTDMatMultExpr.
// If VT is a dense row vector, MT1 is a row-major dense matrix and MT2 is a column-major dense
// matrix, Type is composed as ( VT * MT1 ) * MT2: the inner trait is applied to (VT,MT1) and
// TDVecTDMatMultExprTrait to that result and MT2. Otherwise Type is INVALID_TYPE.
4774 template< typename VT, typename MT1, typename MT2 >
4775 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4776 {
4777  public:
4778  //**********************************************************************************************
4779  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4780  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4781  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4782  , typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4783  , INVALID_TYPE >::Type Type;
4784  //**********************************************************************************************
4785 };
4787 //*************************************************************************************************
4788 
4789 
4790 //*************************************************************************************************
// Expression trait for multiplying a vector of type VT with a DMatTDMatMultExpr.
// If VT is a sparse row vector, MT1 is a row-major dense matrix and MT2 is a column-major dense
// matrix, Type is composed as ( VT * MT1 ) * MT2: the inner trait is applied to (VT,MT1) and
// TDVecTDMatMultExprTrait to that result and MT2. Otherwise Type is INVALID_TYPE.
4792 template< typename VT, typename MT1, typename MT2 >
4793 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4794 {
4795  public:
4796  //**********************************************************************************************
4797  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4798  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4799  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4800  , typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4801  , INVALID_TYPE >::Type Type;
4802  //**********************************************************************************************
4803 };
4805 //*************************************************************************************************
4806 
4807 
4808 //*************************************************************************************************
// Submatrix expression trait for DMatTDMatMultExpr: Type is the MultExprTrait result of the
// submatrix types of const MT1 and const MT2, both formed with the same AF flag.
4810 template< typename MT1, typename MT2, bool AF >
4811 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2>, AF >
4812 {
4813  public:
4814  //**********************************************************************************************
4815  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4816  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4817  //**********************************************************************************************
4818 };
4820 //*************************************************************************************************
4821 
4822 
4823 //*************************************************************************************************
// Row expression trait for DMatTDMatMultExpr: Type is the MultExprTrait result of the row type of
// const MT1 and the complete right-hand side operand type MT2.
4825 template< typename MT1, typename MT2 >
4826 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4827 {
4828  public:
4829  //**********************************************************************************************
4830  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4831  //**********************************************************************************************
4832 };
4834 //*************************************************************************************************
4835 
4836 
4837 //*************************************************************************************************
// Column expression trait for DMatTDMatMultExpr: Type is the MultExprTrait result of the complete
// left-hand side operand type MT1 and the column type of const MT2.
4839 template< typename MT1, typename MT2 >
4840 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4841 {
4842  public:
4843  //**********************************************************************************************
4844  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4845  //**********************************************************************************************
4846 };
4848 //*************************************************************************************************
4849 
4850 } // namespace blaze
4851 
4852 #endif
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:419
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:262
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:270
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A=B*C$ ).
Definition: DMatDMatMultExpr.h:4838
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:264
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:315
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:409
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:260
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:130
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:132
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:300
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:355
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:267
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
Header file for the IsDouble type trait.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
const size_t SMP_DMATTDMATMULT_THRESHOLD
SMP row-major dense matrix/column-major dense matrix multiplication threshold. This threshold specifie...
Definition: Thresholds.h:857
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:399
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:129
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:133
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:265
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:345
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:104
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:261
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:142
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:387
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:131
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:365
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:266
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:428
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:122
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:128
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:429
Header file for the IsUpper type trait.
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:375
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:263
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:276
Constraint on the data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:279
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:273
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849