All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
73 #include <blaze/system/BLAS.h>
75 #include <blaze/util/Assert.h>
76 #include <blaze/util/Complex.h>
82 #include <blaze/util/EnableIf.h>
83 #include <blaze/util/InvalidType.h>
85 #include <blaze/util/SelectType.h>
86 #include <blaze/util/Types.h>
92 
93 
94 namespace blaze {
95 
96 //=================================================================================================
97 //
98 // CLASS TDMATTDMATMULTEXPR
99 //
100 //=================================================================================================
101 
102 //*************************************************************************************************
109 template< typename MT1 // Type of the left-hand side dense matrix
110  , typename MT2 > // Type of the right-hand side dense matrix
111 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
112  , private MatMatMultExpr
113  , private Computation
114 {
115  private:
116  //**Type definitions****************************************************************************
// Shorthand aliases for the nested types of the two operand matrix types.
117  typedef typename MT1::ResultType RT1;  //!< Result type of the left-hand side operand (MT1).
118  typedef typename MT2::ResultType RT2;  //!< Result type of the right-hand side operand (MT2).
119  typedef typename MT1::ElementType ET1;  //!< Element type of the left-hand side operand (MT1).
120  typedef typename MT2::ElementType ET2;  //!< Element type of the right-hand side operand (MT2).
121  typedef typename MT1::CompositeType CT1;  //!< Composite type of the left-hand side operand (MT1).
122  typedef typename MT2::CompositeType CT2;  //!< Composite type of the right-hand side operand (MT2).
123  //**********************************************************************************************
124 
125  //**********************************************************************************************
127 
130  template< typename T1, typename T2, typename T3 >
131  struct UseSinglePrecisionKernel {
135  };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
144  template< typename T1, typename T2, typename T3 >
145  struct UseDoublePrecisionKernel {
149  };
151  //**********************************************************************************************
152 
153  //**********************************************************************************************
155 
159  template< typename T1, typename T2, typename T3 >
160  struct UseSinglePrecisionComplexKernel {
161  typedef complex<float> Type;
162  enum { value = IsSame<typename T1::ElementType,Type>::value &&
163  IsSame<typename T2::ElementType,Type>::value &&
164  IsSame<typename T3::ElementType,Type>::value };
165  };
167  //**********************************************************************************************
168 
169  //**********************************************************************************************
171 
175  template< typename T1, typename T2, typename T3 >
176  struct UseDoublePrecisionComplexKernel {
177  typedef complex<double> Type;
178  enum { value = IsSame<typename T1::ElementType,Type>::value &&
179  IsSame<typename T2::ElementType,Type>::value &&
180  IsSame<typename T3::ElementType,Type>::value };
181  };
183  //**********************************************************************************************
184 
185  //**********************************************************************************************
187 
190  template< typename T1, typename T2, typename T3 >
191  struct UseDefaultKernel {
192  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
193  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
194  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
195  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
196  };
198  //**********************************************************************************************
199 
200  //**********************************************************************************************
202 
205  template< typename T1, typename T2, typename T3 >
206  struct UseVectorizedDefaultKernel {
207  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
208  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
209  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
210  IntrinsicTrait<typename T1::ElementType>::addition &&
211  IntrinsicTrait<typename T1::ElementType>::multiplication };
212  };
214  //**********************************************************************************************
215 
216  public:
217  //**Type definitions****************************************************************************
224  typedef const ElementType ReturnType;  //!< Type returned by the element access operator (a const element value).
225  typedef const ResultType CompositeType;  //!< Type used when this expression is an operand of another expression (a const ResultType).
226 
// Storage type of the left operand: held by value if MT1 is an expression, by const reference otherwise.
228  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
229 
// Storage type of the right operand: held by value if MT2 is an expression, by const reference otherwise.
231  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
232 
// Type the left operand is converted to inside the assignment kernels: RT1 if MT1 is a computation, CT1 otherwise.
234  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
235 
// Type the right operand is converted to inside the assignment kernels: RT2 if MT2 is a computation, CT2 otherwise.
237  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
238  //**********************************************************************************************
239 
240  //**Compilation flags***************************************************************************
// Compilation flag: the multiplication expression itself is marked as not vectorizable.
242  enum { vectorizable = 0 };
243  //**********************************************************************************************
244 
245  //**Constructor*********************************************************************************
251  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
252  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
253  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
254  {
255  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
256  }
257  //**********************************************************************************************
258 
259  //**Access operator*****************************************************************************
266  inline ReturnType operator()( size_t i, size_t j ) const {
267  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
268  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
269 
270  ElementType tmp;
271 
272  if( lhs_.columns() != 0UL ) {
273  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
274  tmp = lhs_(i,0UL) * rhs_(0UL,j);
275  for( size_t k=1UL; k<end; k+=2UL ) {
276  tmp += lhs_(i,k ) * rhs_(k ,j);
277  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
278  }
279  if( end < lhs_.columns() ) {
280  tmp += lhs_(i,end) * rhs_(end,j);
281  }
282  }
283  else {
284  reset( tmp );
285  }
286 
287  return tmp;
288  }
289  //**********************************************************************************************
290 
291  //**Rows function*******************************************************************************
296  inline size_t rows() const {
297  return lhs_.rows();
298  }
299  //**********************************************************************************************
300 
301  //**Columns function****************************************************************************
306  inline size_t columns() const {
307  return rhs_.columns();
308  }
309  //**********************************************************************************************
310 
311  //**Left operand access*************************************************************************
316  inline LeftOperand leftOperand() const {
317  return lhs_;
318  }
319  //**********************************************************************************************
320 
321  //**Right operand access************************************************************************
326  inline RightOperand rightOperand() const {
327  return rhs_;
328  }
329  //**********************************************************************************************
330 
331  //**********************************************************************************************
337  template< typename T >
338  inline bool canAlias( const T* alias ) const {
339  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
340  }
341  //**********************************************************************************************
342 
343  //**********************************************************************************************
349  template< typename T >
350  inline bool isAliased( const T* alias ) const {
351  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
352  }
353  //**********************************************************************************************
354 
355  private:
356  //**Member variables****************************************************************************
359  //**********************************************************************************************
360 
361  //**Assignment to dense matrices****************************************************************
371  template< typename MT // Type of the target dense matrix
372  , bool SO > // Storage order of the target dense matrix
373  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
374  {
376 
377  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
378  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
379 
380  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
381  return;
382  }
383  else if( rhs.lhs_.columns() == 0UL ) {
384  reset( ~lhs );
385  return;
386  }
387 
388  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
389  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
390 
391  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
392  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
393  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
394  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
395  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
396  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
397 
398  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
399  TDMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
400  else
401  TDMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
402  }
404  //**********************************************************************************************
405 
406  //**Default assignment to dense matrices********************************************************
420  template< typename MT3 // Type of the left-hand side target matrix
421  , typename MT4 // Type of the left-hand side matrix operand
422  , typename MT5 > // Type of the right-hand side matrix operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
424  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
425  {
426  const size_t M( A.rows() );
427  const size_t N( B.columns() );
428  const size_t K( A.columns() );
429 
430  for( size_t i=0UL; i<M; ++i ) {
431  for( size_t j=0UL; j<N; ++j ) {
432  C(i,j) = A(i,0UL) * B(0UL,j);
433  }
434  for( size_t k=1UL; k<K; ++k ) {
435  for( size_t j=0UL; j<N; ++j ) {
436  C(i,j) += A(i,k) * B(k,j);
437  }
438  }
439  }
440  }
442  //**********************************************************************************************
443 
444  //**Vectorized default assignment to row-major dense matrices***********************************
458  template< typename MT3 // Type of the left-hand side target matrix
459  , typename MT4 // Type of the left-hand side matrix operand
460  , typename MT5 > // Type of the right-hand side matrix operand
461  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
462  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
463  {
466 
467  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
468  const typename MT5::OppositeType tmp( B );
469  assign( ~C, A * tmp );
470  }
471  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
472  const typename MT4::OppositeType tmp( A );
473  assign( ~C, tmp * B );
474  }
475  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
476  const typename MT5::OppositeType tmp( B );
477  assign( ~C, A * tmp );
478  }
479  else {
480  const typename MT4::OppositeType tmp( A );
481  assign( ~C, tmp * B );
482  }
483  }
485  //**********************************************************************************************
486 
487  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment kernel for column-major targets (C = A*B).
//
// C is processed in blocks of rows: first 8, then 4, then 2 intrinsic vectors
// (IT::size elements each) per block, and finally a single remaining vector. Inside a
// block, each column j of C is accumulated over the inner index k from vector loads of A
// and an intrinsic vector created from the scalar B(k,j) via set(), and is then stored to C.
//
// NOTE(review): the accumulators are default-constructed and immediately used as
// summands; this presumably relies on IntrinsicType default-initializing to zero -- confirm.
// NOTE(review): the loop conditions only require the first row of the last vector of a
// block to be below M, so loads/stores may touch rows >= M; this presumably relies on
// padded storage of the operands and the target -- confirm.
501  template< typename MT3 // Type of the left-hand side target matrix
502  , typename MT4 // Type of the left-hand side matrix operand
503  , typename MT5 > // Type of the right-hand side matrix operand
504  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
505  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
506  {
507  typedef IntrinsicTrait<ElementType> IT;
508 
509  const size_t M( A.rows() );
510  const size_t N( B.columns() );
511  const size_t K( A.columns() );
512 
// Row index shared by all four stages below; each stage continues where the previous stopped.
513  size_t i( 0UL );
514 
// Stage 1: blocks of 8 vectors of rows, one column of C at a time.
515  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
516  for( size_t j=0UL; j<N; ++j ) {
517  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
518  for( size_t k=0UL; k<K; ++k ) {
519  const IntrinsicType b1( set( B(k,j) ) );
520  xmm1 = xmm1 + A.load(i ,k) * b1;
521  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
522  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
523  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
524  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
525  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
526  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
527  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
528  }
529  (~C).store( i , j, xmm1 );
530  (~C).store( i+IT::size , j, xmm2 );
531  (~C).store( i+IT::size*2UL, j, xmm3 );
532  (~C).store( i+IT::size*3UL, j, xmm4 );
533  (~C).store( i+IT::size*4UL, j, xmm5 );
534  (~C).store( i+IT::size*5UL, j, xmm6 );
535  (~C).store( i+IT::size*6UL, j, xmm7 );
536  (~C).store( i+IT::size*7UL, j, xmm8 );
537  }
538  }
// Stage 2: blocks of 4 vectors of rows, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
539  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
540  size_t j( 0UL );
541  for( ; (j+2UL) <= N; j+=2UL ) {
542  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
543  for( size_t k=0UL; k<K; ++k ) {
544  const IntrinsicType a1( A.load(i ,k) );
545  const IntrinsicType a2( A.load(i+IT::size ,k) );
546  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
547  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
548  const IntrinsicType b1( set( B(k,j ) ) );
549  const IntrinsicType b2( set( B(k,j+1UL) ) );
550  xmm1 = xmm1 + a1 * b1;
551  xmm2 = xmm2 + a2 * b1;
552  xmm3 = xmm3 + a3 * b1;
553  xmm4 = xmm4 + a4 * b1;
554  xmm5 = xmm5 + a1 * b2;
555  xmm6 = xmm6 + a2 * b2;
556  xmm7 = xmm7 + a3 * b2;
557  xmm8 = xmm8 + a4 * b2;
558  }
559  (~C).store( i , j , xmm1 );
560  (~C).store( i+IT::size , j , xmm2 );
561  (~C).store( i+IT::size*2UL, j , xmm3 );
562  (~C).store( i+IT::size*3UL, j , xmm4 );
563  (~C).store( i , j+1UL, xmm5 );
564  (~C).store( i+IT::size , j+1UL, xmm6 );
565  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
566  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
567  }
// Remaining single column in case N is odd.
568  if( j < N ) {
569  IntrinsicType xmm1, xmm2, xmm3, xmm4;
570  for( size_t k=0UL; k<K; ++k ) {
571  const IntrinsicType b1( set( B(k,j) ) );
572  xmm1 = xmm1 + A.load(i ,k) * b1;
573  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
574  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
575  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
576  }
577  (~C).store( i , j, xmm1 );
578  (~C).store( i+IT::size , j, xmm2 );
579  (~C).store( i+IT::size*2UL, j, xmm3 );
580  (~C).store( i+IT::size*3UL, j, xmm4 );
581  }
582  }
// Stage 3: blocks of 2 vectors of rows, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
583  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
584  size_t j( 0UL );
585  for( ; (j+2UL) <= N; j+=2UL ) {
586  IntrinsicType xmm1, xmm2, xmm3, xmm4;
587  for( size_t k=0UL; k<K; ++k ) {
588  const IntrinsicType a1( A.load(i ,k) );
589  const IntrinsicType a2( A.load(i+IT::size,k) );
590  const IntrinsicType b1( set( B(k,j ) ) );
591  const IntrinsicType b2( set( B(k,j+1UL) ) );
592  xmm1 = xmm1 + a1 * b1;
593  xmm2 = xmm2 + a2 * b1;
594  xmm3 = xmm3 + a1 * b2;
595  xmm4 = xmm4 + a2 * b2;
596  }
597  (~C).store( i , j , xmm1 );
598  (~C).store( i+IT::size, j , xmm2 );
599  (~C).store( i , j+1UL, xmm3 );
600  (~C).store( i+IT::size, j+1UL, xmm4 );
601  }
// Remaining single column in case N is odd.
602  if( j < N ) {
603  IntrinsicType xmm1, xmm2;
604  for( size_t k=0UL; k<K; ++k ) {
605  const IntrinsicType b1( set( B(k,j) ) );
606  xmm1 = xmm1 + A.load(i ,k) * b1;
607  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
608  }
609  (~C).store( i , j, xmm1 );
610  (~C).store( i+IT::size, j, xmm2 );
611  }
612  }
// Stage 4: a single remaining vector of rows, two columns of C at a time.
613  if( i < M ) {
614  size_t j( 0UL );
615  for( ; (j+2UL) <= N; j+=2UL ) {
616  IntrinsicType xmm1, xmm2;
617  for( size_t k=0UL; k<K; ++k ) {
618  const IntrinsicType a1( A.load(i,k) );
619  xmm1 = xmm1 + a1 * set( B(k,j ) );
620  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
621  }
622  (~C).store( i, j , xmm1 );
623  (~C).store( i, j+1UL, xmm2 );
624  }
// Remaining single column in case N is odd.
625  if( j < N ) {
626  IntrinsicType xmm1;
627  for( size_t k=0UL; k<K; ++k ) {
628  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
629  }
630  (~C).store( i, j, xmm1 );
631  }
632  }
633  }
635  //**********************************************************************************************
636 
637  //**BLAS-based assignment to dense matrices (default)*******************************************
651  template< typename MT3 // Type of the left-hand side target matrix
652  , typename MT4 // Type of the left-hand side matrix operand
653  , typename MT5 > // Type of the right-hand side matrix operand
654  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
655  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
656  {
657  selectDefaultAssignKernel( C, A, B );
658  }
660  //**********************************************************************************************
661 
662  //**BLAS-based assignment to dense matrices (single precision)**********************************
663 #if BLAZE_BLAS_MODE
664 
677  template< typename MT3 // Type of the left-hand side target matrix
678  , typename MT4 // Type of the left-hand side matrix operand
679  , typename MT5 > // Type of the right-hand side matrix operand
680  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
681  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
682  {
683  using boost::numeric_cast;
684 
688 
689  const int M ( numeric_cast<int>( A.rows() ) );
690  const int N ( numeric_cast<int>( B.columns() ) );
691  const int K ( numeric_cast<int>( A.columns() ) );
692  const int lda( numeric_cast<int>( A.spacing() ) );
693  const int ldb( numeric_cast<int>( B.spacing() ) );
694  const int ldc( numeric_cast<int>( C.spacing() ) );
695 
696  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
697  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
698  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
699  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
700  }
702 #endif
703  //**********************************************************************************************
704 
705  //**BLAS-based assignment to dense matrices (double precision)**********************************
706 #if BLAZE_BLAS_MODE
707 
720  template< typename MT3 // Type of the left-hand side target matrix
721  , typename MT4 // Type of the left-hand side matrix operand
722  , typename MT5 > // Type of the right-hand side matrix operand
723  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
724  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
725  {
726  using boost::numeric_cast;
727 
731 
732  const int M ( numeric_cast<int>( A.rows() ) );
733  const int N ( numeric_cast<int>( B.columns() ) );
734  const int K ( numeric_cast<int>( A.columns() ) );
735  const int lda( numeric_cast<int>( A.spacing() ) );
736  const int ldb( numeric_cast<int>( B.spacing() ) );
737  const int ldc( numeric_cast<int>( C.spacing() ) );
738 
739  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
740  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
741  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
742  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
743  }
745 #endif
746  //**********************************************************************************************
747 
748  //**BLAS-based assignment to dense matrices (single precision complex)**************************
749 #if BLAZE_BLAS_MODE
750 
763  template< typename MT3 // Type of the left-hand side target matrix
764  , typename MT4 // Type of the left-hand side matrix operand
765  , typename MT5 > // Type of the right-hand side matrix operand
766  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
767  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
768  {
769  using boost::numeric_cast;
770 
774  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
775  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
776  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
777 
778  const int M ( numeric_cast<int>( A.rows() ) );
779  const int N ( numeric_cast<int>( B.columns() ) );
780  const int K ( numeric_cast<int>( A.columns() ) );
781  const int lda( numeric_cast<int>( A.spacing() ) );
782  const int ldb( numeric_cast<int>( B.spacing() ) );
783  const int ldc( numeric_cast<int>( C.spacing() ) );
784  complex<float> alpha( 1.0F, 0.0F );
785  complex<float> beta ( 0.0F, 0.0F );
786 
787  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
788  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
789  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
790  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
791  }
793 #endif
794  //**********************************************************************************************
795 
796  //**BLAS-based assignment to dense matrices (double precision complex)**************************
797 #if BLAZE_BLAS_MODE
798 
811  template< typename MT3 // Type of the left-hand side target matrix
812  , typename MT4 // Type of the left-hand side matrix operand
813  , typename MT5 > // Type of the right-hand side matrix operand
814  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
815  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
816  {
817  using boost::numeric_cast;
818 
822  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
823  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
824  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
825 
826  const int M ( numeric_cast<int>( A.rows() ) );
827  const int N ( numeric_cast<int>( B.columns() ) );
828  const int K ( numeric_cast<int>( A.columns() ) );
829  const int lda( numeric_cast<int>( A.spacing() ) );
830  const int ldb( numeric_cast<int>( B.spacing() ) );
831  const int ldc( numeric_cast<int>( C.spacing() ) );
832  complex<double> alpha( 1.0, 0.0 );
833  complex<double> beta ( 0.0, 0.0 );
834 
835  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
836  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
837  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
838  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
839  }
841 #endif
842  //**********************************************************************************************
843 
844  //**Assignment to sparse matrices***************************************************************
857  template< typename MT // Type of the target sparse matrix
858  , bool SO > // Storage order of the target sparse matrix
859  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
860  {
862 
863  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
864 
871 
872  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
873  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
874 
875  const TmpType tmp( rhs );
876  assign( ~lhs, tmp );
877  }
879  //**********************************************************************************************
880 
881  //**Addition assignment to dense matrices*******************************************************
894  template< typename MT // Type of the target dense matrix
895  , bool SO > // Storage order of the target dense matrix
896  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
897  {
899 
900  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
901  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
902 
903  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
904  return;
905  }
906 
907  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
908  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
909 
910  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
911  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
912  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
913  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
914  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
915  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
916 
917  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
918  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
919  else
920  TDMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
921  }
923  //**********************************************************************************************
924 
925  //**Default addition assignment to dense matrices***********************************************
939  template< typename MT3 // Type of the left-hand side target matrix
940  , typename MT4 // Type of the left-hand side matrix operand
941  , typename MT5 > // Type of the right-hand side matrix operand
942  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
943  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
944  {
945  const size_t M( A.rows() );
946  const size_t N( B.columns() );
947  const size_t K( A.columns() );
948 
949  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
950  const size_t end( N & size_t(-2) );
951 
952  for( size_t i=0UL; i<M; ++i ) {
953  for( size_t k=0UL; k<K; ++k ) {
954  for( size_t j=0UL; j<end; j+=2UL ) {
955  C(i,j ) += A(i,k) * B(k,j );
956  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
957  }
958  if( end < N ) {
959  C(i,end) += A(i,k) * B(k,end);
960  }
961  }
962  }
963  }
965  //**********************************************************************************************
966 
967  //**Vectorized default addition assignment to row-major dense matrices**************************
981  template< typename MT3 // Type of the left-hand side target matrix
982  , typename MT4 // Type of the left-hand side matrix operand
983  , typename MT5 > // Type of the right-hand side matrix operand
984  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
985  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
986  {
989 
990  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
991  const typename MT5::OppositeType tmp( B );
992  addAssign( ~C, A * tmp );
993  }
994  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
995  const typename MT4::OppositeType tmp( A );
996  addAssign( ~C, tmp * B );
997  }
998  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
999  const typename MT5::OppositeType tmp( B );
1000  addAssign( ~C, A * tmp );
1001  }
1002  else {
1003  const typename MT4::OppositeType tmp( A );
1004  addAssign( ~C, tmp * B );
1005  }
1006  }
1008  //**********************************************************************************************
1009 
1010  //**Vectorized default addition assignment to column-major dense matrices***********************
1024  template< typename MT3 // Type of the left-hand side target matrix
1025  , typename MT4 // Type of the left-hand side matrix operand
1026  , typename MT5 > // Type of the right-hand side matrix operand
1027  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1028  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1029  {
1030  typedef IntrinsicTrait<ElementType> IT;
1031 
1032  const size_t M( A.rows() );
1033  const size_t N( B.columns() );
1034  const size_t K( A.columns() );
1035 
1036  size_t i( 0UL );
1037 
1038  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1039  for( size_t j=0UL; j<N; ++j ) {
1040  IntrinsicType xmm1( (~C).load(i ,j) );
1041  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1042  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1043  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1044  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1045  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1046  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1047  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1048  for( size_t k=0UL; k<K; ++k ) {
1049  const IntrinsicType b1( set( B(k,j) ) );
1050  xmm1 = xmm1 + A.load(i ,k) * b1;
1051  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1052  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1053  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1054  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1055  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1056  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1057  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1058  }
1059  (~C).store( i , j, xmm1 );
1060  (~C).store( i+IT::size , j, xmm2 );
1061  (~C).store( i+IT::size*2UL, j, xmm3 );
1062  (~C).store( i+IT::size*3UL, j, xmm4 );
1063  (~C).store( i+IT::size*4UL, j, xmm5 );
1064  (~C).store( i+IT::size*5UL, j, xmm6 );
1065  (~C).store( i+IT::size*6UL, j, xmm7 );
1066  (~C).store( i+IT::size*7UL, j, xmm8 );
1067  }
1068  }
1069  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1070  size_t j( 0UL );
1071  for( ; (j+2UL) <= N; j+=2UL ) {
1072  IntrinsicType xmm1( (~C).load(i ,j ) );
1073  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1074  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1075  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1076  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1077  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1078  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1079  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1080  for( size_t k=0UL; k<K; ++k ) {
1081  const IntrinsicType a1( A.load(i ,k) );
1082  const IntrinsicType a2( A.load(i+IT::size ,k) );
1083  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1084  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1085  const IntrinsicType b1( set( B(k,j ) ) );
1086  const IntrinsicType b2( set( B(k,j+1UL) ) );
1087  xmm1 = xmm1 + a1 * b1;
1088  xmm2 = xmm2 + a2 * b1;
1089  xmm3 = xmm3 + a3 * b1;
1090  xmm4 = xmm4 + a4 * b1;
1091  xmm5 = xmm5 + a1 * b2;
1092  xmm6 = xmm6 + a2 * b2;
1093  xmm7 = xmm7 + a3 * b2;
1094  xmm8 = xmm8 + a4 * b2;
1095  }
1096  (~C).store( i , j , xmm1 );
1097  (~C).store( i+IT::size , j , xmm2 );
1098  (~C).store( i+IT::size*2UL, j , xmm3 );
1099  (~C).store( i+IT::size*3UL, j , xmm4 );
1100  (~C).store( i , j+1UL, xmm5 );
1101  (~C).store( i+IT::size , j+1UL, xmm6 );
1102  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1103  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1104  }
1105  if( j < N ) {
1106  IntrinsicType xmm1( (~C).load(i ,j) );
1107  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1108  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1109  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1110  for( size_t k=0UL; k<K; ++k ) {
1111  const IntrinsicType b1( set( B(k,j) ) );
1112  xmm1 = xmm1 + A.load(i ,k) * b1;
1113  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1114  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1115  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1116  }
1117  (~C).store( i , j, xmm1 );
1118  (~C).store( i+IT::size , j, xmm2 );
1119  (~C).store( i+IT::size*2UL, j, xmm3 );
1120  (~C).store( i+IT::size*3UL, j, xmm4 );
1121  }
1122  }
1123  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1124  size_t j( 0UL );
1125  for( ; (j+2UL) <= N; j+=2UL ) {
1126  IntrinsicType xmm1( (~C).load(i ,j ) );
1127  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1128  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1129  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1130  for( size_t k=0UL; k<K; ++k ) {
1131  const IntrinsicType a1( A.load(i ,k) );
1132  const IntrinsicType a2( A.load(i+IT::size,k) );
1133  const IntrinsicType b1( set( B(k,j ) ) );
1134  const IntrinsicType b2( set( B(k,j+1UL) ) );
1135  xmm1 = xmm1 + a1 * b1;
1136  xmm2 = xmm2 + a2 * b1;
1137  xmm3 = xmm3 + a1 * b2;
1138  xmm4 = xmm4 + a2 * b2;
1139  }
1140  (~C).store( i , j , xmm1 );
1141  (~C).store( i+IT::size, j , xmm2 );
1142  (~C).store( i , j+1UL, xmm3 );
1143  (~C).store( i+IT::size, j+1UL, xmm4 );
1144  }
1145  if( j < N ) {
1146  IntrinsicType xmm1( (~C).load(i ,j) );
1147  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1148  for( size_t k=0UL; k<K; ++k ) {
1149  const IntrinsicType b1( set( B(k,j) ) );
1150  xmm1 = xmm1 + A.load(i ,k) * b1;
1151  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1152  }
1153  (~C).store( i , j, xmm1 );
1154  (~C).store( i+IT::size, j, xmm2 );
1155  }
1156  }
1157  if( i < M ) {
1158  size_t j( 0UL );
1159  for( ; (j+2UL) <= N; j+=2UL ) {
1160  IntrinsicType xmm1( (~C).load(i,j ) );
1161  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1162  for( size_t k=0UL; k<K; ++k ) {
1163  const IntrinsicType a1( A.load(i,k) );
1164  xmm1 = xmm1 + a1 * set( B(k,j ) );
1165  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1166  }
1167  (~C).store( i, j , xmm1 );
1168  (~C).store( i, j+1UL, xmm2 );
1169  }
1170  if( j < N ) {
1171  IntrinsicType xmm1( (~C).load(i,j) );
1172  for( size_t k=0UL; k<K; ++k ) {
1173  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1174  }
1175  (~C).store( i, j, xmm1 );
1176  }
1177  }
1178  }
1180  //**********************************************************************************************
1181 
1182  //**BLAS-based addition assignment to dense matrices (default)**********************************
1196  template< typename MT3 // Type of the left-hand side target matrix
1197  , typename MT4 // Type of the left-hand side matrix operand
1198  , typename MT5 > // Type of the right-hand side matrix operand
1199  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1200  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1201  {
1202  selectDefaultAddAssignKernel( C, A, B );
1203  }
1205  //**********************************************************************************************
1206 
1207  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1208 #if BLAZE_BLAS_MODE
1209 
/*!\brief BLAS-based addition assignment for single precision operands (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseSinglePrecisionKernel holds for the
// three matrix types. The complete update is delegated to a single \c cblas_sgemm call.
*/
1222  template< typename MT3 // Type of the left-hand side target matrix
1223  , typename MT4 // Type of the left-hand side matrix operand
1224  , typename MT5 > // Type of the right-hand side matrix operand
1225  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1226  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1227  {
1228  using boost::numeric_cast;
1229 
1233 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_sgemm below.
1234  const int M ( numeric_cast<int>( A.rows() ) );
1235  const int N ( numeric_cast<int>( B.columns() ) );
1236  const int K ( numeric_cast<int>( A.columns() ) );
1237  const int lda( numeric_cast<int>( A.spacing() ) );
1238  const int ldb( numeric_cast<int>( B.spacing() ) );
1239  const int ldc( numeric_cast<int>( C.spacing() ) );
1240 
// alpha = 1.0F and beta = 1.0F: the product is added on top of the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
// NOTE(review): this presumably relies on A and B being stored column-major - confirm.
1241  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1242  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1243  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1244  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1245  }
1247 #endif
1248  //**********************************************************************************************
1249 
1250  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1251 #if BLAZE_BLAS_MODE
1252 
/*!\brief BLAS-based addition assignment for double precision operands (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseDoublePrecisionKernel holds for the
// three matrix types. The complete update is delegated to a single \c cblas_dgemm call.
*/
1265  template< typename MT3 // Type of the left-hand side target matrix
1266  , typename MT4 // Type of the left-hand side matrix operand
1267  , typename MT5 > // Type of the right-hand side matrix operand
1268  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1269  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1270  {
1271  using boost::numeric_cast;
1272 
1276 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_dgemm below.
1277  const int M ( numeric_cast<int>( A.rows() ) );
1278  const int N ( numeric_cast<int>( B.columns() ) );
1279  const int K ( numeric_cast<int>( A.columns() ) );
1280  const int lda( numeric_cast<int>( A.spacing() ) );
1281  const int ldb( numeric_cast<int>( B.spacing() ) );
1282  const int ldc( numeric_cast<int>( C.spacing() ) );
1283 
// alpha = 1.0 and beta = 1.0: the product is added on top of the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
// NOTE(review): this presumably relies on A and B being stored column-major - confirm.
1284  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1285  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1286  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1287  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1288  }
1290 #endif
1291  //**********************************************************************************************
1292 
1293  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1294 #if BLAZE_BLAS_MODE
1295 
/*!\brief BLAS-based addition assignment for single precision complex operands
//        (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseSinglePrecisionComplexKernel holds
// for the three matrix types. The complete update is delegated to a single \c cblas_cgemm call.
*/
1308  template< typename MT3 // Type of the left-hand side target matrix
1309  , typename MT4 // Type of the left-hand side matrix operand
1310  , typename MT5 > // Type of the right-hand side matrix operand
1311  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1312  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1313  {
1314  using boost::numeric_cast;
1315 
// Constraints on the value_type of each element type
// (presumably compile time checks that the complex numbers are built on float - confirm).
1319  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1320  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1321  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1322 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_cgemm below.
1323  const int M ( numeric_cast<int>( A.rows() ) );
1324  const int N ( numeric_cast<int>( B.columns() ) );
1325  const int K ( numeric_cast<int>( A.columns() ) );
1326  const int lda( numeric_cast<int>( A.spacing() ) );
1327  const int ldb( numeric_cast<int>( B.spacing() ) );
1328  const int ldc( numeric_cast<int>( C.spacing() ) );
// Both scaling factors are the complex value (1,0); they are passed to cblas_cgemm by address.
1329  const complex<float> alpha( 1.0F, 0.0F );
1330  const complex<float> beta ( 1.0F, 0.0F );
1331 
// With alpha = beta = 1 the product is added on top of the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
1332  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1333  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1334  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1335  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1336  }
1338 #endif
1339  //**********************************************************************************************
1340 
1341  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1342 #if BLAZE_BLAS_MODE
1343 
/*!\brief BLAS-based addition assignment for double precision complex operands
//        (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseDoublePrecisionComplexKernel holds
// for the three matrix types. The complete update is delegated to a single \c cblas_zgemm call.
*/
1356  template< typename MT3 // Type of the left-hand side target matrix
1357  , typename MT4 // Type of the left-hand side matrix operand
1358  , typename MT5 > // Type of the right-hand side matrix operand
1359  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1360  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1361  {
1362  using boost::numeric_cast;
1363 
// Constraints on the value_type of each element type
// (presumably compile time checks that the complex numbers are built on double - confirm).
1367  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1368  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1369  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1370 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_zgemm below.
1371  const int M ( numeric_cast<int>( A.rows() ) );
1372  const int N ( numeric_cast<int>( B.columns() ) );
1373  const int K ( numeric_cast<int>( A.columns() ) );
1374  const int lda( numeric_cast<int>( A.spacing() ) );
1375  const int ldb( numeric_cast<int>( B.spacing() ) );
1376  const int ldc( numeric_cast<int>( C.spacing() ) );
// Both scaling factors are the complex value (1,0); they are passed to cblas_zgemm by address.
1377  const complex<double> alpha( 1.0, 0.0 );
1378  const complex<double> beta ( 1.0, 0.0 );
1379 
// With alpha = beta = 1 the product is added on top of the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
1380  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1381  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1382  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1383  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1384  }
1386 #endif
1387  //**********************************************************************************************
1388 
1389  //**Addition assignment to sparse matrices******************************************************
1390  // No special implementation for the addition assignment to sparse matrices.
1391  //**********************************************************************************************
1392 
1393  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a dense matrix product to a dense matrix
//        (\f$ lhs-=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Both operands of the expression are evaluated into local objects first. Afterwards either
// the default kernel or the BLAS kernel is selected, depending on the number of elements of
// the target matrix.
*/
1406  template< typename MT // Type of the target dense matrix
1407  , bool SO > // Storage order of the target dense matrix
1408  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1409  {
1411 
1412  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1413  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1414 
// Nothing to do for an empty target or an empty inner dimension
1415  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1416  return;
1417  }
1418 
1419  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1420  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1421 
// The evaluated operands must keep the dimensions of the original operands and must fit
// the target matrix
1422  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1423  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1424  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1425  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1426  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1427  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1428 
// Targets with fewer than TDMATTDMATMULT_THRESHOLD elements use the default kernel, all
// other targets are passed to the BLAS kernel selection
1429  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
1430  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1431  else
1432  TDMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1433  }
1435  //**********************************************************************************************
1436 
1437  //**Default subtraction assignment to dense matrices********************************************
1451  template< typename MT3 // Type of the left-hand side target matrix
1452  , typename MT4 // Type of the left-hand side matrix operand
1453  , typename MT5 > // Type of the right-hand side matrix operand
1454  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1455  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1456  {
1457  const size_t M( A.rows() );
1458  const size_t N( B.columns() );
1459  const size_t K( A.columns() );
1460 
1461  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1462  const size_t end( N & size_t(-2) );
1463 
1464  for( size_t i=0UL; i<M; ++i ) {
1465  for( size_t k=0UL; k<K; ++k ) {
1466  for( size_t j=0UL; j<end; j+=2UL ) {
1467  C(i,j ) -= A(i,k) * B(k,j );
1468  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1469  }
1470  if( end < N ) {
1471  C(i,end) -= A(i,k) * B(k,end);
1472  }
1473  }
1474  }
1475  }
1477  //**********************************************************************************************
1478 
1479  //**Vectorized default subtraction assignment to row-major dense matrices***********************
/*!\brief Vectorized default subtraction assignment to a row-major dense matrix
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Overload enabled through \c EnableIf when \c UseVectorizedDefaultKernel holds. No product is
// computed here: one of the two operands is copied into its \c OppositeType and the
// subtraction assignment is restarted with the resulting product expression.
// NOTE(review): \c OppositeType is presumably the same matrix with the opposite storage
// order - confirm.
*/
1493  template< typename MT3 // Type of the left-hand side target matrix
1494  , typename MT4 // Type of the left-hand side matrix operand
1495  , typename MT5 > // Type of the right-hand side matrix operand
1496  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1497  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1498  {
1501 
// Only A is resizable: B is the operand that gets converted
1502  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1503  const typename MT5::OppositeType tmp( B );
1504  subAssign( ~C, A * tmp );
1505  }
// Only B is resizable: A is the operand that gets converted
1506  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1507  const typename MT4::OppositeType tmp( A );
1508  subAssign( ~C, tmp * B );
1509  }
// Same resizability: the operand with fewer elements is converted (B in case of a tie)
1510  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1511  const typename MT5::OppositeType tmp( B );
1512  subAssign( ~C, A * tmp );
1513  }
1514  else {
1515  const typename MT4::OppositeType tmp( A );
1516  subAssign( ~C, tmp * B );
1517  }
1518  }
1520  //**********************************************************************************************
1521 
1522  //**Vectorized default subtraction assignment to column-major dense matrices********************
/*!\brief Vectorized default subtraction assignment to a column-major dense matrix
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Overload enabled through \c EnableIf when \c UseVectorizedDefaultKernel holds. The rows of
// \a C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s), where the row
// index advances by \c IT::size per vector. Each block keeps its accumulators in local
// \c IntrinsicType variables: they are loaded from \a C, reduced by the products of the loaded
// part of \a A with the value \c set(B(k,j)) for every k, and stored back to \a C.
// NOTE(review): \c set() presumably broadcasts a scalar into all elements of an intrinsic
// vector - confirm.
// NOTE(review): every block is entered as soon as the first row of its last vector is below
// M, so the last vector may reach beyond row M-1; this presumably relies on padded column
// storage of \a A and \a C - confirm.
*/
1536  template< typename MT3 // Type of the left-hand side target matrix
1537  , typename MT4 // Type of the left-hand side matrix operand
1538  , typename MT5 > // Type of the right-hand side matrix operand
1539  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1540  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1541  {
1542  typedef IntrinsicTrait<ElementType> IT;
1543 
1544  const size_t M( A.rows() );
1545  const size_t N( B.columns() );
1546  const size_t K( A.columns() );
1547 
// Row index shared by all four stages below; each stage continues where the previous stopped
1548  size_t i( 0UL );
1549 
// Stage 1: eight intrinsic vectors of rows, one column of C at a time
1550  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1551  for( size_t j=0UL; j<N; ++j ) {
1552  IntrinsicType xmm1( (~C).load(i ,j) );
1553  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1554  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1555  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1556  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1557  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1558  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1559  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1560  for( size_t k=0UL; k<K; ++k ) {
1561  const IntrinsicType b1( set( B(k,j) ) );
1562  xmm1 = xmm1 - A.load(i ,k) * b1;
1563  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1564  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1565  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1566  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1567  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1568  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1569  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1570  }
1571  (~C).store( i , j, xmm1 );
1572  (~C).store( i+IT::size , j, xmm2 );
1573  (~C).store( i+IT::size*2UL, j, xmm3 );
1574  (~C).store( i+IT::size*3UL, j, xmm4 );
1575  (~C).store( i+IT::size*4UL, j, xmm5 );
1576  (~C).store( i+IT::size*5UL, j, xmm6 );
1577  (~C).store( i+IT::size*6UL, j, xmm7 );
1578  (~C).store( i+IT::size*7UL, j, xmm8 );
1579  }
1580  }
// Stage 2: four intrinsic vectors of rows, two columns of C at a time
// (xmm1-xmm4 belong to column j, xmm5-xmm8 to column j+1)
1581  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1582  size_t j( 0UL );
1583  for( ; (j+2UL) <= N; j+=2UL ) {
1584  IntrinsicType xmm1( (~C).load(i ,j ) );
1585  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1586  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1587  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1588  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1589  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1590  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1591  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1592  for( size_t k=0UL; k<K; ++k ) {
1593  const IntrinsicType a1( A.load(i ,k) );
1594  const IntrinsicType a2( A.load(i+IT::size ,k) );
1595  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1596  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1597  const IntrinsicType b1( set( B(k,j ) ) );
1598  const IntrinsicType b2( set( B(k,j+1UL) ) );
1599  xmm1 = xmm1 - a1 * b1;
1600  xmm2 = xmm2 - a2 * b1;
1601  xmm3 = xmm3 - a3 * b1;
1602  xmm4 = xmm4 - a4 * b1;
1603  xmm5 = xmm5 - a1 * b2;
1604  xmm6 = xmm6 - a2 * b2;
1605  xmm7 = xmm7 - a3 * b2;
1606  xmm8 = xmm8 - a4 * b2;
1607  }
1608  (~C).store( i , j , xmm1 );
1609  (~C).store( i+IT::size , j , xmm2 );
1610  (~C).store( i+IT::size*2UL, j , xmm3 );
1611  (~C).store( i+IT::size*3UL, j , xmm4 );
1612  (~C).store( i , j+1UL, xmm5 );
1613  (~C).store( i+IT::size , j+1UL, xmm6 );
1614  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1615  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1616  }
// Odd number of columns: remaining single column of the current row block
1617  if( j < N ) {
1618  IntrinsicType xmm1( (~C).load(i ,j) );
1619  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1620  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1621  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1622  for( size_t k=0UL; k<K; ++k ) {
1623  const IntrinsicType b1( set( B(k,j) ) );
1624  xmm1 = xmm1 - A.load(i ,k) * b1;
1625  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1626  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1627  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1628  }
1629  (~C).store( i , j, xmm1 );
1630  (~C).store( i+IT::size , j, xmm2 );
1631  (~C).store( i+IT::size*2UL, j, xmm3 );
1632  (~C).store( i+IT::size*3UL, j, xmm4 );
1633  }
1634  }
// Stage 3: two intrinsic vectors of rows, two columns of C at a time
// (xmm1/xmm2 belong to column j, xmm3/xmm4 to column j+1)
1635  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1636  size_t j( 0UL );
1637  for( ; (j+2UL) <= N; j+=2UL ) {
1638  IntrinsicType xmm1( (~C).load(i ,j ) );
1639  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1640  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1641  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1642  for( size_t k=0UL; k<K; ++k ) {
1643  const IntrinsicType a1( A.load(i ,k) );
1644  const IntrinsicType a2( A.load(i+IT::size,k) );
1645  const IntrinsicType b1( set( B(k,j ) ) );
1646  const IntrinsicType b2( set( B(k,j+1UL) ) );
1647  xmm1 = xmm1 - a1 * b1;
1648  xmm2 = xmm2 - a2 * b1;
1649  xmm3 = xmm3 - a1 * b2;
1650  xmm4 = xmm4 - a2 * b2;
1651  }
1652  (~C).store( i , j , xmm1 );
1653  (~C).store( i+IT::size, j , xmm2 );
1654  (~C).store( i , j+1UL, xmm3 );
1655  (~C).store( i+IT::size, j+1UL, xmm4 );
1656  }
// Odd number of columns: remaining single column of the current row block
1657  if( j < N ) {
1658  IntrinsicType xmm1( (~C).load(i ,j) );
1659  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1660  for( size_t k=0UL; k<K; ++k ) {
1661  const IntrinsicType b1( set( B(k,j) ) );
1662  xmm1 = xmm1 - A.load(i ,k) * b1;
1663  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1664  }
1665  (~C).store( i , j, xmm1 );
1666  (~C).store( i+IT::size, j, xmm2 );
1667  }
1668  }
// Stage 4: at most one intrinsic vector of rows is left, two columns of C at a time
1669  if( i < M ) {
1670  size_t j( 0UL );
1671  for( ; (j+2UL) <= N; j+=2UL ) {
1672  IntrinsicType xmm1( (~C).load(i,j ) );
1673  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1674  for( size_t k=0UL; k<K; ++k ) {
1675  const IntrinsicType a1( A.load(i,k) );
1676  xmm1 = xmm1 - a1 * set( B(k,j ) );
1677  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1678  }
1679  (~C).store( i, j , xmm1 );
1680  (~C).store( i, j+1UL, xmm2 );
1681  }
// Odd number of columns: remaining single column
1682  if( j < N ) {
1683  IntrinsicType xmm1( (~C).load(i,j) );
1684  for( size_t k=0UL; k<K; ++k ) {
1685  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
1686  }
1687  (~C).store( i, j, xmm1 );
1688  }
1689  }
1690  }
1692  //**********************************************************************************************
1693 
1694  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1708  template< typename MT3 // Type of the left-hand side target matrix
1709  , typename MT4 // Type of the left-hand side matrix operand
1710  , typename MT5 > // Type of the right-hand side matrix operand
1711  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1712  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1713  {
1714  selectDefaultSubAssignKernel( C, A, B );
1715  }
1717  //**********************************************************************************************
1718 
1719  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1720 #if BLAZE_BLAS_MODE
1721 
/*!\brief BLAS-based subtraction assignment for single precision operands (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseSinglePrecisionKernel holds for the
// three matrix types. The complete update is delegated to a single \c cblas_sgemm call.
*/
1734  template< typename MT3 // Type of the left-hand side target matrix
1735  , typename MT4 // Type of the left-hand side matrix operand
1736  , typename MT5 > // Type of the right-hand side matrix operand
1737  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1738  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1739  {
1740  using boost::numeric_cast;
1741 
1745 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_sgemm below.
1746  const int M ( numeric_cast<int>( A.rows() ) );
1747  const int N ( numeric_cast<int>( B.columns() ) );
1748  const int K ( numeric_cast<int>( A.columns() ) );
1749  const int lda( numeric_cast<int>( A.spacing() ) );
1750  const int ldb( numeric_cast<int>( B.spacing() ) );
1751  const int ldc( numeric_cast<int>( C.spacing() ) );
1752 
// alpha = -1.0F and beta = 1.0F: the product is subtracted from the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
// NOTE(review): this presumably relies on A and B being stored column-major - confirm.
1753  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1754  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1755  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1756  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1757  }
1759 #endif
1760  //**********************************************************************************************
1761 
1762  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1763 #if BLAZE_BLAS_MODE
1764 
/*!\brief BLAS-based subtraction assignment for double precision operands (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseDoublePrecisionKernel holds for the
// three matrix types. The complete update is delegated to a single \c cblas_dgemm call.
*/
1777  template< typename MT3 // Type of the left-hand side target matrix
1778  , typename MT4 // Type of the left-hand side matrix operand
1779  , typename MT5 > // Type of the right-hand side matrix operand
1780  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1781  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1782  {
1783  using boost::numeric_cast;
1784 
1788 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_dgemm below.
1789  const int M ( numeric_cast<int>( A.rows() ) );
1790  const int N ( numeric_cast<int>( B.columns() ) );
1791  const int K ( numeric_cast<int>( A.columns() ) );
1792  const int lda( numeric_cast<int>( A.spacing() ) );
1793  const int ldb( numeric_cast<int>( B.spacing() ) );
1794  const int ldc( numeric_cast<int>( C.spacing() ) );
1795 
// alpha = -1.0 and beta = 1.0: the product is subtracted from the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
// NOTE(review): this presumably relies on A and B being stored column-major - confirm.
1796  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1797  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1798  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1799  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1800  }
1802 #endif
1803  //**********************************************************************************************
1804 
1805  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1806 #if BLAZE_BLAS_MODE
1807 
/*!\brief BLAS-based subtraction assignment for single precision complex operands
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseSinglePrecisionComplexKernel holds
// for the three matrix types. The complete update is delegated to a single \c cblas_cgemm call.
*/
1820  template< typename MT3 // Type of the left-hand side target matrix
1821  , typename MT4 // Type of the left-hand side matrix operand
1822  , typename MT5 > // Type of the right-hand side matrix operand
1823  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1824  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1825  {
1826  using boost::numeric_cast;
1827 
// Constraints on the value_type of each element type
// (presumably compile time checks that the complex numbers are built on float - confirm).
1831  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1832  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1833  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1834 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_cgemm below.
1835  const int M ( numeric_cast<int>( A.rows() ) );
1836  const int N ( numeric_cast<int>( B.columns() ) );
1837  const int K ( numeric_cast<int>( A.columns() ) );
1838  const int lda( numeric_cast<int>( A.spacing() ) );
1839  const int ldb( numeric_cast<int>( B.spacing() ) );
1840  const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha is the complex value (-1,0), beta the complex value (1,0); both are passed by address.
1841  const complex<float> alpha( -1.0F, 0.0F );
1842  const complex<float> beta ( 1.0F, 0.0F );
1843 
// With alpha = -1 and beta = 1 the product is subtracted from the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
1844  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1845  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1846  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1847  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1848  }
1850 #endif
1851  //**********************************************************************************************
1852 
1853  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1854 #if BLAZE_BLAS_MODE
1855 
/*!\brief BLAS-based subtraction assignment for double precision complex operands
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled through \c EnableIf when \c UseDoublePrecisionComplexKernel holds
// for the three matrix types. The complete update is delegated to a single \c cblas_zgemm call.
*/
1868  template< typename MT3 // Type of the left-hand side target matrix
1869  , typename MT4 // Type of the left-hand side matrix operand
1870  , typename MT5 > // Type of the right-hand side matrix operand
1871  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1872  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1873  {
1874  using boost::numeric_cast;
1875 
// Constraints on the value_type of each element type
// (presumably compile time checks that the complex numbers are built on double - confirm).
1879  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1880  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1881  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1882 
// The dimensions and spacings of the matrices are converted to the int arguments that are
// handed to cblas_zgemm below.
1883  const int M ( numeric_cast<int>( A.rows() ) );
1884  const int N ( numeric_cast<int>( B.columns() ) );
1885  const int K ( numeric_cast<int>( A.columns() ) );
1886  const int lda( numeric_cast<int>( A.spacing() ) );
1887  const int ldb( numeric_cast<int>( B.spacing() ) );
1888  const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha is the complex value (-1,0), beta the complex value (1,0); both are passed by address.
1889  const complex<double> alpha( -1.0, 0.0 );
1890  const complex<double> beta ( 1.0, 0.0 );
1891 
// With alpha = -1 and beta = 1 the product is subtracted from the current content of C.
// A row-major target selects CblasRowMajor with both operands flagged as transposed; any
// other target selects CblasColMajor without transposition.
1892  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1893  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1894  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1895  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1896  }
1898 #endif
1899  //**********************************************************************************************
1900 
1901  //**Subtraction assignment to sparse matrices***************************************************
1902  // No special implementation for the subtraction assignment to sparse matrices.
1903  //**********************************************************************************************
1904 
1905  //**Multiplication assignment to dense matrices*************************************************
1906  // No special implementation for the multiplication assignment to dense matrices.
1907  //**********************************************************************************************
1908 
1909  //**Multiplication assignment to sparse matrices************************************************
1910  // No special implementation for the multiplication assignment to sparse matrices.
1911  //**********************************************************************************************
1912 
1913  //**Compile time checks*************************************************************************
1920  //**********************************************************************************************
1921 };
1922 //*************************************************************************************************
1923 
1924 
1925 
1926 
1927 //=================================================================================================
1928 //
1929 // DMATSCALARMULTEXPR SPECIALIZATION
1930 //
1931 //=================================================================================================
1932 
1933 //*************************************************************************************************
1941 template< typename MT1 // Type of the left-hand side dense matrix
1942  , typename MT2 // Type of the right-hand side dense matrix
1943  , typename ST > // Type of the right-hand side scalar value
1944 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
1945  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
1946  , private MatScalarMultExpr
1947  , private Computation
1948 {
1949  private:
1950  //**Type definitions****************************************************************************
// Private shorthand names for the wrapped multiplication expression and for the nested types
// of its two dense matrix operands.
1951  typedef TDMatTDMatMultExpr<MT1,MT2> MMM; //!< Type of the wrapped dense matrix multiplication expression.
1952  typedef typename MMM::ResultType RES; //!< Result type of the wrapped multiplication expression.
1953  typedef typename MT1::ResultType RT1; //!< Result type of the left-hand side dense matrix.
1954  typedef typename MT2::ResultType RT2; //!< Result type of the right-hand side dense matrix.
1955  typedef typename MT1::CompositeType CT1; //!< Composite type of the left-hand side dense matrix.
1956  typedef typename MT2::CompositeType CT2; //!< Composite type of the right-hand side dense matrix.
1957  //**********************************************************************************************
1958 
1959  //**********************************************************************************************
1961 
1964  template< typename T1, typename T2, typename T3, typename T4 >
1965  struct UseSinglePrecisionKernel {
1966  enum { value = IsFloat<typename T1::ElementType>::value &&
1967  IsFloat<typename T2::ElementType>::value &&
1968  IsFloat<typename T3::ElementType>::value &&
1969  !IsComplex<T4>::value };
1970  };
1971  //**********************************************************************************************
1972 
1973  //**********************************************************************************************
1975 
1978  template< typename T1, typename T2, typename T3, typename T4 >
1979  struct UseDoublePrecisionKernel {
1980  enum { value = IsDouble<typename T1::ElementType>::value &&
1981  IsDouble<typename T2::ElementType>::value &&
1982  IsDouble<typename T3::ElementType>::value &&
1983  !IsComplex<T4>::value };
1984  };
1985  //**********************************************************************************************
1986 
1987  //**********************************************************************************************
1989 
1992  template< typename T1, typename T2, typename T3 >
1993  struct UseSinglePrecisionComplexKernel {
1994  typedef complex<float> Type;
1995  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1996  IsSame<typename T2::ElementType,Type>::value &&
1997  IsSame<typename T3::ElementType,Type>::value };
1998  };
1999  //**********************************************************************************************
2000 
2001  //**********************************************************************************************
2003 
2006  template< typename T1, typename T2, typename T3 >
2007  struct UseDoublePrecisionComplexKernel {
2008  typedef complex<double> Type;
2009  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2010  IsSame<typename T2::ElementType,Type>::value &&
2011  IsSame<typename T3::ElementType,Type>::value };
2012  };
2013  //**********************************************************************************************
2014 
2015  //**********************************************************************************************
2017 
2019  template< typename T1, typename T2, typename T3, typename T4 >
2020  struct UseDefaultKernel {
2021  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2022  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2023  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2024  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2025  };
2026  //**********************************************************************************************
2027 
2028  //**********************************************************************************************
2030 
2032  template< typename T1, typename T2, typename T3, typename T4 >
2033  struct UseVectorizedDefaultKernel {
2034  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2035  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2036  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2037  IsSame<typename T1::ElementType,T4>::value &&
2038  IntrinsicTrait<typename T1::ElementType>::addition &&
2039  IntrinsicTrait<typename T1::ElementType>::multiplication };
2040  };
2041  //**********************************************************************************************
2042 
2043  public:
2044  //**Type definitions****************************************************************************
// Public type aliases of the scaled matrix-product expression.
2045  typedef DMatScalarMultExpr<MMM,ST,true> This;  // Type of this expression
2046  typedef typename MultTrait<RES,ST>::Type ResultType;  // MultTrait of RES and the scalar type ST
2047  typedef typename ResultType::OppositeType OppositeType;  // Taken from ResultType
2048  typedef typename ResultType::TransposeType TransposeType;  // Taken from ResultType
2049  typedef typename ResultType::ElementType ElementType;  // Taken from ResultType
2050  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  // Intrinsic type associated with ElementType
2051  typedef const ElementType ReturnType;  // Return type of operator()
2052  typedef const ResultType CompositeType;  // A const ResultType, i.e. an evaluated result
2053 
// Type of the left-hand side operand: the (unscaled) matrix product expression.
2055  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2056 
// Type of the right-hand side operand: the scalar type ST.
2058  typedef ST RightOperand;
2059 
// Type used to evaluate the left matrix operand in the kernels: const RT1 if
// IsComputation<MT1> holds, otherwise CT1.
2061  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2062 
// Type used to evaluate the right matrix operand in the kernels: const RT2 if
// IsComputation<MT2> holds, otherwise CT2.
2064  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2065  //**********************************************************************************************
2066 
2067  //**Compilation flags***************************************************************************
// Compilation flag, fixed to 0 for this expression.
// NOTE(review): presumably this tells consumers not to use load()-style access on the
// expression itself -- confirm against the users of the 'vectorizable' flag.
2069  enum { vectorizable = 0 };
2070  //**********************************************************************************************
2071 
2072  //**Constructor*********************************************************************************
2078  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2079  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2080  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2081  {}
2082  //**********************************************************************************************
2083 
2084  //**Access operator*****************************************************************************
2091  inline ReturnType operator()( size_t i, size_t j ) const {
2092  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2093  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2094  return matrix_(i,j) * scalar_;
2095  }
2096  //**********************************************************************************************
2097 
2098  //**Rows function*******************************************************************************
2103  inline size_t rows() const {
2104  return matrix_.rows();
2105  }
2106  //**********************************************************************************************
2107 
2108  //**Columns function****************************************************************************
2113  inline size_t columns() const {
2114  return matrix_.columns();
2115  }
2116  //**********************************************************************************************
2117 
2118  //**Left operand access*************************************************************************
2123  inline LeftOperand leftOperand() const {
2124  return matrix_;
2125  }
2126  //**********************************************************************************************
2127 
2128  //**Right operand access************************************************************************
2133  inline RightOperand rightOperand() const {
2134  return scalar_;
2135  }
2136  //**********************************************************************************************
2137 
2138  //**********************************************************************************************
2144  template< typename T >
2145  inline bool canAlias( const T* alias ) const {
2146  return matrix_.canAlias( alias );
2147  }
2148  //**********************************************************************************************
2149 
2150  //**********************************************************************************************
2156  template< typename T >
2157  inline bool isAliased( const T* alias ) const {
2158  return matrix_.isAliased( alias );
2159  }
2160  //**********************************************************************************************
2161 
2162  private:
2163  //**Member variables****************************************************************************
2165  RightOperand scalar_;
2166  //**********************************************************************************************
2167 
2168  //**Assignment to dense matrices****************************************************************
2177  template< typename MT3 // Type of the target dense matrix
2178  , bool SO > // Storage order of the target dense matrix
2179  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2180  {
2182 
2183  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2184  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2185 
2186  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2187  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2188 
2189  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2190  return;
2191  }
2192  else if( left.columns() == 0UL ) {
2193  reset( ~lhs );
2194  return;
2195  }
2196 
2197  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2198  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2199 
2200  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2201  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2202  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2203  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2204  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2205  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2206 
2207  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2208  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2209  else
2210  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2211  }
2212  //**********************************************************************************************
2213 
2214  //**Default assignment to dense matrices********************************************************
2228  template< typename MT3 // Type of the left-hand side target matrix
2229  , typename MT4 // Type of the left-hand side matrix operand
2230  , typename MT5 // Type of the right-hand side matrix operand
2231  , typename ST2 > // Type of the scalar value
2232  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2233  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2234  {
2235  for( size_t i=0UL; i<A.rows(); ++i ) {
2236  for( size_t k=0UL; k<B.columns(); ++k ) {
2237  C(i,k) = A(i,0UL) * B(0UL,k);
2238  }
2239  for( size_t j=1UL; j<A.columns(); ++j ) {
2240  for( size_t k=0UL; k<B.columns(); ++k ) {
2241  C(i,k) += A(i,j) * B(j,k);
2242  }
2243  }
2244  for( size_t k=0UL; k<B.columns(); ++k ) {
2245  C(i,k) *= scalar;
2246  }
2247  }
2248  }
2249  //**********************************************************************************************
2250 
2251  //**Vectorized default assignment to row-major dense matrices***********************************
2265  template< typename MT3 // Type of the left-hand side target matrix
2266  , typename MT4 // Type of the left-hand side matrix operand
2267  , typename MT5 // Type of the right-hand side matrix operand
2268  , typename ST2 > // Type of the scalar value
2269  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2270  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2271  {
2274 
2275  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2276  const typename MT5::OppositeType tmp( B );
2277  assign( ~C, A * tmp * scalar );
2278  }
2279  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2280  const typename MT4::OppositeType tmp( A );
2281  assign( ~C, tmp * B * scalar );
2282  }
2283  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2284  const typename MT5::OppositeType tmp( B );
2285  assign( ~C, A * tmp * scalar );
2286  }
2287  else {
2288  const typename MT4::OppositeType tmp( A );
2289  assign( ~C, tmp * B * scalar );
2290  }
2291  }
2292  //**********************************************************************************************
2293 
2294  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for a column-major target: computes C = ( A * B ) * scalar with
// IntrinsicType values. M = A.rows(), N = B.columns(), K = A.columns(). The rows are walked
// in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements; each xmm
// variable accumulates over k the products of a vector loaded from A with a value built from
// one element of B via set(), and is multiplied by 'factor' when stored to C.
// NOTE(review): the xmm accumulators are declared without an initializer, so this presumably
// relies on IntrinsicType's default construction yielding zero -- confirm.
// NOTE(review): a row block is entered as soon as its last vector starts below M, so loads
// and stores can reach rows >= M; presumably the matrices' storage is padded -- confirm.
2308  template< typename MT3 // Type of the left-hand side target matrix
2309  , typename MT4 // Type of the left-hand side matrix operand
2310  , typename MT5 // Type of the right-hand side matrix operand
2311  , typename ST2 > // Type of the scalar value
2312  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2313  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2314  {
2315  typedef IntrinsicTrait<ElementType> IT;
2316 
2317  const size_t M( A.rows() );
2318  const size_t N( B.columns() );
2319  const size_t K( A.columns() );
2320 
// The scalar as an IntrinsicType value, applied once per stored vector.
2321  const IntrinsicType factor( set( scalar ) );
2322 
2323  size_t i( 0UL );
2324 
// Row blocks of 8 vectors, one column of C per inner iteration.
2325  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2326  for( size_t j=0UL; j<N; ++j ) {
2327  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2328  for( size_t k=0UL; k<K; ++k ) {
2329  const IntrinsicType b1( set( B(k,j) ) );
2330  xmm1 = xmm1 + A.load(i ,k) * b1;
2331  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2332  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2333  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2334  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2335  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2336  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2337  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2338  }
2339  (~C).store( i , j, xmm1 * factor );
2340  (~C).store( i+IT::size , j, xmm2 * factor );
2341  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2342  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2343  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
2344  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
2345  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
2346  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
2347  }
2348  }
// Row blocks of 4 vectors, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
2349  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2350  size_t j( 0UL );
2351  for( ; (j+2UL) <= N; j+=2UL ) {
2352  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2353  for( size_t k=0UL; k<K; ++k ) {
2354  const IntrinsicType a1( A.load(i ,k) );
2355  const IntrinsicType a2( A.load(i+IT::size ,k) );
2356  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2357  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2358  const IntrinsicType b1( set( B(k,j ) ) );
2359  const IntrinsicType b2( set( B(k,j+1UL) ) );
2360  xmm1 = xmm1 + a1 * b1;
2361  xmm2 = xmm2 + a2 * b1;
2362  xmm3 = xmm3 + a3 * b1;
2363  xmm4 = xmm4 + a4 * b1;
2364  xmm5 = xmm5 + a1 * b2;
2365  xmm6 = xmm6 + a2 * b2;
2366  xmm7 = xmm7 + a3 * b2;
2367  xmm8 = xmm8 + a4 * b2;
2368  }
2369  (~C).store( i , j , xmm1 * factor );
2370  (~C).store( i+IT::size , j , xmm2 * factor );
2371  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
2372  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
2373  (~C).store( i , j+1UL, xmm5 * factor );
2374  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
2375  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2376  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
2377  }
// Remaining single column when N is odd.
2378  if( j < N ) {
2379  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2380  for( size_t k=0UL; k<K; ++k ) {
2381  const IntrinsicType b1( set( B(k,j) ) );
2382  xmm1 = xmm1 + A.load(i ,k) * b1;
2383  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2384  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2385  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2386  }
2387  (~C).store( i , j, xmm1 * factor );
2388  (~C).store( i+IT::size , j, xmm2 * factor );
2389  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2390  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2391  }
2392  }
// Row blocks of 2 vectors, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
2393  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2394  size_t j( 0UL );
2395  for( ; (j+2UL) <= N; j+=2UL ) {
2396  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2397  for( size_t k=0UL; k<K; ++k ) {
2398  const IntrinsicType a1( A.load(i ,k) );
2399  const IntrinsicType a2( A.load(i+IT::size,k) );
2400  const IntrinsicType b1( set( B(k,j ) ) );
2401  const IntrinsicType b2( set( B(k,j+1UL) ) );
2402  xmm1 = xmm1 + a1 * b1;
2403  xmm2 = xmm2 + a2 * b1;
2404  xmm3 = xmm3 + a1 * b2;
2405  xmm4 = xmm4 + a2 * b2;
2406  }
2407  (~C).store( i , j , xmm1 * factor );
2408  (~C).store( i+IT::size, j , xmm2 * factor );
2409  (~C).store( i , j+1UL, xmm3 * factor );
2410  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
2411  }
// Remaining single column when N is odd.
2412  if( j < N ) {
2413  IntrinsicType xmm1, xmm2;
2414  for( size_t k=0UL; k<K; ++k ) {
2415  const IntrinsicType b1( set( B(k,j) ) );
2416  xmm1 = xmm1 + A.load(i ,k) * b1;
2417  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2418  }
2419  (~C).store( i , j, xmm1 * factor );
2420  (~C).store( i+IT::size, j, xmm2 * factor );
2421  }
2422  }
// Final row block of a single vector, again two columns at a time plus an odd-column step.
2423  if( i < M ) {
2424  size_t j( 0UL );
2425  for( ; (j+2UL) <= N; j+=2UL ) {
2426  IntrinsicType xmm1, xmm2;
2427  for( size_t k=0UL; k<K; ++k ) {
2428  const IntrinsicType a1( A.load(i,k) );
2429  xmm1 = xmm1 + a1 * set( B(k,j ) );
2430  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2431  }
2432  (~C).store( i, j , xmm1 * factor );
2433  (~C).store( i, j+1UL, xmm2 * factor );
2434  }
2435  if( j < N ) {
2436  IntrinsicType xmm1;
2437  for( size_t k=0UL; k<K; ++k ) {
2438  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
2439  }
2440  (~C).store( i, j, xmm1 * factor );
2441  }
2442  }
2443  }
2444  //**********************************************************************************************
2445 
2446  //**BLAS-based assignment to dense matrices (default)*******************************************
2460  template< typename MT3 // Type of the left-hand side target matrix
2461  , typename MT4 // Type of the left-hand side matrix operand
2462  , typename MT5 // Type of the right-hand side matrix operand
2463  , typename ST2 > // Type of the scalar value
2464  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2465  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2466  {
2467  selectDefaultAssignKernel( C, A, B, scalar );
2468  }
2469  //**********************************************************************************************
2470 
2471  //**BLAS-based assignment to dense matrices (single precision)**********************************
2472 #if BLAZE_BLAS_MODE
2473 
2486  template< typename MT3 // Type of the left-hand side target matrix
2487  , typename MT4 // Type of the left-hand side matrix operand
2488  , typename MT5 // Type of the right-hand side matrix operand
2489  , typename ST2 > // Type of the scalar value
2490  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2491  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2492  {
2493  using boost::numeric_cast;
2494 
2498 
2499  const int M ( numeric_cast<int>( A.rows() ) );
2500  const int N ( numeric_cast<int>( B.columns() ) );
2501  const int K ( numeric_cast<int>( A.columns() ) );
2502  const int lda( numeric_cast<int>( A.spacing() ) );
2503  const int ldb( numeric_cast<int>( B.spacing() ) );
2504  const int ldc( numeric_cast<int>( C.spacing() ) );
2505 
2506  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2507  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2508  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2509  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2510  }
2511 #endif
2512  //**********************************************************************************************
2513 
2514  //**BLAS-based assignment to dense matrices (double precision)**********************************
2515 #if BLAZE_BLAS_MODE
2516 
2529  template< typename MT3 // Type of the left-hand side target matrix
2530  , typename MT4 // Type of the left-hand side matrix operand
2531  , typename MT5 // Type of the right-hand side matrix operand
2532  , typename ST2 > // Type of the scalar value
2533  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2534  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2535  {
2536  using boost::numeric_cast;
2537 
2541 
2542  const int M ( numeric_cast<int>( A.rows() ) );
2543  const int N ( numeric_cast<int>( B.columns() ) );
2544  const int K ( numeric_cast<int>( A.columns() ) );
2545  const int lda( numeric_cast<int>( A.spacing() ) );
2546  const int ldb( numeric_cast<int>( B.spacing() ) );
2547  const int ldc( numeric_cast<int>( C.spacing() ) );
2548 
2549  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2550  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2551  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2552  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2553  }
2554 #endif
2555  //**********************************************************************************************
2556 
2557  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2558 #if BLAZE_BLAS_MODE
2559 
2572  template< typename MT3 // Type of the left-hand side target matrix
2573  , typename MT4 // Type of the left-hand side matrix operand
2574  , typename MT5 // Type of the right-hand side matrix operand
2575  , typename ST2 > // Type of the scalar value
2576  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2577  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2578  {
2579  using boost::numeric_cast;
2580 
2584  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2585  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2586  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2587 
2588  const int M ( numeric_cast<int>( A.rows() ) );
2589  const int N ( numeric_cast<int>( B.columns() ) );
2590  const int K ( numeric_cast<int>( A.columns() ) );
2591  const int lda( numeric_cast<int>( A.spacing() ) );
2592  const int ldb( numeric_cast<int>( B.spacing() ) );
2593  const int ldc( numeric_cast<int>( C.spacing() ) );
2594  const complex<float> alpha( scalar );
2595  const complex<float> beta ( 0.0F, 0.0F );
2596 
2597  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2598  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2599  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2600  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2601  }
2602 #endif
2603  //**********************************************************************************************
2604 
2605  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2606 #if BLAZE_BLAS_MODE
2607 
2620  template< typename MT3 // Type of the left-hand side target matrix
2621  , typename MT4 // Type of the left-hand side matrix operand
2622  , typename MT5 // Type of the right-hand side matrix operand
2623  , typename ST2 > // Type of the scalar value
2624  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2625  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2626  {
2627  using boost::numeric_cast;
2628 
2632  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2633  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2634  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2635 
2636  const int M ( numeric_cast<int>( A.rows() ) );
2637  const int N ( numeric_cast<int>( B.columns() ) );
2638  const int K ( numeric_cast<int>( A.columns() ) );
2639  const int lda( numeric_cast<int>( A.spacing() ) );
2640  const int ldb( numeric_cast<int>( B.spacing() ) );
2641  const int ldc( numeric_cast<int>( C.spacing() ) );
2642  const complex<double> alpha( scalar );
2643  const complex<double> beta ( 0.0, 0.0 );
2644 
2645  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2646  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2647  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2648  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2649  }
2650 #endif
2651  //**********************************************************************************************
2652 
2653  //**Assignment to sparse matrices***************************************************************
2665  template< typename MT // Type of the target sparse matrix
2666  , bool SO > // Storage order of the target sparse matrix
2667  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2668  {
2670 
2671  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2672 
2679 
2680  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2681  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2682 
2683  const TmpType tmp( rhs );
2684  assign( ~lhs, tmp );
2685  }
2686  //**********************************************************************************************
2687 
2688  //**Addition assignment to dense matrices*******************************************************
2700  template< typename MT3 // Type of the target dense matrix
2701  , bool SO > // Storage order of the target dense matrix
2702  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2703  {
2705 
2706  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2707  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2708 
2709  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2710  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2711 
2712  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2713  return;
2714  }
2715 
2716  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2717  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2718 
2719  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2720  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2721  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2722  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2723  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2724  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2725 
2726  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2727  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2728  else
2729  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2730  }
2731  //**********************************************************************************************
2732 
2733  //**Default addition assignment to dense matrices***********************************************
2747  template< typename MT3 // Type of the left-hand side target matrix
2748  , typename MT4 // Type of the left-hand side matrix operand
2749  , typename MT5 // Type of the right-hand side matrix operand
2750  , typename ST2 > // Type of the scalar value
2751  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2752  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2753  {
2754  const ResultType tmp( A * B * scalar );
2755  addAssign( C, tmp );
2756  }
2757  //**********************************************************************************************
2758 
2759  //**Vectorized default addition assignment to row-major dense matrices**************************
2773  template< typename MT3 // Type of the left-hand side target matrix
2774  , typename MT4 // Type of the left-hand side matrix operand
2775  , typename MT5 // Type of the right-hand side matrix operand
2776  , typename ST2 > // Type of the scalar value
2777  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2778  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2779  {
2782 
2783  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2784  const typename MT5::OppositeType tmp( B );
2785  addAssign( ~C, A * tmp * scalar );
2786  }
2787  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2788  const typename MT4::OppositeType tmp( A );
2789  addAssign( ~C, tmp * B * scalar );
2790  }
2791  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2792  const typename MT5::OppositeType tmp( B );
2793  addAssign( ~C, A * tmp * scalar );
2794  }
2795  else {
2796  const typename MT4::OppositeType tmp( A );
2797  addAssign( ~C, tmp * B * scalar );
2798  }
2799  }
2800  //**********************************************************************************************
2801 
2802  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel for a column-major target: computes C += ( A * B ) * scalar with
// IntrinsicType values. M = A.rows(), N = B.columns(), K = A.columns(). The rows are walked
// in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements; each xmm
// variable accumulates over k the products of a vector loaded from A with a value built from
// one element of B via set(). On write-back the accumulator is multiplied by 'factor' and
// added to the vector currently loaded from C.
// NOTE(review): the xmm accumulators are declared without an initializer, so this presumably
// relies on IntrinsicType's default construction yielding zero -- confirm.
// NOTE(review): a row block is entered as soon as its last vector starts below M, so loads
// and stores can reach rows >= M; presumably the matrices' storage is padded -- confirm.
2816  template< typename MT3 // Type of the left-hand side target matrix
2817  , typename MT4 // Type of the left-hand side matrix operand
2818  , typename MT5 // Type of the right-hand side matrix operand
2819  , typename ST2 > // Type of the scalar value
2820  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2821  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2822  {
2823  typedef IntrinsicTrait<ElementType> IT;
2824 
2825  const size_t M( A.rows() );
2826  const size_t N( B.columns() );
2827  const size_t K( A.columns() );
2828 
// The scalar as an IntrinsicType value, applied once per stored vector.
2829  const IntrinsicType factor( set( scalar ) );
2830 
2831  size_t i( 0UL );
2832 
// Row blocks of 8 vectors, one column of C per inner iteration.
2833  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2834  for( size_t j=0UL; j<N; ++j ) {
2835  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2836  for( size_t k=0UL; k<K; ++k ) {
2837  const IntrinsicType b1( set( B(k,j) ) );
2838  xmm1 = xmm1 + A.load(i ,k) * b1;
2839  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2840  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2841  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2842  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2843  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2844  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2845  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2846  }
2847  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
2848  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
2849  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
2850  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
2851  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
2852  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
2853  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
2854  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
2855  }
2856  }
// Row blocks of 4 vectors, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
2857  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2858  size_t j( 0UL );
2859  for( ; (j+2UL) <= N; j+=2UL ) {
2860  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2861  for( size_t k=0UL; k<K; ++k ) {
2862  const IntrinsicType a1( A.load(i ,k) );
2863  const IntrinsicType a2( A.load(i+IT::size ,k) );
2864  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2865  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2866  const IntrinsicType b1( set( B(k,j ) ) );
2867  const IntrinsicType b2( set( B(k,j+1UL) ) );
2868  xmm1 = xmm1 + a1 * b1;
2869  xmm2 = xmm2 + a2 * b1;
2870  xmm3 = xmm3 + a3 * b1;
2871  xmm4 = xmm4 + a4 * b1;
2872  xmm5 = xmm5 + a1 * b2;
2873  xmm6 = xmm6 + a2 * b2;
2874  xmm7 = xmm7 + a3 * b2;
2875  xmm8 = xmm8 + a4 * b2;
2876  }
2877  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2878  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
2879  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
2880  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
2881  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
2882  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
2883  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
2884  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
2885  }
// Remaining single column when N is odd.
2886  if( j < N ) {
2887  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2888  for( size_t k=0UL; k<K; ++k ) {
2889  const IntrinsicType b1( set( B(k,j) ) );
2890  xmm1 = xmm1 + A.load(i ,k) * b1;
2891  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2892  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2893  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2894  }
2895  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
2896  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
2897  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
2898  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
2899  }
2900  }
// Row blocks of 2 vectors, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
2901  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2902  size_t j( 0UL );
2903  for( ; (j+2UL) <= N; j+=2UL ) {
2904  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2905  for( size_t k=0UL; k<K; ++k ) {
2906  const IntrinsicType a1( A.load(i ,k) );
2907  const IntrinsicType a2( A.load(i+IT::size,k) );
2908  const IntrinsicType b1( set( B(k,j ) ) );
2909  const IntrinsicType b2( set( B(k,j+1UL) ) );
2910  xmm1 = xmm1 + a1 * b1;
2911  xmm2 = xmm2 + a2 * b1;
2912  xmm3 = xmm3 + a1 * b2;
2913  xmm4 = xmm4 + a2 * b2;
2914  }
2915  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2916  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
2917  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
2918  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
2919  }
// Remaining single column when N is odd.
2920  if( j < N ) {
2921  IntrinsicType xmm1, xmm2;
2922  for( size_t k=0UL; k<K; ++k ) {
2923  const IntrinsicType b1( set( B(k,j) ) );
2924  xmm1 = xmm1 + A.load(i ,k) * b1;
2925  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2926  }
2927  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
2928  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
2929  }
2930  }
// Final row block of a single vector, again two columns at a time plus an odd-column step.
2931  if( i < M ) {
2932  size_t j( 0UL );
2933  for( ; (j+2UL) <= N; j+=2UL ) {
2934  IntrinsicType xmm1, xmm2;
2935  for( size_t k=0UL; k<K; ++k ) {
2936  const IntrinsicType a1( A.load(i,k) );
2937  xmm1 = xmm1 + a1 * set( B(k,j ) );
2938  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2939  }
2940  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
2941  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
2942  }
2943  if( j < N ) {
2944  IntrinsicType xmm1;
2945  for( size_t k=0UL; k<K; ++k ) {
2946  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
2947  }
2948  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
2949  }
2950  }
2951  }
2952  //**********************************************************************************************
2953 
2954  //**BLAS-based addition assignment to dense matrices (default)**********************************
/*!\brief BLAS-kernel entry point that falls back to the default addition assignment kernel.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
//
// This overload is enabled through EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> > (the trait is
// defined outside this section) and simply forwards all four arguments, unchanged, to
// selectDefaultAddAssignKernel().
*/
2968  template< typename MT3 // Type of the left-hand side target matrix
2969  , typename MT4 // Type of the left-hand side matrix operand
2970  , typename MT5 // Type of the right-hand side matrix operand
2971  , typename ST2 > // Type of the scalar value
2972  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2973  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2974  {
2975  selectDefaultAddAssignKernel( C, A, B, scalar );
2976  }
2977  //**********************************************************************************************
2978 
2979  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2980 #if BLAZE_BLAS_MODE
2981 
/*!\brief BLAS-based addition assignment using cblas_sgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (passed as the GEMM alpha).
//
// Enabled through EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >. The call uses
// alpha = scalar and beta = 1.0F, which under the CBLAS GEMM contract accumulates the scaled
// product into the existing content of C.
//
// NOTE(review): listing lines 3003-3005 are absent from this listing (gap between 3002 and
// 3006); confirm against the original header what they contain.
*/
2994  template< typename MT3 // Type of the left-hand side target matrix
2995  , typename MT4 // Type of the left-hand side matrix operand
2996  , typename MT5 // Type of the right-hand side matrix operand
2997  , typename ST2 > // Type of the scalar value
2998  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2999  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3000  {
3001  using boost::numeric_cast;
3002 
3006 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3007  const int M ( numeric_cast<int>( A.rows() ) );
3008  const int N ( numeric_cast<int>( B.columns() ) );
3009  const int K ( numeric_cast<int>( A.columns() ) );
3010  const int lda( numeric_cast<int>( A.spacing() ) );
3011  const int ldb( numeric_cast<int>( B.spacing() ) );
3012  const int ldc( numeric_cast<int>( C.spacing() ) );
3013 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3014  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3015  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3016  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3017  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3018  }
3019 #endif
3020  //**********************************************************************************************
3021 
3022  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3023 #if BLAZE_BLAS_MODE
3024 
/*!\brief BLAS-based addition assignment using cblas_dgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (passed as the GEMM alpha).
//
// Enabled through EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >. The call uses
// alpha = scalar and beta = 1.0, which under the CBLAS GEMM contract accumulates the scaled
// product into the existing content of C.
//
// NOTE(review): listing lines 3046-3048 are absent from this listing (gap between 3045 and
// 3049); confirm against the original header what they contain.
*/
3037  template< typename MT3 // Type of the left-hand side target matrix
3038  , typename MT4 // Type of the left-hand side matrix operand
3039  , typename MT5 // Type of the right-hand side matrix operand
3040  , typename ST2 > // Type of the scalar value
3041  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3042  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3043  {
3044  using boost::numeric_cast;
3045 
3049 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3050  const int M ( numeric_cast<int>( A.rows() ) );
3051  const int N ( numeric_cast<int>( B.columns() ) );
3052  const int K ( numeric_cast<int>( A.columns() ) );
3053  const int lda( numeric_cast<int>( A.spacing() ) );
3054  const int ldb( numeric_cast<int>( B.spacing() ) );
3055  const int ldc( numeric_cast<int>( C.spacing() ) );
3056 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3057  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3058  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3059  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3060  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3061  }
3062 #endif
3063  //**********************************************************************************************
3064 
3065  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3066 #if BLAZE_BLAS_MODE
3067 
/*!\brief BLAS-based addition assignment using cblas_cgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (converted to complex<float> and used as the GEMM alpha).
//
// Enabled through EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >. The call uses
// alpha = scalar and beta = (1,0), which under the CBLAS GEMM contract accumulates the scaled
// product into the existing content of C.
//
// NOTE(review): listing lines 3089-3091 are absent from this listing (gap between 3088 and
// 3092); confirm against the original header what they contain.
*/
3080  template< typename MT3 // Type of the left-hand side target matrix
3081  , typename MT4 // Type of the left-hand side matrix operand
3082  , typename MT5 // Type of the right-hand side matrix operand
3083  , typename ST2 > // Type of the scalar value
3084  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3085  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3086  {
3087  using boost::numeric_cast;
3088 
      // Compile-time checks on the value_type of each matrix's element type.
3092  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3093  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3094  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3095 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3096  const int M ( numeric_cast<int>( A.rows() ) );
3097  const int N ( numeric_cast<int>( B.columns() ) );
3098  const int K ( numeric_cast<int>( A.columns() ) );
3099  const int lda( numeric_cast<int>( A.spacing() ) );
3100  const int ldb( numeric_cast<int>( B.spacing() ) );
3101  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex GEMM interface takes alpha and beta by address, hence the named locals.
3102  const complex<float> alpha( scalar );
3103  const complex<float> beta ( 1.0F, 0.0F );
3104 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3105  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3106  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3107  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3108  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3109  }
3110 #endif
3111  //**********************************************************************************************
3112 
3113  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3114 #if BLAZE_BLAS_MODE
3115 
/*!\brief BLAS-based addition assignment using cblas_zgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (converted to complex<double> and used as the GEMM alpha).
//
// Enabled through EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >. The call uses
// alpha = scalar and beta = (1,0), which under the CBLAS GEMM contract accumulates the scaled
// product into the existing content of C.
//
// NOTE(review): listing lines 3137-3139 are absent from this listing (gap between 3136 and
// 3140); confirm against the original header what they contain.
*/
3128  template< typename MT3 // Type of the left-hand side target matrix
3129  , typename MT4 // Type of the left-hand side matrix operand
3130  , typename MT5 // Type of the right-hand side matrix operand
3131  , typename ST2 > // Type of the scalar value
3132  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3133  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3134  {
3135  using boost::numeric_cast;
3136 
      // Compile-time checks on the value_type of each matrix's element type.
3140  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3141  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3142  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3143 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3144  const int M ( numeric_cast<int>( A.rows() ) );
3145  const int N ( numeric_cast<int>( B.columns() ) );
3146  const int K ( numeric_cast<int>( A.columns() ) );
3147  const int lda( numeric_cast<int>( A.spacing() ) );
3148  const int ldb( numeric_cast<int>( B.spacing() ) );
3149  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex GEMM interface takes alpha and beta by address, hence the named locals.
3150  const complex<double> alpha( scalar );
3151  const complex<double> beta ( 1.0, 0.0 );
3152 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3153  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3154  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3155  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3156  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3157  }
3158 #endif
3159  //**********************************************************************************************
3160 
3161  //**Addition assignment to sparse matrices******************************************************
3162  // No special implementation for the addition assignment to sparse matrices.
3163  //**********************************************************************************************
3164 
3165  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a scaled matrix product to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
//
// The function fetches the two operands of rhs.matrix_, returns early when the target or the
// inner dimension is empty, evaluates both operands into LT/RT objects (typedefs declared
// outside this section) and then dispatches on the target size: below
// TDMATTDMATMULT_THRESHOLD elements the default kernel is used, otherwise the BLAS kernel.
//
// NOTE(review): listing line 3181 is absent from this listing (gap between 3180 and 3182);
// confirm against the original header what it contains.
*/
3177  template< typename MT3 // Type of the target dense matrix
3178  , bool SO > // Storage order of the target dense matrix
3179  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3180  {
3182 
3183  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3184  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3185 
3186  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3187  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3188 
      // Nothing to subtract if the target has no elements or the inner dimension is zero.
3189  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3190  return;
3191  }
3192 
3193  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3194  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3195 
3196  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3197  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3198  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3199  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3200  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3201  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3202 
      // Kernel selection by number of target elements; the scalar of the expression is passed on.
3203  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
3204  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3205  else
3206  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3207  }
3208  //**********************************************************************************************
3209 
3210  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Default (non-vectorized) subtraction assignment of a scaled matrix product.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
//
// This overload is the counterpart of the vectorized kernels: it is removed from overload
// resolution via DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >. It evaluates
// A * B * scalar into a temporary of type ResultType and subtracts that temporary from C
// through subAssign().
*/
3224  template< typename MT3 // Type of the left-hand side target matrix
3225  , typename MT4 // Type of the left-hand side matrix operand
3226  , typename MT5 // Type of the right-hand side matrix operand
3227  , typename ST2 > // Type of the scalar value
3228  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3229  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3230  {
3231  const ResultType tmp( A * B * scalar );
3232  subAssign( C, tmp );
3233  }
3234  //**********************************************************************************************
3235 
3236  //**Vectorized default subtraction assignment to row-major dense matrices***********************
/*!\brief Vectorized default subtraction assignment to a row-major dense matrix.
//
// \param C The target left-hand side row-major dense matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
//
// Instead of running an own loop nest, this kernel copies exactly one operand into its
// OppositeType and re-issues the subtraction assignment with the mixed-order product. Which
// operand is converted is decided by resizability first and by element count second (see the
// comments on the individual branches).
//
// NOTE(review): listing lines 3257-3258 are absent from this listing (gap between 3256 and
// 3259); confirm against the original header what they contain.
*/
3250  template< typename MT3 // Type of the left-hand side target matrix
3251  , typename MT4 // Type of the left-hand side matrix operand
3252  , typename MT5 // Type of the right-hand side matrix operand
3253  , typename ST2 > // Type of the scalar value
3254  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3255  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3256  {
3259 
      // Only A is resizable: convert B.
3260  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3261  const typename MT5::OppositeType tmp( B );
3262  subAssign( ~C, A * tmp * scalar );
3263  }
      // Only B is resizable: convert A.
3264  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3265  const typename MT4::OppositeType tmp( A );
3266  subAssign( ~C, tmp * B * scalar );
3267  }
      // Same resizability: convert the operand with fewer elements (B on a tie).
3268  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3269  const typename MT5::OppositeType tmp( B );
3270  subAssign( ~C, A * tmp * scalar );
3271  }
3272  else {
3273  const typename MT4::OppositeType tmp( A );
3274  subAssign( ~C, tmp * B * scalar );
3275  }
3276  }
3277  //**********************************************************************************************
3278 
3279  //**Vectorized default subtraction assignment to column-major dense matrices********************
/*!\brief Vectorized default subtraction assignment to a column-major dense matrix.
//
// \param C The target left-hand side column-major dense matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
//
// The row index i is advanced in four stages with strides of 8, 4, 2 and 1 times IT::size.
// For every (i,j) tile the kernel accumulates, over the inner index k, the products of
// vectors loaded from A with a value of B expanded through set(), and finally stores
// C.load(..) - accumulator * factor back into C, where factor is set( scalar ).
//
// NOTE(review): the xmm accumulators are declared without an initializer and immediately
// used as summands; this presumably relies on IntrinsicType's default construction yielding
// zero - confirm against the intrinsic type definitions.
// NOTE(review): the last stage runs for any remaining i < M and still loads/stores whole
// vectors; presumably the matrices' storage is padded accordingly - confirm.
*/
3293  template< typename MT3 // Type of the left-hand side target matrix
3294  , typename MT4 // Type of the left-hand side matrix operand
3295  , typename MT5 // Type of the right-hand side matrix operand
3296  , typename ST2 > // Type of the scalar value
3297  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3298  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3299  {
3300  typedef IntrinsicTrait<ElementType> IT;
3301 
3302  const size_t M( A.rows() );
3303  const size_t N( B.columns() );
3304  const size_t K( A.columns() );
3305 
3306  const IntrinsicType factor( set( scalar ) );
3307 
3308  size_t i( 0UL );
3309 
      // Stage 1: eight vectors of rows at a time, one column of C per iteration.
3310  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3311  for( size_t j=0UL; j<N; ++j ) {
3312  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3313  for( size_t k=0UL; k<K; ++k ) {
3314  const IntrinsicType b1( set( B(k,j) ) );
3315  xmm1 = xmm1 + A.load(i ,k) * b1;
3316  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3317  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3318  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3319  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3320  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3321  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3322  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3323  }
3324  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3325  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3326  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3327  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3328  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3329  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3330  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3331  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
3332  }
3333  }
      // Stage 2: four vectors of rows at a time, two columns of C per iteration
      // (xmm1-4 belong to column j, xmm5-8 to column j+1), plus a single-column remainder.
3334  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3335  size_t j( 0UL );
3336  for( ; (j+2UL) <= N; j+=2UL ) {
3337  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3338  for( size_t k=0UL; k<K; ++k ) {
3339  const IntrinsicType a1( A.load(i ,k) );
3340  const IntrinsicType a2( A.load(i+IT::size ,k) );
3341  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3342  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3343  const IntrinsicType b1( set( B(k,j ) ) );
3344  const IntrinsicType b2( set( B(k,j+1UL) ) );
3345  xmm1 = xmm1 + a1 * b1;
3346  xmm2 = xmm2 + a2 * b1;
3347  xmm3 = xmm3 + a3 * b1;
3348  xmm4 = xmm4 + a4 * b1;
3349  xmm5 = xmm5 + a1 * b2;
3350  xmm6 = xmm6 + a2 * b2;
3351  xmm7 = xmm7 + a3 * b2;
3352  xmm8 = xmm8 + a4 * b2;
3353  }
3354  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3355  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3356  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3357  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3358  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3359  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3360  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3361  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
3362  }
      // Remaining last column when N is odd.
3363  if( j < N ) {
3364  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3365  for( size_t k=0UL; k<K; ++k ) {
3366  const IntrinsicType b1( set( B(k,j) ) );
3367  xmm1 = xmm1 + A.load(i ,k) * b1;
3368  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3369  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3370  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3371  }
3372  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3373  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3374  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3375  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3376  }
3377  }
      // Stage 3: two vectors of rows at a time, two columns of C per iteration
      // (xmm1-2 belong to column j, xmm3-4 to column j+1), plus a single-column remainder.
3378  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3379  size_t j( 0UL );
3380  for( ; (j+2UL) <= N; j+=2UL ) {
3381  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3382  for( size_t k=0UL; k<K; ++k ) {
3383  const IntrinsicType a1( A.load(i ,k) );
3384  const IntrinsicType a2( A.load(i+IT::size,k) );
3385  const IntrinsicType b1( set( B(k,j ) ) );
3386  const IntrinsicType b2( set( B(k,j+1UL) ) );
3387  xmm1 = xmm1 + a1 * b1;
3388  xmm2 = xmm2 + a2 * b1;
3389  xmm3 = xmm3 + a1 * b2;
3390  xmm4 = xmm4 + a2 * b2;
3391  }
3392  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3393  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3394  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3395  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
3396  }
      // Remaining last column when N is odd.
3397  if( j < N ) {
3398  IntrinsicType xmm1, xmm2;
3399  for( size_t k=0UL; k<K; ++k ) {
3400  const IntrinsicType b1( set( B(k,j) ) );
3401  xmm1 = xmm1 + A.load(i ,k) * b1;
3402  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3403  }
3404  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3405  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
3406  }
3407  }
      // Stage 4: one last vector of rows, two columns of C per iteration plus remainder.
3408  if( i < M ) {
3409  size_t j( 0UL );
3410  for( ; (j+2UL) <= N; j+=2UL ) {
3411  IntrinsicType xmm1, xmm2;
3412  for( size_t k=0UL; k<K; ++k ) {
3413  const IntrinsicType a1( A.load(i,k) );
3414  xmm1 = xmm1 + a1 * set( B(k,j ) );
3415  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3416  }
3417  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3418  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
3419  }
      // Remaining last column when N is odd.
3420  if( j < N ) {
3421  IntrinsicType xmm1;
3422  for( size_t k=0UL; k<K; ++k ) {
3423  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3424  }
3425  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3426  }
3427  }
3428  }
3429  //**********************************************************************************************
3430 
3431  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
/*!\brief BLAS-kernel entry point that falls back to the default subtraction assignment kernel.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
//
// This overload is enabled through EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> > (the trait is
// defined outside this section) and simply forwards all four arguments, unchanged, to
// selectDefaultSubAssignKernel().
*/
3445  template< typename MT3 // Type of the left-hand side target matrix
3446  , typename MT4 // Type of the left-hand side matrix operand
3447  , typename MT5 // Type of the right-hand side matrix operand
3448  , typename ST2 > // Type of the scalar value
3449  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3450  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3451  {
3452  selectDefaultSubAssignKernel( C, A, B, scalar );
3453  }
3454  //**********************************************************************************************
3455 
3456  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3457 #if BLAZE_BLAS_MODE
3458 
/*!\brief BLAS-based subtraction assignment using cblas_sgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (its negation is passed as the GEMM alpha).
//
// Enabled through EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >. The call uses
// alpha = -scalar and beta = 1.0F, which under the CBLAS GEMM contract subtracts the scaled
// product from the existing content of C.
//
// NOTE(review): listing lines 3480-3482 are absent from this listing (gap between 3479 and
// 3483); confirm against the original header what they contain.
*/
3471  template< typename MT3 // Type of the left-hand side target matrix
3472  , typename MT4 // Type of the left-hand side matrix operand
3473  , typename MT5 // Type of the right-hand side matrix operand
3474  , typename ST2 > // Type of the scalar value
3475  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3476  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3477  {
3478  using boost::numeric_cast;
3479 
3483 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3484  const int M ( numeric_cast<int>( A.rows() ) );
3485  const int N ( numeric_cast<int>( B.columns() ) );
3486  const int K ( numeric_cast<int>( A.columns() ) );
3487  const int lda( numeric_cast<int>( A.spacing() ) );
3488  const int ldb( numeric_cast<int>( B.spacing() ) );
3489  const int ldc( numeric_cast<int>( C.spacing() ) );
3490 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3491  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3492  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3493  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3494  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3495  }
3496 #endif
3497  //**********************************************************************************************
3498 
3499  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3500 #if BLAZE_BLAS_MODE
3501 
/*!\brief BLAS-based subtraction assignment using cblas_dgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (its negation is passed as the GEMM alpha).
//
// Enabled through EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >. The call uses
// alpha = -scalar and beta = 1.0, which under the CBLAS GEMM contract subtracts the scaled
// product from the existing content of C.
//
// NOTE(review): listing lines 3523-3525 are absent from this listing (gap between 3522 and
// 3526); confirm against the original header what they contain.
*/
3514  template< typename MT3 // Type of the left-hand side target matrix
3515  , typename MT4 // Type of the left-hand side matrix operand
3516  , typename MT5 // Type of the right-hand side matrix operand
3517  , typename ST2 > // Type of the scalar value
3518  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3519  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3520  {
3521  using boost::numeric_cast;
3522 
3526 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3527  const int M ( numeric_cast<int>( A.rows() ) );
3528  const int N ( numeric_cast<int>( B.columns() ) );
3529  const int K ( numeric_cast<int>( A.columns() ) );
3530  const int lda( numeric_cast<int>( A.spacing() ) );
3531  const int ldb( numeric_cast<int>( B.spacing() ) );
3532  const int ldc( numeric_cast<int>( C.spacing() ) );
3533 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3534  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3535  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3536  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3537  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3538  }
3539 #endif
3540  //**********************************************************************************************
3541 
3542  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3543 #if BLAZE_BLAS_MODE
3544 
/*!\brief BLAS-based subtraction assignment using cblas_cgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (negated, converted to complex<float>, used as GEMM alpha).
//
// Enabled through EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >. The call uses
// alpha = -scalar and beta = (1,0), which under the CBLAS GEMM contract subtracts the scaled
// product from the existing content of C.
//
// NOTE(review): listing lines 3566-3568 are absent from this listing (gap between 3565 and
// 3569); confirm against the original header what they contain.
*/
3557  template< typename MT3 // Type of the left-hand side target matrix
3558  , typename MT4 // Type of the left-hand side matrix operand
3559  , typename MT5 // Type of the right-hand side matrix operand
3560  , typename ST2 > // Type of the scalar value
3561  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3562  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3563  {
3564  using boost::numeric_cast;
3565 
      // Compile-time checks on the value_type of each matrix's element type.
3569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3572 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3573  const int M ( numeric_cast<int>( A.rows() ) );
3574  const int N ( numeric_cast<int>( B.columns() ) );
3575  const int K ( numeric_cast<int>( A.columns() ) );
3576  const int lda( numeric_cast<int>( A.spacing() ) );
3577  const int ldb( numeric_cast<int>( B.spacing() ) );
3578  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex GEMM interface takes alpha and beta by address, hence the named locals.
3579  const complex<float> alpha( -scalar );
3580  const complex<float> beta ( 1.0F, 0.0F );
3581 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3582  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3583  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3584  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3585  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3586  }
3587 #endif
3588  //**********************************************************************************************
3589 
3590  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3591 #if BLAZE_BLAS_MODE
3592 
/*!\brief BLAS-based subtraction assignment using cblas_zgemm.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor (negated, converted to complex<double>, used as GEMM alpha).
//
// Enabled through EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >. The call uses
// alpha = -scalar and beta = (1,0), which under the CBLAS GEMM contract subtracts the scaled
// product from the existing content of C.
//
// NOTE(review): listing lines 3614-3616 are absent from this listing (gap between 3613 and
// 3617); confirm against the original header what they contain.
*/
3605  template< typename MT3 // Type of the left-hand side target matrix
3606  , typename MT4 // Type of the left-hand side matrix operand
3607  , typename MT5 // Type of the right-hand side matrix operand
3608  , typename ST2 > // Type of the scalar value
3609  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3610  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3611  {
3612  using boost::numeric_cast;
3613 
      // Compile-time checks on the value_type of each matrix's element type.
3617  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3618  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3620 
      // CBLAS takes 'int' dimensions and leading dimensions; every size is converted through
      // boost::numeric_cast rather than a plain cast.
3621  const int M ( numeric_cast<int>( A.rows() ) );
3622  const int N ( numeric_cast<int>( B.columns() ) );
3623  const int K ( numeric_cast<int>( A.columns() ) );
3624  const int lda( numeric_cast<int>( A.spacing() ) );
3625  const int ldb( numeric_cast<int>( B.spacing() ) );
3626  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex GEMM interface takes alpha and beta by address, hence the named locals.
3627  const complex<double> alpha( -scalar );
3628  const complex<double> beta ( 1.0, 0.0 );
3629 
      // The storage order follows the target MT3: a row-major target selects CblasRowMajor and
      // flags both operands as CblasTrans, any other target selects CblasColMajor/CblasNoTrans.
3630  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3631  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3632  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3633  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3634  }
3635 #endif
3636  //**********************************************************************************************
3637 
3638  //**Subtraction assignment to sparse matrices***************************************************
3639  // No special implementation for the subtraction assignment to sparse matrices.
3640  //**********************************************************************************************
3641 
3642  //**Multiplication assignment to dense matrices*************************************************
3643  // No special implementation for the multiplication assignment to dense matrices.
3644  //**********************************************************************************************
3645 
3646  //**Multiplication assignment to sparse matrices************************************************
3647  // No special implementation for the multiplication assignment to sparse matrices.
3648  //**********************************************************************************************
3649 
3650  //**Compile time checks*************************************************************************
3659  //**********************************************************************************************
3660 };
3662 //*************************************************************************************************
3663 
3664 
3665 
3666 
3667 //=================================================================================================
3668 //
3669 // GLOBAL BINARY ARITHMETIC OPERATORS
3670 //
3671 //=================================================================================================
3672 
3673 //*************************************************************************************************
/*!\brief Creates a TDMatTDMatMultExpr from a left-hand side and a right-hand side operand.
//
// \exception std::invalid_argument Matrix sizes do not match.
//
// The function throws when the number of columns of lhs differs from the number of rows of
// rhs; otherwise it returns an expression object constructed from both operands.
//
// NOTE(review): listing line 3702 (the declarator carrying the function name and the lhs/rhs
// parameters) and line 3704 are absent from this listing; restore them from the original
// header before compiling.
*/
3699 template< typename T1 // Type of the left-hand side dense matrix
3700  , typename T2 > // Type of the right-hand side dense matrix
3701 inline const TDMatTDMatMultExpr<T1,T2>
3703 {
3705 
      // The inner dimensions must agree for the product to be defined.
3706  if( (~lhs).columns() != (~rhs).rows() )
3707  throw std::invalid_argument( "Matrix sizes do not match" );
3708 
3709  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3710 }
3711 //*************************************************************************************************
3712 
3713 
3714 
3715 
3716 //=================================================================================================
3717 //
3718 // EXPRESSION TRAIT SPECIALIZATIONS
3719 //
3720 //=================================================================================================
3721 
3722 //*************************************************************************************************
// Specialization of TDMatDVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> multiplied with a
// vector VT. If MT1 and MT2 are column-major dense matrices and VT is a dense column vector,
// Type is the nested trait result for MT1 * ( MT2 * VT ), i.e. the product is re-associated so
// that the matrix/vector product is formed first; otherwise Type is INVALID_TYPE.
// (SelectType is assumed to pick its first alternative on a true condition.)
3724 template< typename MT1, typename MT2, typename VT >
3725 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3726 {
3727  public:
3728  //**********************************************************************************************
3729  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3730  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3731  IsDenseVector<VT>::value && IsColumnVector<VT>::value
3732  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
3733  , INVALID_TYPE >::Type Type;
3734  //**********************************************************************************************
3735 };
3737 //*************************************************************************************************
3738 
3739 
3740 //*************************************************************************************************
// Specialization of TDMatSVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> multiplied with a
// vector VT. If MT1 and MT2 are column-major dense matrices and VT is a sparse column vector,
// Type is the nested trait result for MT1 * ( MT2 * VT ): the inner product uses
// TDMatSVecMultExprTrait, the outer one TDMatDVecMultExprTrait; otherwise Type is INVALID_TYPE.
// (SelectType is assumed to pick its first alternative on a true condition.)
3742 template< typename MT1, typename MT2, typename VT >
3743 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3744 {
3745  public:
3746  //**********************************************************************************************
3747  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3748  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3749  IsSparseVector<VT>::value && IsColumnVector<VT>::value
3750  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
3751  , INVALID_TYPE >::Type Type;
3752  //**********************************************************************************************
3753 };
3755 //*************************************************************************************************
3756 
3757 
3758 //*************************************************************************************************
// Specialization of TDVecTDMatMultExprTrait for a vector VT multiplied with a
// TDMatTDMatMultExpr<MT1,MT2>. If VT is a dense row vector and MT1 and MT2 are column-major
// dense matrices, Type is the nested trait result for ( VT * MT1 ) * MT2, i.e. the product is
// re-associated so that the vector/matrix product is formed first; otherwise Type is
// INVALID_TYPE. (SelectType is assumed to pick its first alternative on a true condition.)
3760 template< typename VT, typename MT1, typename MT2 >
3761 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3762 {
3763  public:
3764  //**********************************************************************************************
3765  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
3766  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3767  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3768  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3769  , INVALID_TYPE >::Type Type;
3770  //**********************************************************************************************
3771 };
3773 //*************************************************************************************************
3774 
3775 
3776 //*************************************************************************************************
// Specialization of TSVecTDMatMultExprTrait for a vector VT multiplied with a
// TDMatTDMatMultExpr<MT1,MT2>. If VT is a sparse row vector and MT1 and MT2 are column-major
// dense matrices, Type is the nested trait result for ( VT * MT1 ) * MT2: the inner product
// uses TSVecTDMatMultExprTrait, the outer one TDVecTDMatMultExprTrait; otherwise Type is
// INVALID_TYPE. (SelectType is assumed to pick its first alternative on a true condition.)
3778 template< typename VT, typename MT1, typename MT2 >
3779 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3780 {
3781  public:
3782  //**********************************************************************************************
3783  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
3784  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3785  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3786  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3787  , INVALID_TYPE >::Type Type;
3788  //**********************************************************************************************
3789 };
3791 //*************************************************************************************************
3792 
3793 
3794 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for TDMatTDMatMultExpr<MT1,MT2>: Type is the
// MultExprTrait result of the submatrix expression types of const MT1 and const MT2, i.e. the
// submatrix of a product is expressed as a product of submatrices of the two operands.
3796 template< typename MT1, typename MT2 >
3797 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3798 {
3799  public:
3800  //**********************************************************************************************
3801  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1>::Type
3802  , typename SubmatrixExprTrait<const MT2>::Type >::Type Type;
3803  //**********************************************************************************************
3804 };
3806 //*************************************************************************************************
3807 
3808 
3809 //*************************************************************************************************
// Specialization of RowExprTrait for TDMatTDMatMultExpr<MT1,MT2>: Type is the MultExprTrait
// result of the row expression type of const MT1 and the unchanged MT2, i.e. a row of the
// product is expressed as (row of the left operand) * (right operand).
3811 template< typename MT1, typename MT2 >
3812 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3813 {
3814  public:
3815  //**********************************************************************************************
3816  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3817  //**********************************************************************************************
3818 };
3820 //*************************************************************************************************
3821 
3822 
3823 //*************************************************************************************************
// Specialization of ColumnExprTrait for TDMatTDMatMultExpr<MT1,MT2>: Type is the MultExprTrait
// result of the unchanged MT1 and the column expression type of const MT2, i.e. a column of
// the product is expressed as (left operand) * (column of the right operand).
3825 template< typename MT1, typename MT2 >
3826 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3827 {
3828  public:
3829  //**********************************************************************************************
3830  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3831  //**********************************************************************************************
3832 };
3834 //*************************************************************************************************
3835 
3836 } // namespace blaze
3837 
3838 #endif
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:228
Data type constraint.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B * C$ ).
Definition: DMatDMatMultExpr.h:3703
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:224
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:170
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:357
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
Header file for the IsColumnMajorMatrix type trait.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:219
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:266
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:246
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:127
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:338
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2371
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:316
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:221
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:218
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:117
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:231
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:326
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
MT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
SelectType< IsComputation< MT1 >::value, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:234
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:225
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:350
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:220
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:222
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:296
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
MT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:251
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:118
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
Header file for the IsColumnVector type trait.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Operand matrix_
The dense matrix containing the submatrix.
Definition: DenseSubmatrix.h:2792
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:306
Header file for the IsExpression type trait class.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:223
SelectType< IsComputation< MT2 >::value, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:237
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:358