All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
74 #include <blaze/system/BLAS.h>
76 #include <blaze/util/Assert.h>
77 #include <blaze/util/Complex.h>
84 #include <blaze/util/DisableIf.h>
85 #include <blaze/util/EnableIf.h>
86 #include <blaze/util/InvalidType.h>
88 #include <blaze/util/SelectType.h>
89 #include <blaze/util/Types.h>
95 
96 
97 namespace blaze {
98 
99 //=================================================================================================
100 //
101 // CLASS DMATDMATMULTEXPR
102 //
103 //=================================================================================================
104 
105 //*************************************************************************************************
112 template< typename MT1 // Type of the left-hand side dense matrix
113  , typename MT2 > // Type of the right-hand side dense matrix
114 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
115  , private MatMatMultExpr
116  , private Computation
117 {
118  private:
119  //**Type definitions****************************************************************************
// Shorthands for the nested types of the two operand matrix types.
// RT1/RT2 are used below as the evaluated form of a computation operand, CT1/CT2 otherwise
// (see the LT and RT typedefs of this class).
120  typedef typename MT1::ResultType RT1;   // ResultType of the left-hand side operand type
121  typedef typename MT2::ResultType RT2;   // ResultType of the right-hand side operand type
122  typedef typename MT1::CompositeType CT1;   // CompositeType of the left-hand side operand type
123  typedef typename MT2::CompositeType CT2;   // CompositeType of the right-hand side operand type
124  //**********************************************************************************************
125 
126  //**********************************************************************************************
128 
// Compile-time predicate over the target matrix type and the two operand matrix types.
// It is used via EnableIf to select the cblas_sgemm-based kernel and is queried by
// UseDefaultKernel through a nested constant named 'value'.
// NOTE(review): the struct body (listing lines 133-135) is absent from this listing, so the
// definition of 'value' is not visible here - restore it from the original header.
131  template< typename T1, typename T2, typename T3 >
132  struct UseSinglePrecisionKernel {
136  };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
// Compile-time predicate over the target matrix type and the two operand matrix types.
// It is used via EnableIf to select the cblas_dgemm-based kernel and is queried by
// UseDefaultKernel through a nested constant named 'value'.
// NOTE(review): the struct body (listing lines 147-149) is absent from this listing, so the
// definition of 'value' is not visible here - restore it from the original header.
145  template< typename T1, typename T2, typename T3 >
146  struct UseDoublePrecisionKernel {
150  };
152  //**********************************************************************************************
153 
154  //**********************************************************************************************
156 
160  template< typename T1, typename T2, typename T3 >
161  struct UseSinglePrecisionComplexKernel {
162  typedef complex<float> Type;
163  enum { value = IsSame<typename T1::ElementType,Type>::value &&
164  IsSame<typename T2::ElementType,Type>::value &&
165  IsSame<typename T3::ElementType,Type>::value };
166  };
168  //**********************************************************************************************
169 
170  //**********************************************************************************************
172 
176  template< typename T1, typename T2, typename T3 >
177  struct UseDoublePrecisionComplexKernel {
178  typedef complex<double> Type;
179  enum { value = IsSame<typename T1::ElementType,Type>::value &&
180  IsSame<typename T2::ElementType,Type>::value &&
181  IsSame<typename T3::ElementType,Type>::value };
182  };
184  //**********************************************************************************************
185 
186  //**********************************************************************************************
188 
191  template< typename T1, typename T2, typename T3 >
192  struct UseDefaultKernel {
193  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
194  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
195  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
196  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
197  };
199  //**********************************************************************************************
200 
201  //**********************************************************************************************
203 
206  template< typename T1, typename T2, typename T3 >
207  struct UseVectorizedDefaultKernel {
208  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
210  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
211  IntrinsicTrait<typename T1::ElementType>::addition &&
212  IntrinsicTrait<typename T1::ElementType>::multiplication };
213  };
215  //**********************************************************************************************
216 
217  public:
218  //**Type definitions****************************************************************************
// Public type definitions of the multiplication expression.
// NOTE(review): ElementType and ResultType are used here but their definitions (listing lines
// 219-224) are not part of this listing.
225  typedef const ElementType ReturnType;   // Type returned by the element access operator()
226  typedef const ResultType CompositeType;   // Constant ResultType
227 
// Member type for the left-hand side operand: selected via SelectType between 'const MT1' and
// 'const MT1&', keyed on IsExpression<MT1> (presumably the first alternative is chosen when
// the condition is true - confirm against SelectType).
229  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
230 
// Member type for the right-hand side operand, chosen by the same rule for MT2.
232  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
233 
// Type of the local 'A' the assignment functions create from the left-hand side operand:
// 'const RT1' or CT1, keyed on IsComputation<MT1>.
235  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
236 
// Type of the local 'B' the assignment functions create from the right-hand side operand:
// 'const RT2' or CT2, keyed on IsComputation<MT2>.
238  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
239  //**********************************************************************************************
240 
241  //**Compilation flags***************************************************************************
// Compilation flag: the expression itself reports vectorizable = 0. This is the same flag
// name that UseVectorizedDefaultKernel reads from the matrix types it is given.
243  enum { vectorizable = 0 };
244  //**********************************************************************************************
245 
246  //**Constructor*********************************************************************************
252  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
253  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
254  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
255  {
256  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
257  }
258  //**********************************************************************************************
259 
260  //**Access operator*****************************************************************************
267  inline ReturnType operator()( size_t i, size_t j ) const {
268  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
269  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
270 
271  ElementType tmp;
272 
273  if( lhs_.columns() != 0UL ) {
274  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
275  tmp = lhs_(i,0UL) * rhs_(0UL,j);
276  for( size_t k=1UL; k<end; k+=2UL ) {
277  tmp += lhs_(i,k ) * rhs_(k ,j);
278  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
279  }
280  if( end < lhs_.columns() ) {
281  tmp += lhs_(i,end) * rhs_(end,j);
282  }
283  }
284  else {
285  reset( tmp );
286  }
287 
288  return tmp;
289  }
290  //**********************************************************************************************
291 
292  //**Rows function*******************************************************************************
297  inline size_t rows() const {
298  return lhs_.rows();
299  }
300  //**********************************************************************************************
301 
302  //**Columns function****************************************************************************
307  inline size_t columns() const {
308  return rhs_.columns();
309  }
310  //**********************************************************************************************
311 
312  //**Left operand access*************************************************************************
317  inline LeftOperand leftOperand() const {
318  return lhs_;
319  }
320  //**********************************************************************************************
321 
322  //**Right operand access************************************************************************
327  inline RightOperand rightOperand() const {
328  return rhs_;
329  }
330  //**********************************************************************************************
331 
332  //**********************************************************************************************
338  template< typename T >
339  inline bool canAlias( const T* alias ) const {
340  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
341  }
342  //**********************************************************************************************
343 
344  //**********************************************************************************************
350  template< typename T >
351  inline bool isAliased( const T* alias ) const {
352  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
353  }
354  //**********************************************************************************************
355 
356  private:
357  //**Member variables****************************************************************************
360  //**********************************************************************************************
361 
362  //**Assignment to dense matrices****************************************************************
372  template< typename MT3 // Type of the target dense matrix
373  , bool SO > // Storage order of the target dense matrix
374  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
375  {
377 
378  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
379  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
380 
381  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
382  return;
383  }
384  else if( rhs.lhs_.columns() == 0UL ) {
385  reset( ~lhs );
386  return;
387  }
388 
389  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
390  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
391 
392  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
393  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
394  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
395  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
396  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
397  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
398 
399  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
400  DMatDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
401  else
402  DMatDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
403  }
405  //**********************************************************************************************
406 
407  //**Default assignment to dense matrices********************************************************
420  template< typename MT3 // Type of the left-hand side target matrix
421  , typename MT4 // Type of the left-hand side matrix operand
422  , typename MT5 > // Type of the right-hand side matrix operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
424  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
425  {
426  const size_t M( A.rows() );
427  const size_t N( B.columns() );
428  const size_t K( A.columns() );
429 
430  for( size_t i=0UL; i<M; ++i ) {
431  for( size_t j=0UL; j<N; ++j ) {
432  C(i,j) = A(i,0UL) * B(0UL,j);
433  }
434  for( size_t k=1UL; k<K; ++k ) {
435  for( size_t j=0UL; j<N; ++j ) {
436  C(i,j) += A(i,k) * B(k,j);
437  }
438  }
439  }
440  }
442  //**********************************************************************************************
443 
444  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for C = A * B with a row-major target (DenseMatrix<MT3,false>).
// Only available if UseVectorizedDefaultKernel applies to the three types.
//
// The columns of C are processed in chunks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each chunk, one or two rows of C are accumulated in local vectors
// over the complete inner dimension K and then stored to C.
//
// NOTE(review): the accumulators are default-constructed and never explicitly zeroed; this
// presumably relies on IntrinsicType's default constructor producing zeros - confirm.
// NOTE(review): each loop condition only guarantees that the first column of the last vector
// in a chunk is smaller than N; the load/store of that vector may reach past column N. This
// presumably relies on padded rows in B and C - confirm.
458  template< typename MT3 // Type of the left-hand side target matrix
459  , typename MT4 // Type of the left-hand side matrix operand
460  , typename MT5 > // Type of the right-hand side matrix operand
461  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
462  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
463  {
464  typedef IntrinsicTrait<ElementType> IT;
465 
466  const size_t M( A.rows() );
467  const size_t N( B.columns() );
468  const size_t K( A.columns() );
469 
// Current column index; shared by all of the following column loops
470  size_t j( 0UL );
471 
// Chunks of 8 vectors per row: one row of C at a time, 8 accumulators
472  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
473  for( size_t i=0UL; i<M; ++i ) {
474  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
475  for( size_t k=0UL; k<K; ++k ) {
// A(i,k) is passed through set() once and reused for all 8 vectors of row k of B
476  const IntrinsicType a1( set( A(i,k) ) );
477  xmm1 = xmm1 + a1 * B.load(k,j );
478  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
479  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
480  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
481  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
482  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
483  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
484  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
485  }
486  (~C).store( i, j , xmm1 );
487  (~C).store( i, j+IT::size , xmm2 );
488  (~C).store( i, j+IT::size*2UL, xmm3 );
489  (~C).store( i, j+IT::size*3UL, xmm4 );
490  (~C).store( i, j+IT::size*4UL, xmm5 );
491  (~C).store( i, j+IT::size*5UL, xmm6 );
492  (~C).store( i, j+IT::size*6UL, xmm7 );
493  (~C).store( i, j+IT::size*7UL, xmm8 );
494  }
495  }
// Chunks of 4 vectors per row: two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1)
496  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
497  size_t i( 0UL );
498  for( ; (i+2UL) <= M; i+=2UL ) {
499  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
500  for( size_t k=0UL; k<K; ++k ) {
501  const IntrinsicType a1( set( A(i ,k) ) );
502  const IntrinsicType a2( set( A(i+1UL,k) ) );
// The four vectors of row k of B are loaded once and shared by both rows
503  const IntrinsicType b1( B.load(k,j ) );
504  const IntrinsicType b2( B.load(k,j+IT::size ) );
505  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
506  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
507  xmm1 = xmm1 + a1 * b1;
508  xmm2 = xmm2 + a1 * b2;
509  xmm3 = xmm3 + a1 * b3;
510  xmm4 = xmm4 + a1 * b4;
511  xmm5 = xmm5 + a2 * b1;
512  xmm6 = xmm6 + a2 * b2;
513  xmm7 = xmm7 + a2 * b3;
514  xmm8 = xmm8 + a2 * b4;
515  }
516  (~C).store( i , j , xmm1 );
517  (~C).store( i , j+IT::size , xmm2 );
518  (~C).store( i , j+IT::size*2UL, xmm3 );
519  (~C).store( i , j+IT::size*3UL, xmm4 );
520  (~C).store( i+1UL, j , xmm5 );
521  (~C).store( i+1UL, j+IT::size , xmm6 );
522  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
523  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
524  }
// Remaining single row in case M is odd
525  if( i < M ) {
526  IntrinsicType xmm1, xmm2, xmm3, xmm4;
527  for( size_t k=0UL; k<K; ++k ) {
528  const IntrinsicType a1( set( A(i,k) ) );
529  xmm1 = xmm1 + a1 * B.load(k,j );
530  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
531  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
532  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
533  }
534  (~C).store( i, j , xmm1 );
535  (~C).store( i, j+IT::size , xmm2 );
536  (~C).store( i, j+IT::size*2UL, xmm3 );
537  (~C).store( i, j+IT::size*3UL, xmm4 );
538  }
539  }
// Chunks of 2 vectors per row: two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1)
540  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
541  size_t i( 0UL );
542  for( ; (i+2UL) <= M; i+=2UL ) {
543  IntrinsicType xmm1, xmm2, xmm3, xmm4;
544  for( size_t k=0UL; k<K; ++k ) {
545  const IntrinsicType a1( set( A(i ,k) ) );
546  const IntrinsicType a2( set( A(i+1UL,k) ) );
547  const IntrinsicType b1( B.load(k,j ) );
548  const IntrinsicType b2( B.load(k,j+IT::size) );
549  xmm1 = xmm1 + a1 * b1;
550  xmm2 = xmm2 + a1 * b2;
551  xmm3 = xmm3 + a2 * b1;
552  xmm4 = xmm4 + a2 * b2;
553  }
554  (~C).store( i , j , xmm1 );
555  (~C).store( i , j+IT::size, xmm2 );
556  (~C).store( i+1UL, j , xmm3 );
557  (~C).store( i+1UL, j+IT::size, xmm4 );
558  }
// Remaining single row in case M is odd
559  if( i < M ) {
560  IntrinsicType xmm1, xmm2;
561  for( size_t k=0UL; k<K; ++k ) {
562  const IntrinsicType a1( set( A(i,k) ) );
563  xmm1 = xmm1 + a1 * B.load(k,j );
564  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
565  }
566  (~C).store( i, j , xmm1 );
567  (~C).store( i, j+IT::size, xmm2 );
568  }
569  }
// Final chunk of a single vector per row (executed at most once): two rows of C at a time
570  if( j < N ) {
571  size_t i( 0UL );
572  for( ; (i+2UL) <= M; i+=2UL ) {
573  IntrinsicType xmm1, xmm2;
574  for( size_t k=0UL; k<K; ++k ) {
575  const IntrinsicType b1( B.load(k,j) );
576  xmm1 = xmm1 + set( A(i ,k) ) * b1;
577  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
578  }
579  (~C).store( i , j, xmm1 );
580  (~C).store( i+1UL, j, xmm2 );
581  }
// Remaining single row in case M is odd
582  if( i < M ) {
583  IntrinsicType xmm1;
584  for( size_t k=0UL; k<K; ++k ) {
585  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
586  }
587  (~C).store( i, j, xmm1 );
588  }
589  }
590  }
592  //**********************************************************************************************
593 
594  //**Vectorized default assignment to column-major dense matrices********************************
608  template< typename MT3 // Type of the left-hand side target matrix
609  , typename MT4 // Type of the left-hand side matrix operand
610  , typename MT5 > // Type of the right-hand side matrix operand
611  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
612  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
613  {
616 
617  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
618  const typename MT4::OppositeType tmp( A );
619  assign( ~C, tmp * B );
620  }
621  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
622  const typename MT5::OppositeType tmp( B );
623  assign( ~C, A * tmp );
624  }
625  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
626  const typename MT4::OppositeType tmp( A );
627  assign( ~C, tmp * B );
628  }
629  else {
630  const typename MT5::OppositeType tmp( B );
631  assign( ~C, A * tmp );
632  }
633  }
635  //**********************************************************************************************
636 
637  //**BLAS-based assignment to dense matrices (default)*******************************************
650  template< typename MT3 // Type of the left-hand side target matrix
651  , typename MT4 // Type of the left-hand side matrix operand
652  , typename MT5 > // Type of the right-hand side matrix operand
653  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
654  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
655  {
656  selectDefaultAssignKernel( C, A, B );
657  }
659  //**********************************************************************************************
660 
661  //**BLAS-based assignment to dense matrices (single precision)**********************************
662 #if BLAZE_BLAS_MODE
663 
676  template< typename MT3 // Type of the left-hand side target matrix
677  , typename MT4 // Type of the left-hand side matrix operand
678  , typename MT5 > // Type of the right-hand side matrix operand
679  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
680  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
681  {
682  using boost::numeric_cast;
683 
687 
688  const int M ( numeric_cast<int>( A.rows() ) );
689  const int N ( numeric_cast<int>( B.columns() ) );
690  const int K ( numeric_cast<int>( A.columns() ) );
691  const int lda( numeric_cast<int>( A.spacing() ) );
692  const int ldb( numeric_cast<int>( B.spacing() ) );
693  const int ldc( numeric_cast<int>( C.spacing() ) );
694 
695  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
696  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
697  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
698  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
699  }
701 #endif
702  //**********************************************************************************************
703 
704  //**BLAS-based assignment to dense matrices (double precision)**********************************
705 #if BLAZE_BLAS_MODE
706 
719  template< typename MT3 // Type of the left-hand side target matrix
720  , typename MT4 // Type of the left-hand side matrix operand
721  , typename MT5 > // Type of the right-hand side matrix operand
722  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
723  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
724  {
725  using boost::numeric_cast;
726 
730 
731  const int M ( numeric_cast<int>( A.rows() ) );
732  const int N ( numeric_cast<int>( B.columns() ) );
733  const int K ( numeric_cast<int>( A.columns() ) );
734  const int lda( numeric_cast<int>( A.spacing() ) );
735  const int ldb( numeric_cast<int>( B.spacing() ) );
736  const int ldc( numeric_cast<int>( C.spacing() ) );
737 
738  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
739  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
740  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
741  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
742  }
744 #endif
745  //**********************************************************************************************
746 
747  //**BLAS-based assignment to dense matrices (single precision complex)**************************
748 #if BLAZE_BLAS_MODE
749 
762  template< typename MT3 // Type of the left-hand side target matrix
763  , typename MT4 // Type of the left-hand side matrix operand
764  , typename MT5 > // Type of the right-hand side matrix operand
765  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
766  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
767  {
768  using boost::numeric_cast;
769 
773  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
774  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
775  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
776 
777  const int M ( numeric_cast<int>( A.rows() ) );
778  const int N ( numeric_cast<int>( B.columns() ) );
779  const int K ( numeric_cast<int>( A.columns() ) );
780  const int lda( numeric_cast<int>( A.spacing() ) );
781  const int ldb( numeric_cast<int>( B.spacing() ) );
782  const int ldc( numeric_cast<int>( C.spacing() ) );
783  const complex<float> alpha( 1.0F, 0.0F );
784  const complex<float> beta ( 0.0F, 0.0F );
785 
786  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
787  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
788  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
789  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
790  }
792 #endif
793  //**********************************************************************************************
794 
795  //**BLAS-based assignment to dense matrices (double precision complex)**************************
796 #if BLAZE_BLAS_MODE
797 
810  template< typename MT3 // Type of the left-hand side target matrix
811  , typename MT4 // Type of the left-hand side matrix operand
812  , typename MT5 > // Type of the right-hand side matrix operand
813  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
814  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
815  {
816  using boost::numeric_cast;
817 
821  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
822  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
823  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
824 
825  const int M ( numeric_cast<int>( A.rows() ) );
826  const int N ( numeric_cast<int>( B.columns() ) );
827  const int K ( numeric_cast<int>( A.columns() ) );
828  const int lda( numeric_cast<int>( A.spacing() ) );
829  const int ldb( numeric_cast<int>( B.spacing() ) );
830  const int ldc( numeric_cast<int>( C.spacing() ) );
831  const complex<double> alpha( 1.0, 0.0 );
832  const complex<double> beta ( 0.0, 0.0 );
833 
834  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
835  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
836  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
837  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
838  }
840 #endif
841  //**********************************************************************************************
842 
843  //**Assignment to sparse matrices***************************************************************
855  template< typename MT // Type of the target sparse matrix
856  , bool SO > // Storage order of the target sparse matrix
857  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
858  {
860 
861  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
862 
869 
870  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
871  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
872 
873  const TmpType tmp( rhs );
874  assign( ~lhs, tmp );
875  }
877  //**********************************************************************************************
878 
879  //**Addition assignment to dense matrices*******************************************************
892  template< typename MT3 // Type of the target dense matrix
893  , bool SO > // Storage order of the target dense matrix
894  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
895  {
897 
898  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
899  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
900 
901  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
902  return;
903  }
904 
905  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
906  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
907 
908  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
909  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
910  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
911  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
912  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
913  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
914 
915  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
916  DMatDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
917  else
918  DMatDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
919  }
921  //**********************************************************************************************
922 
923  //**Default addition assignment to dense matrices***********************************************
937  template< typename MT3 // Type of the left-hand side target matrix
938  , typename MT4 // Type of the left-hand side matrix operand
939  , typename MT5 > // Type of the right-hand side matrix operand
940  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
941  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
942  {
943  const size_t M( A.rows() );
944  const size_t N( B.columns() );
945  const size_t K( A.columns() );
946 
947  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
948  const size_t end( N & size_t(-2) );
949 
950  for( size_t i=0UL; i<M; ++i ) {
951  for( size_t k=0UL; k<K; ++k ) {
952  for( size_t j=0UL; j<end; j+=2UL ) {
953  C(i,j ) += A(i,k) * B(k,j );
954  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
955  }
956  if( end < N ) {
957  C(i,end) += A(i,k) * B(k,end);
958  }
959  }
960  }
961  }
963  //**********************************************************************************************
964 
965  //**Vectorized default addition assignment to row-major dense matrices**************************
979  template< typename MT3 // Type of the left-hand side target matrix
980  , typename MT4 // Type of the left-hand side matrix operand
981  , typename MT5 > // Type of the right-hand side matrix operand
982  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
983  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
984  {
985  typedef IntrinsicTrait<ElementType> IT;
986 
987  const size_t M( A.rows() );
988  const size_t N( B.columns() );
989  const size_t K( A.columns() );
990 
991  size_t j( 0UL );
992 
993  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
994  for( size_t i=0UL; i<M; ++i ) {
995  IntrinsicType xmm1( (~C).load(i,j ) );
996  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
997  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
998  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
999  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1000  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1001  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1002  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1003  for( size_t k=0UL; k<K; ++k ) {
1004  const IntrinsicType a1( set( A(i,k) ) );
1005  xmm1 = xmm1 + a1 * B.load(k,j );
1006  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1007  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1008  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1009  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1010  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1011  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1012  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1013  }
1014  (~C).store( i, j , xmm1 );
1015  (~C).store( i, j+IT::size , xmm2 );
1016  (~C).store( i, j+IT::size*2UL, xmm3 );
1017  (~C).store( i, j+IT::size*3UL, xmm4 );
1018  (~C).store( i, j+IT::size*4UL, xmm5 );
1019  (~C).store( i, j+IT::size*5UL, xmm6 );
1020  (~C).store( i, j+IT::size*6UL, xmm7 );
1021  (~C).store( i, j+IT::size*7UL, xmm8 );
1022  }
1023  }
1024  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1025  size_t i( 0UL );
1026  for( ; (i+2UL) <= M; i+=2UL ) {
1027  IntrinsicType xmm1( (~C).load(i ,j ) );
1028  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1029  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1030  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1031  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1032  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1033  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1034  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1035  for( size_t k=0UL; k<K; ++k ) {
1036  const IntrinsicType a1( set( A(i ,k) ) );
1037  const IntrinsicType a2( set( A(i+1UL,k) ) );
1038  const IntrinsicType b1( B.load(k,j ) );
1039  const IntrinsicType b2( B.load(k,j+IT::size ) );
1040  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1041  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1042  xmm1 = xmm1 + a1 * b1;
1043  xmm2 = xmm2 + a1 * b2;
1044  xmm3 = xmm3 + a1 * b3;
1045  xmm4 = xmm4 + a1 * b4;
1046  xmm5 = xmm5 + a2 * b1;
1047  xmm6 = xmm6 + a2 * b2;
1048  xmm7 = xmm7 + a2 * b3;
1049  xmm8 = xmm8 + a2 * b4;
1050  }
1051  (~C).store( i , j , xmm1 );
1052  (~C).store( i , j+IT::size , xmm2 );
1053  (~C).store( i , j+IT::size*2UL, xmm3 );
1054  (~C).store( i , j+IT::size*3UL, xmm4 );
1055  (~C).store( i+1UL, j , xmm5 );
1056  (~C).store( i+1UL, j+IT::size , xmm6 );
1057  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1058  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1059  }
1060  if( i < M ) {
1061  IntrinsicType xmm1( (~C).load(i,j ) );
1062  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1063  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1064  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1065  for( size_t k=0UL; k<K; ++k ) {
1066  const IntrinsicType a1( set( A(i,k) ) );
1067  xmm1 = xmm1 + a1 * B.load(k,j );
1068  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1069  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1070  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1071  }
1072  (~C).store( i, j , xmm1 );
1073  (~C).store( i, j+IT::size , xmm2 );
1074  (~C).store( i, j+IT::size*2UL, xmm3 );
1075  (~C).store( i, j+IT::size*3UL, xmm4 );
1076  }
1077  }
1078  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1079  size_t i( 0UL );
1080  for( ; (i+2UL) <= M; i+=2UL ) {
1081  IntrinsicType xmm1( (~C).load(i ,j ) );
1082  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1083  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1084  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1085  for( size_t k=0UL; k<K; ++k ) {
1086  const IntrinsicType a1( set( A(i ,k) ) );
1087  const IntrinsicType a2( set( A(i+1UL,k) ) );
1088  const IntrinsicType b1( B.load(k,j ) );
1089  const IntrinsicType b2( B.load(k,j+IT::size) );
1090  xmm1 = xmm1 + a1 * b1;
1091  xmm2 = xmm2 + a1 * b2;
1092  xmm3 = xmm3 + a2 * b1;
1093  xmm4 = xmm4 + a2 * b2;
1094  }
1095  (~C).store( i , j , xmm1 );
1096  (~C).store( i , j+IT::size, xmm2 );
1097  (~C).store( i+1UL, j , xmm3 );
1098  (~C).store( i+1UL, j+IT::size, xmm4 );
1099  }
1100  if( i < M ) {
1101  IntrinsicType xmm1( (~C).load(i,j ) );
1102  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1103  for( size_t k=0UL; k<K; ++k ) {
1104  const IntrinsicType a1( set( A(i,k) ) );
1105  xmm1 = xmm1 + a1 * B.load(k,j );
1106  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1107  }
1108  (~C).store( i, j , xmm1 );
1109  (~C).store( i, j+IT::size, xmm2 );
1110  }
1111  }
1112  if( j < N ) {
1113  size_t i( 0UL );
1114  for( ; (i+2UL) <= M; i+=2UL ) {
1115  IntrinsicType xmm1( (~C).load(i ,j) );
1116  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1117  for( size_t k=0UL; k<K; ++k ) {
1118  const IntrinsicType b1( B.load(k,j) );
1119  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1120  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1121  }
1122  (~C).store( i , j, xmm1 );
1123  (~C).store( i+1UL, j, xmm2 );
1124  }
1125  if( i < M ) {
1126  IntrinsicType xmm1( (~C).load(i,j) );
1127  for( size_t k=0UL; k<K; ++k ) {
1128  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1129  }
1130  (~C).store( i, j, xmm1 );
1131  }
1132  }
1133  }
1135  //**********************************************************************************************
1136 
1137  //**Vectorized default addition assignment to column-major dense matrices***********************
1151  template< typename MT3 // Type of the left-hand side target matrix
1152  , typename MT4 // Type of the left-hand side matrix operand
1153  , typename MT5 > // Type of the right-hand side matrix operand
1154  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1155  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1156  {
1159 
1160  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1161  const typename MT4::OppositeType tmp( A );
1162  addAssign( ~C, tmp * B );
1163  }
1164  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1165  const typename MT5::OppositeType tmp( B );
1166  addAssign( ~C, A * tmp );
1167  }
1168  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1169  const typename MT4::OppositeType tmp( A );
1170  addAssign( ~C, tmp * B );
1171  }
1172  else {
1173  const typename MT5::OppositeType tmp( B );
1174  addAssign( ~C, A * tmp );
1175  }
1176  }
1178  //**********************************************************************************************
1179 
1180  //**BLAS-based addition assignment to dense matrices (default)**********************************
1194  template< typename MT3 // Type of the left-hand side target matrix
1195  , typename MT4 // Type of the left-hand side matrix operand
1196  , typename MT5 > // Type of the right-hand side matrix operand
1197  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1198  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1199  {
1200  selectDefaultAddAssignKernel( C, A, B );
1201  }
1203  //**********************************************************************************************
1204 
1205  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1206 #if BLAZE_BLAS_MODE
1207 
1220  template< typename MT3 // Type of the left-hand side target matrix
1221  , typename MT4 // Type of the left-hand side matrix operand
1222  , typename MT5 > // Type of the right-hand side matrix operand
1223  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1224  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1225  {
1226  using boost::numeric_cast;
1227 
1231 
1232  const int M ( numeric_cast<int>( A.rows() ) );
1233  const int N ( numeric_cast<int>( B.columns() ) );
1234  const int K ( numeric_cast<int>( A.columns() ) );
1235  const int lda( numeric_cast<int>( A.spacing() ) );
1236  const int ldb( numeric_cast<int>( B.spacing() ) );
1237  const int ldc( numeric_cast<int>( C.spacing() ) );
1238 
1239  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1240  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1241  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1242  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1243  }
1245 #endif
1246  //**********************************************************************************************
1247 
1248  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1249 #if BLAZE_BLAS_MODE
1250 
1263  template< typename MT3 // Type of the left-hand side target matrix
1264  , typename MT4 // Type of the left-hand side matrix operand
1265  , typename MT5 > // Type of the right-hand side matrix operand
1266  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1267  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1268  {
1269  using boost::numeric_cast;
1270 
1274 
1275  const int M ( numeric_cast<int>( A.rows() ) );
1276  const int N ( numeric_cast<int>( B.columns() ) );
1277  const int K ( numeric_cast<int>( A.columns() ) );
1278  const int lda( numeric_cast<int>( A.spacing() ) );
1279  const int ldb( numeric_cast<int>( B.spacing() ) );
1280  const int ldc( numeric_cast<int>( C.spacing() ) );
1281 
1282  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1283  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1284  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1285  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1286  }
1288 #endif
1289  //**********************************************************************************************
1290 
1291  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1292 #if BLAZE_BLAS_MODE
1293 
1306  template< typename MT3 // Type of the left-hand side target matrix
1307  , typename MT4 // Type of the left-hand side matrix operand
1308  , typename MT5 > // Type of the right-hand side matrix operand
1309  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1310  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1311  {
1312  using boost::numeric_cast;
1313 
1317  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1318  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1319  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1320 
1321  const int M ( numeric_cast<int>( A.rows() ) );
1322  const int N ( numeric_cast<int>( B.columns() ) );
1323  const int K ( numeric_cast<int>( A.columns() ) );
1324  const int lda( numeric_cast<int>( A.spacing() ) );
1325  const int ldb( numeric_cast<int>( B.spacing() ) );
1326  const int ldc( numeric_cast<int>( C.spacing() ) );
1327  const complex<float> alpha( 1.0F, 0.0F );
1328  const complex<float> beta ( 1.0F, 0.0F );
1329 
1330  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1331  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1332  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1333  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1334  }
1336 #endif
1337  //**********************************************************************************************
1338 
1339  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1340 #if BLAZE_BLAS_MODE
1341 
1354  template< typename MT3 // Type of the left-hand side target matrix
1355  , typename MT4 // Type of the left-hand side matrix operand
1356  , typename MT5 > // Type of the right-hand side matrix operand
1357  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1358  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1359  {
1360  using boost::numeric_cast;
1361 
1365  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1366  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1367  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1368 
1369  const int M ( numeric_cast<int>( A.rows() ) );
1370  const int N ( numeric_cast<int>( B.columns() ) );
1371  const int K ( numeric_cast<int>( A.columns() ) );
1372  const int lda( numeric_cast<int>( A.spacing() ) );
1373  const int ldb( numeric_cast<int>( B.spacing() ) );
1374  const int ldc( numeric_cast<int>( C.spacing() ) );
1375  const complex<double> alpha( 1.0, 0.0 );
1376  const complex<double> beta ( 1.0, 0.0 );
1377 
1378  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1379  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1380  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1381  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1382  }
1384 #endif
1385  //**********************************************************************************************
1386 
1387  //**Addition assignment to sparse matrices******************************************************
1388  // No special implementation for the addition assignment to sparse matrices.
1389  //**********************************************************************************************
1390 
1391  //**Subtraction assignment to dense matrices****************************************************
1404  template< typename MT3 // Type of the target dense matrix
1405  , bool SO > // Storage order of the target dense matrix
1406  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
1407  {
1409 
1410  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1411  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1412 
1413  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1414  return;
1415  }
1416 
1417  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1418  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1419 
1420  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1421  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1422  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1423  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1424  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1425  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1426 
1427  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
1428  DMatDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1429  else
1430  DMatDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1431  }
1433  //**********************************************************************************************
1434 
1435  //**Default subtraction assignment to dense matrices********************************************
1449  template< typename MT3 // Type of the left-hand side target matrix
1450  , typename MT4 // Type of the left-hand side matrix operand
1451  , typename MT5 > // Type of the right-hand side matrix operand
1452  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1453  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1454  {
1455  const size_t M( A.rows() );
1456  const size_t N( B.columns() );
1457  const size_t K( A.columns() );
1458 
1459  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1460  const size_t end( N & size_t(-2) );
1461 
1462  for( size_t i=0UL; i<M; ++i ) {
1463  for( size_t k=0UL; k<K; ++k ) {
1464  for( size_t j=0UL; j<end; j+=2UL ) {
1465  C(i,j ) -= A(i,k) * B(k,j );
1466  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1467  }
1468  if( end < N ) {
1469  C(i,end) -= A(i,k) * B(k,end);
1470  }
1471  }
1472  }
1473  }
1475  //**********************************************************************************************
1476 
1477  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1491  template< typename MT3 // Type of the left-hand side target matrix
1492  , typename MT4 // Type of the left-hand side matrix operand
1493  , typename MT5 > // Type of the right-hand side matrix operand
1494  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1495  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1496  {
1497  typedef IntrinsicTrait<ElementType> IT;
1498 
1499  const size_t M( A.rows() );
1500  const size_t N( B.columns() );
1501  const size_t K( A.columns() );
1502 
1503  size_t j( 0UL );
1504 
1505  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1506  for( size_t i=0UL; i<M; ++i ) {
1507  IntrinsicType xmm1( (~C).load(i,j ) );
1508  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1509  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1510  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1511  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1512  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1513  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1514  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1515  for( size_t k=0UL; k<K; ++k ) {
1516  const IntrinsicType a1( set( A(i,k) ) );
1517  xmm1 = xmm1 - a1 * B.load(k,j );
1518  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1519  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1520  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1521  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1522  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1523  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1524  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1525  }
1526  (~C).store( i, j , xmm1 );
1527  (~C).store( i, j+IT::size , xmm2 );
1528  (~C).store( i, j+IT::size*2UL, xmm3 );
1529  (~C).store( i, j+IT::size*3UL, xmm4 );
1530  (~C).store( i, j+IT::size*4UL, xmm5 );
1531  (~C).store( i, j+IT::size*5UL, xmm6 );
1532  (~C).store( i, j+IT::size*6UL, xmm7 );
1533  (~C).store( i, j+IT::size*7UL, xmm8 );
1534  }
1535  }
1536  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1537  size_t i( 0UL );
1538  for( ; (i+2UL) <= M; i+=2UL ) {
1539  IntrinsicType xmm1( (~C).load(i ,j ) );
1540  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1541  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1542  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1543  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1544  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1545  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1546  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1547  for( size_t k=0UL; k<K; ++k ) {
1548  const IntrinsicType a1( set( A(i ,k) ) );
1549  const IntrinsicType a2( set( A(i+1UL,k) ) );
1550  const IntrinsicType b1( B.load(k,j ) );
1551  const IntrinsicType b2( B.load(k,j+IT::size ) );
1552  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1553  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1554  xmm1 = xmm1 - a1 * b1;
1555  xmm2 = xmm2 - a1 * b2;
1556  xmm3 = xmm3 - a1 * b3;
1557  xmm4 = xmm4 - a1 * b4;
1558  xmm5 = xmm5 - a2 * b1;
1559  xmm6 = xmm6 - a2 * b2;
1560  xmm7 = xmm7 - a2 * b3;
1561  xmm8 = xmm8 - a2 * b4;
1562  }
1563  (~C).store( i , j , xmm1 );
1564  (~C).store( i , j+IT::size , xmm2 );
1565  (~C).store( i , j+IT::size*2UL, xmm3 );
1566  (~C).store( i , j+IT::size*3UL, xmm4 );
1567  (~C).store( i+1UL, j , xmm5 );
1568  (~C).store( i+1UL, j+IT::size , xmm6 );
1569  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1570  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1571  }
1572  if( i < M ) {
1573  IntrinsicType xmm1( (~C).load(i,j ) );
1574  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1575  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1576  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1577  for( size_t k=0UL; k<K; ++k ) {
1578  const IntrinsicType a1( set( A(i,k) ) );
1579  xmm1 = xmm1 - a1 * B.load(k,j );
1580  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1581  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1582  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1583  }
1584  (~C).store( i, j , xmm1 );
1585  (~C).store( i, j+IT::size , xmm2 );
1586  (~C).store( i, j+IT::size*2UL, xmm3 );
1587  (~C).store( i, j+IT::size*3UL, xmm4 );
1588  }
1589  }
1590  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1591  size_t i( 0UL );
1592  for( ; (i+2UL) <= M; i+=2UL ) {
1593  IntrinsicType xmm1( (~C).load(i ,j ) );
1594  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1595  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1596  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1597  for( size_t k=0UL; k<K; ++k ) {
1598  const IntrinsicType a1( set( A(i ,k) ) );
1599  const IntrinsicType a2( set( A(i+1UL,k) ) );
1600  const IntrinsicType b1( B.load(k,j ) );
1601  const IntrinsicType b2( B.load(k,j+IT::size) );
1602  xmm1 = xmm1 - a1 * b1;
1603  xmm2 = xmm2 - a1 * b2;
1604  xmm3 = xmm3 - a2 * b1;
1605  xmm4 = xmm4 - a2 * b2;
1606  }
1607  (~C).store( i , j , xmm1 );
1608  (~C).store( i , j+IT::size, xmm2 );
1609  (~C).store( i+1UL, j , xmm3 );
1610  (~C).store( i+1UL, j+IT::size, xmm4 );
1611  }
1612  if( i < M ) {
1613  IntrinsicType xmm1( (~C).load(i,j ) );
1614  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1615  for( size_t k=0UL; k<K; ++k ) {
1616  const IntrinsicType a1( set( A(i,k) ) );
1617  xmm1 = xmm1 - a1 * B.load(k,j );
1618  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1619  }
1620  (~C).store( i, j , xmm1 );
1621  (~C).store( i, j+IT::size, xmm2 );
1622  }
1623  }
1624  if( j < N ) {
1625  size_t i( 0UL );
1626  for( ; (i+2UL) <= M; i+=2UL ) {
1627  IntrinsicType xmm1( (~C).load(i ,j) );
1628  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1629  for( size_t k=0UL; k<K; ++k ) {
1630  const IntrinsicType b1( B.load(k,j) );
1631  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1632  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1633  }
1634  (~C).store( i , j, xmm1 );
1635  (~C).store( i+1UL, j, xmm2 );
1636  }
1637  if( i < M ) {
1638  IntrinsicType xmm1( (~C).load(i,j) );
1639  for( size_t k=0UL; k<K; ++k ) {
1640  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
1641  }
1642  (~C).store( i, j, xmm1 );
1643  }
1644  }
1645  }
1647  //**********************************************************************************************
1648 
1649  //**Vectorized default subtraction assignment to column-major dense matrices********************
1663  template< typename MT3 // Type of the left-hand side target matrix
1664  , typename MT4 // Type of the left-hand side matrix operand
1665  , typename MT5 > // Type of the right-hand side matrix operand
1666  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1667  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1668  {
1671 
1672  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1673  const typename MT4::OppositeType tmp( A );
1674  subAssign( ~C, tmp * B );
1675  }
1676  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1677  const typename MT5::OppositeType tmp( B );
1678  subAssign( ~C, A * tmp );
1679  }
1680  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1681  const typename MT4::OppositeType tmp( A );
1682  subAssign( ~C, tmp * B );
1683  }
1684  else {
1685  const typename MT5::OppositeType tmp( B );
1686  subAssign( ~C, A * tmp );
1687  }
1688  }
1690  //**********************************************************************************************
1691 
1692  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1706  template< typename MT3 // Type of the left-hand side target matrix
1707  , typename MT4 // Type of the left-hand side matrix operand
1708  , typename MT5 > // Type of the right-hand side matrix operand
1709  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1710  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1711  {
1712  selectDefaultSubAssignKernel( C, A, B );
1713  }
1715  //**********************************************************************************************
1716 
1717  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1718 #if BLAZE_BLAS_MODE
1719 
1732  template< typename MT3 // Type of the left-hand side target matrix
1733  , typename MT4 // Type of the left-hand side matrix operand
1734  , typename MT5 > // Type of the right-hand side matrix operand
1735  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1736  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1737  {
1738  using boost::numeric_cast;
1739 
1743 
1744  const int M ( numeric_cast<int>( A.rows() ) );
1745  const int N ( numeric_cast<int>( B.columns() ) );
1746  const int K ( numeric_cast<int>( A.columns() ) );
1747  const int lda( numeric_cast<int>( A.spacing() ) );
1748  const int ldb( numeric_cast<int>( B.spacing() ) );
1749  const int ldc( numeric_cast<int>( C.spacing() ) );
1750 
1751  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1752  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1753  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1754  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1755  }
1757 #endif
1758  //**********************************************************************************************
1759 
1760  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1761 #if BLAZE_BLAS_MODE
1762 
1775  template< typename MT3 // Type of the left-hand side target matrix
1776  , typename MT4 // Type of the left-hand side matrix operand
1777  , typename MT5 > // Type of the right-hand side matrix operand
1778  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1779  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1780  {
1781  using boost::numeric_cast;
1782 
1786 
1787  const int M ( numeric_cast<int>( A.rows() ) );
1788  const int N ( numeric_cast<int>( B.columns() ) );
1789  const int K ( numeric_cast<int>( A.columns() ) );
1790  const int lda( numeric_cast<int>( A.spacing() ) );
1791  const int ldb( numeric_cast<int>( B.spacing() ) );
1792  const int ldc( numeric_cast<int>( C.spacing() ) );
1793 
1794  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1795  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1796  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1797  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1798  }
1800 #endif
1801  //**********************************************************************************************
1802 
1803  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1804 #if BLAZE_BLAS_MODE
1805 
1818  template< typename MT3 // Type of the left-hand side target matrix
1819  , typename MT4 // Type of the left-hand side matrix operand
1820  , typename MT5 > // Type of the right-hand side matrix operand
1821  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1822  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1823  {
1824  using boost::numeric_cast;
1825 
1829  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1830  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1831  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1832 
1833  const int M ( numeric_cast<int>( A.rows() ) );
1834  const int N ( numeric_cast<int>( B.columns() ) );
1835  const int K ( numeric_cast<int>( A.columns() ) );
1836  const int lda( numeric_cast<int>( A.spacing() ) );
1837  const int ldb( numeric_cast<int>( B.spacing() ) );
1838  const int ldc( numeric_cast<int>( C.spacing() ) );
1839  const complex<float> alpha( -1.0F, 0.0F );
1840  const complex<float> beta ( 1.0F, 0.0F );
1841 
1842  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1843  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1844  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1845  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1846  }
1848 #endif
1849  //**********************************************************************************************
1850 
1851  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1852 #if BLAZE_BLAS_MODE
1853 
1866  template< typename MT3 // Type of the left-hand side target matrix
1867  , typename MT4 // Type of the left-hand side matrix operand
1868  , typename MT5 > // Type of the right-hand side matrix operand
1869  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1870  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1871  {
1872  using boost::numeric_cast;
1873 
1877  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1878  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1879  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1880 
1881  const int M ( numeric_cast<int>( A.rows() ) );
1882  const int N ( numeric_cast<int>( B.columns() ) );
1883  const int K ( numeric_cast<int>( A.columns() ) );
1884  const int lda( numeric_cast<int>( A.spacing() ) );
1885  const int ldb( numeric_cast<int>( B.spacing() ) );
1886  const int ldc( numeric_cast<int>( C.spacing() ) );
1887  const complex<double> alpha( -1.0, 0.0 );
1888  const complex<double> beta ( 1.0, 0.0 );
1889 
1890  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1891  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1892  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1893  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1894  }
1896 #endif
1897  //**********************************************************************************************
1898 
1899  //**Subtraction assignment to sparse matrices***************************************************
1900  // No special implementation for the subtraction assignment to sparse matrices.
1901  //**********************************************************************************************
1902 
1903  //**Multiplication assignment to dense matrices*************************************************
1904  // No special implementation for the multiplication assignment to dense matrices.
1905  //**********************************************************************************************
1906 
1907  //**Multiplication assignment to sparse matrices************************************************
1908  // No special implementation for the multiplication assignment to sparse matrices.
1909  //**********************************************************************************************
1910 
1911  //**Compile time checks*************************************************************************
1918  //**********************************************************************************************
1919 };
1920 //*************************************************************************************************
1921 
1922 
1923 
1924 
1925 //=================================================================================================
1926 //
1927 // DMATSCALARMULTEXPR SPECIALIZATION
1928 //
1929 //=================================================================================================
1930 
1931 //*************************************************************************************************
1939 template< typename MT1 // Type of the left-hand side dense matrix
1940  , typename MT2 // Type of the right-hand side dense matrix
1941  , typename ST > // Type of the right-hand side scalar value
1942 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
1943  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
1944  , private MatScalarMultExpr
1945  , private Computation
1946 {
1947  private:
1948  //**Type definitions****************************************************************************
1949  typedef DMatDMatMultExpr<MT1,MT2> MMM;
1950  typedef typename MMM::ResultType RES;
1951  typedef typename MT1::ResultType RT1;
1952  typedef typename MT2::ResultType RT2;
1953  typedef typename MT1::CompositeType CT1;
1954  typedef typename MT2::CompositeType CT2;
1955  //**********************************************************************************************
1956 
1957  //**********************************************************************************************
1959 
1962  template< typename T1, typename T2, typename T3, typename T4 >
1963  struct UseSinglePrecisionKernel {
1964  enum { value = IsFloat<typename T1::ElementType>::value &&
1965  IsFloat<typename T2::ElementType>::value &&
1966  IsFloat<typename T3::ElementType>::value &&
1967  !IsComplex<T4>::value };
1968  };
1969  //**********************************************************************************************
1970 
1971  //**********************************************************************************************
1973 
1976  template< typename T1, typename T2, typename T3, typename T4 >
1977  struct UseDoublePrecisionKernel {
1978  enum { value = IsDouble<typename T1::ElementType>::value &&
1979  IsDouble<typename T2::ElementType>::value &&
1980  IsDouble<typename T3::ElementType>::value &&
1981  !IsComplex<T4>::value };
1982  };
1983  //**********************************************************************************************
1984 
1985  //**********************************************************************************************
1987 
1990  template< typename T1, typename T2, typename T3 >
1991  struct UseSinglePrecisionComplexKernel {
1992  typedef complex<float> Type;
1993  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1994  IsSame<typename T2::ElementType,Type>::value &&
1995  IsSame<typename T3::ElementType,Type>::value };
1996  };
1997  //**********************************************************************************************
1998 
1999  //**********************************************************************************************
2001 
2004  template< typename T1, typename T2, typename T3 >
2005  struct UseDoublePrecisionComplexKernel {
2006  typedef complex<double> Type;
2007  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2008  IsSame<typename T2::ElementType,Type>::value &&
2009  IsSame<typename T3::ElementType,Type>::value };
2010  };
2011  //**********************************************************************************************
2012 
2013  //**********************************************************************************************
2015 
2017  template< typename T1, typename T2, typename T3, typename T4 >
2018  struct UseDefaultKernel {
2019  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2020  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2021  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2022  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2023  };
2024  //**********************************************************************************************
2025 
2026  //**********************************************************************************************
2028 
2030  template< typename T1, typename T2, typename T3, typename T4 >
2031  struct UseVectorizedDefaultKernel {
2032  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2033  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2034  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2035  IsSame<typename T1::ElementType,T4>::value &&
2036  IntrinsicTrait<typename T1::ElementType>::addition &&
2037  IntrinsicTrait<typename T1::ElementType>::multiplication };
2038  };
2039  //**********************************************************************************************
2040 
2041  public:
2042  //**Type definitions****************************************************************************
// Type of this DMatScalarMultExpr specialization.
2043  typedef DMatScalarMultExpr<MMM,ST,false> This;
// Result type of the expression, obtained from MultTrait for RES and the scalar type ST
// (RES is declared outside this excerpt).
2044  typedef typename MultTrait<RES,ST>::Type ResultType;
// Types forwarded from ResultType.
2045  typedef typename ResultType::OppositeType OppositeType;
2046  typedef typename ResultType::TransposeType TransposeType;
2047  typedef typename ResultType::ElementType ElementType;
// Intrinsic (SIMD) type associated with ElementType via IntrinsicTrait.
2048  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
// Type returned by the access operator: a const element, i.e. returned by value.
2049  typedef const ElementType ReturnType;
// Type used when this expression is taken as a composite operand: a const ResultType.
2050  typedef const ResultType CompositeType;
2051 
// Type of the left-hand side operand: the dense matrix/dense matrix multiplication expression.
2053  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2054 
// Type of the right-hand side operand: the scalar.
2056  typedef ST RightOperand;
2057 
// Type the first matrix operand is evaluated into by the assignment kernels. Presumably
// SelectType picks 'const RT1' when MT1 is a computation and CT1 otherwise; confirm against
// the SelectType definition.
2059  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2060 
// Type the second matrix operand is evaluated into by the assignment kernels. Presumably
// SelectType picks 'const RT2' when MT2 is a computation and CT2 otherwise; confirm against
// the SelectType definition.
2062  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2063  //**********************************************************************************************
2064 
2065  //**Compilation flags***************************************************************************
// Compilation flag: this expression type reports itself as not vectorizable (value 0).
2067  enum { vectorizable = 0 };
2068  //**********************************************************************************************
2069 
2070  //**Constructor*********************************************************************************
2076  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2077  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2078  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2079  {}
2080  //**********************************************************************************************
2081 
2082  //**Access operator*****************************************************************************
2089  inline ReturnType operator()( size_t i, size_t j ) const {
2090  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2091  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2092  return matrix_(i,j) * scalar_;
2093  }
2094  //**********************************************************************************************
2095 
2096  //**Rows function*******************************************************************************
2101  inline size_t rows() const {
2102  return matrix_.rows();
2103  }
2104  //**********************************************************************************************
2105 
2106  //**Columns function****************************************************************************
2111  inline size_t columns() const {
2112  return matrix_.columns();
2113  }
2114  //**********************************************************************************************
2115 
2116  //**Left operand access*************************************************************************
2121  inline LeftOperand leftOperand() const {
2122  return matrix_;
2123  }
2124  //**********************************************************************************************
2125 
2126  //**Right operand access************************************************************************
2131  inline RightOperand rightOperand() const {
2132  return scalar_;
2133  }
2134  //**********************************************************************************************
2135 
2136  //**********************************************************************************************
2142  template< typename T >
2143  inline bool canAlias( const T* alias ) const {
2144  return matrix_.canAlias( alias );
2145  }
2146  //**********************************************************************************************
2147 
2148  //**********************************************************************************************
2154  template< typename T >
2155  inline bool isAliased( const T* alias ) const {
2156  return matrix_.isAliased( alias );
2157  }
2158  //**********************************************************************************************
2159 
2160  private:
2161  //**Member variables****************************************************************************
2163  RightOperand scalar_;
2164  //**********************************************************************************************
2165 
2166  //**Assignment to dense matrices****************************************************************
2175  template< typename MT3 // Type of the target dense matrix
2176  , bool SO > // Storage order of the target dense matrix
2177  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2178  {
2180 
2181  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2182  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2183 
2184  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2185  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2186 
2187  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2188  return;
2189  }
2190  else if( left.columns() == 0UL ) {
2191  reset( ~lhs );
2192  return;
2193  }
2194 
2195  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2196  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2197 
2198  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2199  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2200  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2201  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2202  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2203  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2204 
2205  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2206  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2207  else
2208  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2209  }
2210  //**********************************************************************************************
2211 
2212  //**Default assignment to dense matrices********************************************************
2226  template< typename MT3 // Type of the left-hand side target matrix
2227  , typename MT4 // Type of the left-hand side matrix operand
2228  , typename MT5 // Type of the right-hand side matrix operand
2229  , typename ST2 > // Type of the scalar value
2230  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2231  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2232  {
2233  const size_t M( A.rows() );
2234  const size_t N( B.columns() );
2235  const size_t K( A.columns() );
2236 
2237  for( size_t i=0UL; i<M; ++i ) {
2238  for( size_t j=0UL; j<N; ++j ) {
2239  C(i,j) = A(i,0UL) * B(0UL,j);
2240  }
2241  for( size_t k=1UL; k<K; ++k ) {
2242  for( size_t j=0UL; j<N; ++j ) {
2243  C(i,j) += A(i,k) * B(k,j);
2244  }
2245  }
2246  for( size_t j=0UL; j<N; ++j ) {
2247  C(i,j) *= scalar;
2248  }
2249  }
2250  }
2251  //**********************************************************************************************
2252 
2253  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default assignment kernel for row-major target matrices: stores
// ( sum over k of A(i,k) * B(k,j) ) * scalar into C, IT::size columns per intrinsic vector.
// Selected when UseVectorizedDefaultKernel accepts the type combination.
//
// The columns are processed in tiles of 8, 4, 2 and finally 1 intrinsic vector(s).
// NOTE(review): the accumulators xmm* are default-constructed and immediately used as
// summands, so the kernel presumably relies on IntrinsicType's default constructor zeroing
// the value - confirm against the IntrinsicType definition.
// NOTE(review): every tile loads/stores whole vectors and the last tile of each width may
// extend beyond column N; this presumably relies on padded row storage - confirm.
2267  template< typename MT3 // Type of the left-hand side target matrix
2268  , typename MT4 // Type of the left-hand side matrix operand
2269  , typename MT5 // Type of the right-hand side matrix operand
2270  , typename ST2 > // Type of the scalar value
2271  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2272  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2273  {
2274  typedef IntrinsicTrait<ElementType> IT;
2275 
2276  const size_t M( A.rows() );
2277  const size_t N( B.columns() );
2278  const size_t K( A.columns() );
2279 
      // Intrinsic vector built from the scalar; every accumulator is multiplied by it once
      // right before it is stored.
2280  const IntrinsicType factor( set( scalar ) );
2281 
2282  size_t j( 0UL );
2283 
      // Tiles of 8 vectors x 1 row, taken while more than 7*IT::size columns remain.
2284  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2285  for( size_t i=0UL; i<M; ++i ) {
2286  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2287  for( size_t k=0UL; k<K; ++k ) {
2288  const IntrinsicType a1( set( A(i,k) ) );
2289  xmm1 = xmm1 + a1 * B.load(k,j );
2290  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2291  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2292  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2293  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2294  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2295  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2296  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2297  }
2298  (~C).store( i, j , xmm1 * factor );
2299  (~C).store( i, j+IT::size , xmm2 * factor );
2300  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2301  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2302  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
2303  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
2304  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
2305  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
2306  }
2307  }
      // Tiles of 4 vectors x 2 rows (xmm1-4: row i, xmm5-8: row i+1), taken while more than
      // 3*IT::size columns remain; an odd last row is handled by the 'if( i < M )' part.
2308  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2309  size_t i( 0UL );
2310  for( ; (i+2UL) <= M; i+=2UL ) {
2311  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2312  for( size_t k=0UL; k<K; ++k ) {
2313  const IntrinsicType a1( set( A(i ,k) ) );
2314  const IntrinsicType a2( set( A(i+1UL,k) ) );
2315  const IntrinsicType b1( B.load(k,j ) );
2316  const IntrinsicType b2( B.load(k,j+IT::size ) );
2317  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2318  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2319  xmm1 = xmm1 + a1 * b1;
2320  xmm2 = xmm2 + a1 * b2;
2321  xmm3 = xmm3 + a1 * b3;
2322  xmm4 = xmm4 + a1 * b4;
2323  xmm5 = xmm5 + a2 * b1;
2324  xmm6 = xmm6 + a2 * b2;
2325  xmm7 = xmm7 + a2 * b3;
2326  xmm8 = xmm8 + a2 * b4;
2327  }
2328  (~C).store( i , j , xmm1 * factor );
2329  (~C).store( i , j+IT::size , xmm2 * factor );
2330  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
2331  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
2332  (~C).store( i+1UL, j , xmm5 * factor );
2333  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
2334  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
2335  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
2336  }
2337  if( i < M ) {
2338  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2339  for( size_t k=0UL; k<K; ++k ) {
2340  const IntrinsicType a1( set( A(i,k) ) );
2341  xmm1 = xmm1 + a1 * B.load(k,j );
2342  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2343  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2344  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2345  }
2346  (~C).store( i, j , xmm1 * factor );
2347  (~C).store( i, j+IT::size , xmm2 * factor );
2348  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2349  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2350  }
2351  }
      // Tiles of 2 vectors x 2 rows (xmm1-2: row i, xmm3-4: row i+1), taken while more than
      // IT::size columns remain; an odd last row is handled by the 'if( i < M )' part.
2352  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2353  size_t i( 0UL );
2354  for( ; (i+2UL) <= M; i+=2UL ) {
2355  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2356  for( size_t k=0UL; k<K; ++k ) {
2357  const IntrinsicType a1( set( A(i ,k) ) );
2358  const IntrinsicType a2( set( A(i+1UL,k) ) );
2359  const IntrinsicType b1( B.load(k,j ) );
2360  const IntrinsicType b2( B.load(k,j+IT::size) );
2361  xmm1 = xmm1 + a1 * b1;
2362  xmm2 = xmm2 + a1 * b2;
2363  xmm3 = xmm3 + a2 * b1;
2364  xmm4 = xmm4 + a2 * b2;
2365  }
2366  (~C).store( i , j , xmm1 * factor );
2367  (~C).store( i , j+IT::size, xmm2 * factor );
2368  (~C).store( i+1UL, j , xmm3 * factor );
2369  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
2370  }
2371  if( i < M ) {
2372  IntrinsicType xmm1, xmm2;
2373  for( size_t k=0UL; k<K; ++k ) {
2374  const IntrinsicType a1( set( A(i,k) ) );
2375  xmm1 = xmm1 + a1 * B.load(k,j );
2376  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2377  }
2378  (~C).store( i, j , xmm1 * factor );
2379  (~C).store( i, j+IT::size, xmm2 * factor );
2380  }
2381  }
      // At most IT::size columns are left here: one final vector, two rows at a time, plus
      // an odd last row.
2382  if( j < N ) {
2383  size_t i( 0UL );
2384  for( ; (i+2UL) <= M; i+=2UL ) {
2385  IntrinsicType xmm1, xmm2;
2386  for( size_t k=0UL; k<K; ++k ) {
2387  const IntrinsicType b1( B.load(k,j) );
2388  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2389  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2390  }
2391  (~C).store( i , j, xmm1 * factor );
2392  (~C).store( i+1UL, j, xmm2 * factor );
2393  }
2394  if( i < M ) {
2395  IntrinsicType xmm1;
2396  for( size_t k=0UL; k<K; ++k ) {
2397  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
2398  }
2399  (~C).store( i, j, xmm1 * factor );
2400  }
2401  }
2402  }
2403  //**********************************************************************************************
2404 
2405  //**Vectorized default assignment to column-major dense matrices********************************
2419  template< typename MT3 // Type of the left-hand side target matrix
2420  , typename MT4 // Type of the left-hand side matrix operand
2421  , typename MT5 // Type of the right-hand side matrix operand
2422  , typename ST2 > // Type of the scalar value
2423  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2424  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2425  {
2428 
2429  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2430  const typename MT4::OppositeType tmp( A );
2431  assign( ~C, tmp * B * scalar );
2432  }
2433  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2434  const typename MT5::OppositeType tmp( B );
2435  assign( ~C, A * tmp * scalar );
2436  }
2437  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2438  const typename MT4::OppositeType tmp( A );
2439  assign( ~C, tmp * B * scalar );
2440  }
2441  else {
2442  const typename MT5::OppositeType tmp( B );
2443  assign( ~C, A * tmp * scalar );
2444  }
2445  }
2446  //**********************************************************************************************
2447 
2448  //**BLAS-based assignment to dense matrices (default)*******************************************
2462  template< typename MT3 // Type of the left-hand side target matrix
2463  , typename MT4 // Type of the left-hand side matrix operand
2464  , typename MT5 // Type of the right-hand side matrix operand
2465  , typename ST2 > // Type of the scalar value
2466  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2467  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2468  {
2469  selectDefaultAssignKernel( C, A, B, scalar );
2470  }
2471  //**********************************************************************************************
2472 
2473  //**BLAS-based assignment to dense matrices (single precision)**********************************
2474 #if BLAZE_BLAS_MODE
2475 
2488  template< typename MT3 // Type of the left-hand side target matrix
2489  , typename MT4 // Type of the left-hand side matrix operand
2490  , typename MT5 // Type of the right-hand side matrix operand
2491  , typename ST2 > // Type of the scalar value
2492  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2493  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2494  {
2495  using boost::numeric_cast;
2496 
2500 
2501  const int M ( numeric_cast<int>( A.rows() ) );
2502  const int N ( numeric_cast<int>( B.columns() ) );
2503  const int K ( numeric_cast<int>( A.columns() ) );
2504  const int lda( numeric_cast<int>( A.spacing() ) );
2505  const int ldb( numeric_cast<int>( B.spacing() ) );
2506  const int ldc( numeric_cast<int>( C.spacing() ) );
2507 
2508  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2509  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2510  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2511  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2512  }
2513 #endif
2514  //**********************************************************************************************
2515 
2516  //**BLAS-based assignment to dense matrices (double precision)**********************************
2517 #if BLAZE_BLAS_MODE
2518 
2531  template< typename MT3 // Type of the left-hand side target matrix
2532  , typename MT4 // Type of the left-hand side matrix operand
2533  , typename MT5 // Type of the right-hand side matrix operand
2534  , typename ST2 > // Type of the scalar value
2535  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2536  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2537  {
2538  using boost::numeric_cast;
2539 
2543 
2544  const int M ( numeric_cast<int>( A.rows() ) );
2545  const int N ( numeric_cast<int>( B.columns() ) );
2546  const int K ( numeric_cast<int>( A.columns() ) );
2547  const int lda( numeric_cast<int>( A.spacing() ) );
2548  const int ldb( numeric_cast<int>( B.spacing() ) );
2549  const int ldc( numeric_cast<int>( C.spacing() ) );
2550 
2551  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2552  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2553  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2554  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2555  }
2556 #endif
2557  //**********************************************************************************************
2558 
2559  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2560 #if BLAZE_BLAS_MODE
2561 
2574  template< typename MT3 // Type of the left-hand side target matrix
2575  , typename MT4 // Type of the left-hand side matrix operand
2576  , typename MT5 // Type of the right-hand side matrix operand
2577  , typename ST2 > // Type of the scalar value
2578  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2579  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2580  {
2581  using boost::numeric_cast;
2582 
2586  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2587  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2588  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2589 
2590  const int M ( numeric_cast<int>( A.rows() ) );
2591  const int N ( numeric_cast<int>( B.columns() ) );
2592  const int K ( numeric_cast<int>( A.columns() ) );
2593  const int lda( numeric_cast<int>( A.spacing() ) );
2594  const int ldb( numeric_cast<int>( B.spacing() ) );
2595  const int ldc( numeric_cast<int>( C.spacing() ) );
2596  const complex<float> alpha( scalar );
2597  const complex<float> beta ( 0.0F, 0.0F );
2598 
2599  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2600  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2601  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2602  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2603  }
2604 #endif
2605  //**********************************************************************************************
2606 
2607  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2608 #if BLAZE_BLAS_MODE
2609 
2622  template< typename MT3 // Type of the left-hand side target matrix
2623  , typename MT4 // Type of the left-hand side matrix operand
2624  , typename MT5 // Type of the right-hand side matrix operand
2625  , typename ST2 > // Type of the scalar
2626  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2627  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2628  {
2629  using boost::numeric_cast;
2630 
2634  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2635  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2636  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2637 
2638  const int M ( numeric_cast<int>( A.rows() ) );
2639  const int N ( numeric_cast<int>( B.columns() ) );
2640  const int K ( numeric_cast<int>( A.columns() ) );
2641  const int lda( numeric_cast<int>( A.spacing() ) );
2642  const int ldb( numeric_cast<int>( B.spacing() ) );
2643  const int ldc( numeric_cast<int>( C.spacing() ) );
2644  const complex<double> alpha( scalar );
2645  const complex<double> beta ( 0.0, 0.0 );
2646 
2647  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2648  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2649  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2650  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2651  }
2652 #endif
2653  //**********************************************************************************************
2654 
2655  //**Assignment to sparse matrices***************************************************************
2666  template< typename MT // Type of the target sparse matrix
2667  , bool SO > // Storage order of the target sparse matrix
2668  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2669  {
2671 
2672  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2673 
2680 
2681  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2682  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2683 
2684  const TmpType tmp( rhs );
2685  assign( ~lhs, tmp );
2686  }
2687  //**********************************************************************************************
2688 
2689  //**Addition assignment to dense matrices*******************************************************
2701  template< typename MT3 // Type of the target dense matrix
2702  , bool SO > // Storage order of the target dense matrix
2703  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2704  {
2706 
2707  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2708  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2709 
2710  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2711  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2712 
2713  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2714  return;
2715  }
2716 
2717  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2718  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2719 
2720  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2721  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2722  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2723  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2724  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2725  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2726 
2727  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2728  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2729  else
2730  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2731  }
2732  //**********************************************************************************************
2733 
2734  //**Default addition assignment to dense matrices***********************************************
2748  template< typename MT3 // Type of the left-hand side target matrix
2749  , typename MT4 // Type of the left-hand side matrix operand
2750  , typename MT5 // Type of the right-hand side matrix operand
2751  , typename ST2 > // Type of the scalar value
2752  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2753  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2754  {
2755  const ResultType tmp( A * B * scalar );
2756  addAssign( C, tmp );
2757  }
2758  //**********************************************************************************************
2759 
2760  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment kernel for row-major target matrices: adds
// ( sum over k of A(i,k) * B(k,j) ) * scalar to C, IT::size columns per intrinsic vector.
// Selected when UseVectorizedDefaultKernel accepts the type combination.
//
// The columns are processed in tiles of 8, 4, 2 and finally 1 intrinsic vector(s); every
// store writes the previously loaded content of C plus the scaled accumulator.
// NOTE(review): the accumulators xmm* are default-constructed and immediately used as
// summands, so the kernel presumably relies on IntrinsicType's default constructor zeroing
// the value - confirm against the IntrinsicType definition.
// NOTE(review): every tile loads/stores whole vectors and the last tile of each width may
// extend beyond column N; this presumably relies on padded row storage - confirm.
2774  template< typename MT3 // Type of the left-hand side target matrix
2775  , typename MT4 // Type of the left-hand side matrix operand
2776  , typename MT5 // Type of the right-hand side matrix operand
2777  , typename ST2 > // Type of the scalar value
2778  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2779  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2780  {
2781  typedef IntrinsicTrait<ElementType> IT;
2782 
2783  const size_t M( A.rows() );
2784  const size_t N( B.columns() );
2785  const size_t K( A.columns() );
2786 
      // Intrinsic vector built from the scalar; every accumulator is multiplied by it once
      // right before it is added to C.
2787  const IntrinsicType factor( set( scalar ) );
2788 
2789  size_t j( 0UL );
2790 
      // Tiles of 8 vectors x 1 row, taken while more than 7*IT::size columns remain.
2791  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2792  for( size_t i=0UL; i<M; ++i ) {
2793  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2794  for( size_t k=0UL; k<K; ++k ) {
2795  const IntrinsicType a1( set( A(i,k) ) );
2796  xmm1 = xmm1 + a1 * B.load(k,j );
2797  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2798  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2799  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2800  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2801  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2802  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2803  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2804  }
2805  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
2806  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
2807  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
2808  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
2809  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
2810  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
2811  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
2812  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
2813  }
2814  }
      // Tiles of 4 vectors x 2 rows (xmm1-4: row i, xmm5-8: row i+1), taken while more than
      // 3*IT::size columns remain; an odd last row is handled by the 'if( i < M )' part.
2815  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2816  size_t i( 0UL );
2817  for( ; (i+2UL) <= M; i+=2UL ) {
2818  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2819  for( size_t k=0UL; k<K; ++k ) {
2820  const IntrinsicType a1( set( A(i ,k) ) );
2821  const IntrinsicType a2( set( A(i+1UL,k) ) );
2822  const IntrinsicType b1( B.load(k,j ) );
2823  const IntrinsicType b2( B.load(k,j+IT::size ) );
2824  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2825  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2826  xmm1 = xmm1 + a1 * b1;
2827  xmm2 = xmm2 + a1 * b2;
2828  xmm3 = xmm3 + a1 * b3;
2829  xmm4 = xmm4 + a1 * b4;
2830  xmm5 = xmm5 + a2 * b1;
2831  xmm6 = xmm6 + a2 * b2;
2832  xmm7 = xmm7 + a2 * b3;
2833  xmm8 = xmm8 + a2 * b4;
2834  }
2835  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2836  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
2837  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
2838  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
2839  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
2840  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
2841  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
2842  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
2843  }
2844  if( i < M ) {
2845  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2846  for( size_t k=0UL; k<K; ++k ) {
2847  const IntrinsicType a1( set( A(i,k) ) );
2848  xmm1 = xmm1 + a1 * B.load(k,j );
2849  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2850  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2851  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2852  }
2853  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
2854  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
2855  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
2856  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
2857  }
2858  }
      // Tiles of 2 vectors x 2 rows (xmm1-2: row i, xmm3-4: row i+1), taken while more than
      // IT::size columns remain; an odd last row is handled by the 'if( i < M )' part.
2859  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2860  size_t i( 0UL );
2861  for( ; (i+2UL) <= M; i+=2UL ) {
2862  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2863  for( size_t k=0UL; k<K; ++k ) {
2864  const IntrinsicType a1( set( A(i ,k) ) );
2865  const IntrinsicType a2( set( A(i+1UL,k) ) );
2866  const IntrinsicType b1( B.load(k,j ) );
2867  const IntrinsicType b2( B.load(k,j+IT::size) );
2868  xmm1 = xmm1 + a1 * b1;
2869  xmm2 = xmm2 + a1 * b2;
2870  xmm3 = xmm3 + a2 * b1;
2871  xmm4 = xmm4 + a2 * b2;
2872  }
2873  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2874  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
2875  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
2876  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
2877  }
2878  if( i < M ) {
2879  IntrinsicType xmm1, xmm2;
2880  for( size_t k=0UL; k<K; ++k ) {
2881  const IntrinsicType a1( set( A(i,k) ) );
2882  xmm1 = xmm1 + a1 * B.load(k,j );
2883  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2884  }
2885  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
2886  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
2887  }
2888  }
      // At most IT::size columns are left here: one final vector, two rows at a time, plus
      // an odd last row.
2889  if( j < N ) {
2890  size_t i( 0UL );
2891  for( ; (i+2UL) <= M; i+=2UL ) {
2892  IntrinsicType xmm1, xmm2;
2893  for( size_t k=0UL; k<K; ++k ) {
2894  const IntrinsicType b1( B.load(k,j) );
2895  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2896  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2897  }
2898  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
2899  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
2900  }
2901  if( i < M ) {
2902  IntrinsicType xmm1;
2903  for( size_t k=0UL; k<K; ++k ) {
2904  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
2905  }
2906  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
2907  }
2908  }
2909  }
2910  //**********************************************************************************************
2911 
2912  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition-assignment kernel for a column-major target: C += A * B * scalar.
// Only participates in overload resolution when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// No vector code is written here. Exactly one operand is copied into its OppositeType and the
// product is dispatched again through addAssign() with that temporary replacing the operand.
// Which operand is converted:
//   - only MT5 is resizable -> A is converted
//   - only MT4 is resizable -> B is converted
//   - otherwise             -> the operand with fewer elements is converted (A on a tie)
// NOTE(review): preferring the non-resizable operand presumably assumes such matrices are
// cheap to copy -- confirm against the IsResizable documentation.
2926  template< typename MT3 // Type of the left-hand side target matrix
2927  , typename MT4 // Type of the left-hand side matrix operand
2928  , typename MT5 // Type of the right-hand side matrix operand
2929  , typename ST2 > // Type of the scalar value
2930  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2931  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2932  {
2935 
      // Exactly one of the two operands is resizable: convert the other one.
2936  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2937  const typename MT4::OppositeType tmp( A );
2938  addAssign( ~C, tmp * B * scalar );
2939  }
2940  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2941  const typename MT5::OppositeType tmp( B );
2942  addAssign( ~C, A * tmp * scalar );
2943  }
      // Both or neither are resizable: convert the operand with the smaller element count.
2944  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2945  const typename MT4::OppositeType tmp( A );
2946  addAssign( ~C, tmp * B * scalar );
2947  }
2948  else {
2949  const typename MT5::OppositeType tmp( B );
2950  addAssign( ~C, A * tmp * scalar );
2951  }
2952  }
2953  //**********************************************************************************************
2954 
2955  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback "BLAS" addition-assignment kernel: C += A * B * scalar.
// Chosen when UseDefaultKernel<MT3,MT4,MT5,ST2> holds; it performs no BLAS call and simply
// forwards all arguments to selectDefaultAddAssignKernel().
// NOTE(review): UseDefaultKernel is defined outside this excerpt; presumably it is true when
// none of the typed BLAS kernels apply -- confirm.
2969  template< typename MT3 // Type of the left-hand side target matrix
2970  , typename MT4 // Type of the left-hand side matrix operand
2971  , typename MT5 // Type of the right-hand side matrix operand
2972  , typename ST2 > // Type of the scalar value
2973  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2974  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2975  {
2976  selectDefaultAddAssignKernel( C, A, B, scalar );
2977  }
2978  //**********************************************************************************************
2979 
2980  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2981 #if BLAZE_BLAS_MODE
2982 
// Single precision BLAS addition-assignment kernel: C += scalar * A * B via cblas_sgemm.
// Only participates in overload resolution when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - passed to sgemm as alpha
2995  template< typename MT3 // Type of the left-hand side target matrix
2996  , typename MT4 // Type of the left-hand side matrix operand
2997  , typename MT5 // Type of the right-hand side matrix operand
2998  , typename ST2 > // Type of the scalar value
2999  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3000  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3001  {
3002  using boost::numeric_cast;
3003 
3007 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3008  const int M ( numeric_cast<int>( A.rows() ) );
3009  const int N ( numeric_cast<int>( B.columns() ) );
3010  const int K ( numeric_cast<int>( A.columns() ) );
3011  const int lda( numeric_cast<int>( A.spacing() ) );
3012  const int ldb( numeric_cast<int>( B.spacing() ) );
3013  const int ldc( numeric_cast<int>( C.spacing() ) );
3014 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed. beta = 1 keeps the old contents of C, so
      // the scaled product is accumulated onto it.
3015  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3016  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3017  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3018  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3019  }
3020 #endif
3021  //**********************************************************************************************
3022 
3023  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3024 #if BLAZE_BLAS_MODE
3025 
// Double precision BLAS addition-assignment kernel: C += scalar * A * B via cblas_dgemm.
// Only participates in overload resolution when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - passed to dgemm as alpha
3038  template< typename MT3 // Type of the left-hand side target matrix
3039  , typename MT4 // Type of the left-hand side matrix operand
3040  , typename MT5 // Type of the right-hand side matrix operand
3041  , typename ST2 > // Type of the scalar value
3042  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3043  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3044  {
3045  using boost::numeric_cast;
3046 
3050 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3051  const int M ( numeric_cast<int>( A.rows() ) );
3052  const int N ( numeric_cast<int>( B.columns() ) );
3053  const int K ( numeric_cast<int>( A.columns() ) );
3054  const int lda( numeric_cast<int>( A.spacing() ) );
3055  const int ldb( numeric_cast<int>( B.spacing() ) );
3056  const int ldc( numeric_cast<int>( C.spacing() ) );
3057 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed. beta = 1 keeps the old contents of C, so
      // the scaled product is accumulated onto it.
3058  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3059  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3060  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3061  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3062  }
3063 #endif
3064  //**********************************************************************************************
3065 
3066  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3067 #if BLAZE_BLAS_MODE
3068 
// Single precision complex BLAS addition-assignment kernel: C += scalar * A * B via cblas_cgemm.
// Only participates in overload resolution when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds
// (the scalar type ST2 is not part of the selection condition).
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - converted to complex<float> and passed to cgemm as alpha
3081  template< typename MT3 // Type of the left-hand side target matrix
3082  , typename MT4 // Type of the left-hand side matrix operand
3083  , typename MT5 // Type of the right-hand side matrix operand
3084  , typename ST2 > // Type of the scalar value
3085  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3086  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3087  {
3088  using boost::numeric_cast;
3089 
      // Compile-time guard: the component type of every complex element must be float.
3093  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3094  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3095  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3096 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3097  const int M ( numeric_cast<int>( A.rows() ) );
3098  const int N ( numeric_cast<int>( B.columns() ) );
3099  const int K ( numeric_cast<int>( A.columns() ) );
3100  const int lda( numeric_cast<int>( A.spacing() ) );
3101  const int ldb( numeric_cast<int>( B.spacing() ) );
3102  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm routines take alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the old contents of C, so the scaled product is accumulated onto it.
3103  const complex<float> alpha( scalar );
3104  const complex<float> beta ( 1.0F, 0.0F );
3105 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed.
3106  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3107  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3108  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3109  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3110  }
3111 #endif
3112  //**********************************************************************************************
3113 
3114  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3115 #if BLAZE_BLAS_MODE
3116 
// Double precision complex BLAS addition-assignment kernel: C += scalar * A * B via cblas_zgemm.
// Only participates in overload resolution when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds
// (the scalar type ST2 is not part of the selection condition).
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - converted to complex<double> and passed to zgemm as alpha
3129  template< typename MT3 // Type of the left-hand side target matrix
3130  , typename MT4 // Type of the left-hand side matrix operand
3131  , typename MT5 // Type of the right-hand side matrix operand
3132  , typename ST2 > // Type of the scalar value
3133  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3134  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3135  {
3136  using boost::numeric_cast;
3137 
      // Compile-time guard: the component type of every complex element must be double.
3141  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3142  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3143  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3144 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3145  const int M ( numeric_cast<int>( A.rows() ) );
3146  const int N ( numeric_cast<int>( B.columns() ) );
3147  const int K ( numeric_cast<int>( A.columns() ) );
3148  const int lda( numeric_cast<int>( A.spacing() ) );
3149  const int ldb( numeric_cast<int>( B.spacing() ) );
3150  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm routines take alpha and beta by address, hence the named locals.
      // beta = (1,0) keeps the old contents of C, so the scaled product is accumulated onto it.
3151  const complex<double> alpha( scalar );
3152  const complex<double> beta ( 1.0, 0.0 );
3153 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed.
3154  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3155  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3156  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3157  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3158  }
3159 #endif
3160  //**********************************************************************************************
3161 
3162  //**Addition assignment to sparse matrices******************************************************
3163  // No special implementation for the addition assignment to sparse matrices.
3164  //**********************************************************************************************
3165 
3166  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled dense matrix product to a dense matrix:
// lhs -= (left * right) * rhs.scalar_.
//   lhs - target dense matrix of either storage order; its size must already match rhs
//         (checked by internal assertions only)
//   rhs - scaled matrix product expression providing the two operands and the scalar
// Returns without touching lhs when the target or the inner dimension is empty. Otherwise both
// operands are evaluated and the work is handed to the default kernel for small targets or to
// the BLAS kernel once rows*columns reaches DMATDMATMULT_THRESHOLD.
3178  template< typename MT3 // Type of the target dense matrix
3179  , bool SO > // Storage order of the target dense matrix
3180  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3181  {
3183 
3184  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3185  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3186 
3187  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3188  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3189 
      // Nothing to subtract if the result is empty or the inner dimension is zero.
3190  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3191  return;
3192  }
3193 
3194  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3195  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3196 
3197  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3198  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3199  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3200  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3201  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3202  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3203 
      // Size-based dispatch: targets below the threshold use the default kernel, all others
      // the BLAS kernel.
3204  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
3205  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3206  else
3207  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3208  }
3209  //**********************************************************************************************
3210 
3211  //**Default subtraction assignment to dense matrices********************************************
// Non-vectorized default subtraction-assignment kernel: C -= A * B * scalar.
// Active whenever UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does NOT hold (DisableIf).
// The scaled product is first evaluated into a temporary of type ResultType, which is then
// subtracted from C through subAssign().
3225  template< typename MT3 // Type of the left-hand side target matrix
3226  , typename MT4 // Type of the left-hand side matrix operand
3227  , typename MT5 // Type of the right-hand side matrix operand
3228  , typename ST2 > // Type of the scalar value
3229  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3230  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3231  {
3232  const ResultType tmp( A * B * scalar );
3233  subAssign( C, tmp );
3234  }
3235  //**********************************************************************************************
3236 
3237  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction-assignment kernel for a row-major target: C -= A * B * scalar.
// Only participates in overload resolution when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//   C      - row-major target, read and written one intrinsic vector at a time via load()/store()
//   A      - left operand, read element-wise and broadcast with set()
//   B      - right operand, read one intrinsic vector at a time via load()
//   scalar - broadcast once into 'factor' and applied to each accumulated vector
// The columns of C are walked in panels of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. The 4-, 2- and 1-vector panels handle two rows of C per pass, followed
// by a single-row pass when M is odd.
// NOTE(review): every accumulator is default-constructed and then accumulated into, so this
// assumes IntrinsicType's default constructor zero-initializes -- TODO confirm.
// NOTE(review): a panel is entered as soon as the start of its last vector is < N, so loads and
// stores can extend past column N; presumably rows are padded to a multiple of IT::size -- confirm.
3251  template< typename MT3 // Type of the left-hand side target matrix
3252  , typename MT4 // Type of the left-hand side matrix operand
3253  , typename MT5 // Type of the right-hand side matrix operand
3254  , typename ST2 > // Type of the scalar value
3255  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3256  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3257  {
3258  typedef IntrinsicTrait<ElementType> IT;
3259 
3260  const size_t M( A.rows() );
3261  const size_t N( B.columns() );
3262  const size_t K( A.columns() );
3263 
3264  const IntrinsicType factor( set( scalar ) );
3265 
3266  size_t j( 0UL );
3267 
      // Panels of 8 vectors: one row of C per pass, 8 accumulators.
3268  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3269  for( size_t i=0UL; i<M; ++i ) {
3270  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3271  for( size_t k=0UL; k<K; ++k ) {
3272  const IntrinsicType a1( set( A(i,k) ) );
3273  xmm1 = xmm1 + a1 * B.load(k,j );
3274  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3275  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3276  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3277  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3278  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3279  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3280  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3281  }
3282  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3283  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3284  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3285  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3286  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
3287  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
3288  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
3289  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
3290  }
3291  }
      // Panels of 4 vectors: two rows of C per pass (xmm1-4 for row i, xmm5-8 for row i+1).
3292  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3293  size_t i( 0UL );
3294  for( ; (i+2UL) <= M; i+=2UL ) {
3295  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3296  for( size_t k=0UL; k<K; ++k ) {
3297  const IntrinsicType a1( set( A(i ,k) ) );
3298  const IntrinsicType a2( set( A(i+1UL,k) ) );
3299  const IntrinsicType b1( B.load(k,j ) );
3300  const IntrinsicType b2( B.load(k,j+IT::size ) );
3301  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3302  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3303  xmm1 = xmm1 + a1 * b1;
3304  xmm2 = xmm2 + a1 * b2;
3305  xmm3 = xmm3 + a1 * b3;
3306  xmm4 = xmm4 + a1 * b4;
3307  xmm5 = xmm5 + a2 * b1;
3308  xmm6 = xmm6 + a2 * b2;
3309  xmm7 = xmm7 + a2 * b3;
3310  xmm8 = xmm8 + a2 * b4;
3311  }
3312  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3313  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
3314  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
3315  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
3316  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
3317  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
3318  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
3319  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
3320  }
      // Left-over last row when M is odd.
3321  if( i < M ) {
3322  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3323  for( size_t k=0UL; k<K; ++k ) {
3324  const IntrinsicType a1( set( A(i,k) ) );
3325  xmm1 = xmm1 + a1 * B.load(k,j );
3326  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3327  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3328  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3329  }
3330  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3331  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3332  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3333  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3334  }
3335  }
      // Panels of 2 vectors: two rows of C per pass (xmm1-2 for row i, xmm3-4 for row i+1).
3336  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3337  size_t i( 0UL );
3338  for( ; (i+2UL) <= M; i+=2UL ) {
3339  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3340  for( size_t k=0UL; k<K; ++k ) {
3341  const IntrinsicType a1( set( A(i ,k) ) );
3342  const IntrinsicType a2( set( A(i+1UL,k) ) );
3343  const IntrinsicType b1( B.load(k,j ) );
3344  const IntrinsicType b2( B.load(k,j+IT::size) );
3345  xmm1 = xmm1 + a1 * b1;
3346  xmm2 = xmm2 + a1 * b2;
3347  xmm3 = xmm3 + a2 * b1;
3348  xmm4 = xmm4 + a2 * b2;
3349  }
3350  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3351  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
3352  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
3353  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
3354  }
      // Left-over last row when M is odd.
3355  if( i < M ) {
3356  IntrinsicType xmm1, xmm2;
3357  for( size_t k=0UL; k<K; ++k ) {
3358  const IntrinsicType a1( set( A(i,k) ) );
3359  xmm1 = xmm1 + a1 * B.load(k,j );
3360  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3361  }
3362  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3363  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
3364  }
3365  }
      // Final single-vector panel, again two rows per pass plus an odd-row remainder.
3366  if( j < N ) {
3367  size_t i( 0UL );
3368  for( ; (i+2UL) <= M; i+=2UL ) {
3369  IntrinsicType xmm1, xmm2;
3370  for( size_t k=0UL; k<K; ++k ) {
3371  const IntrinsicType b1( B.load(k,j) );
3372  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3373  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3374  }
3375  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3376  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
3377  }
3378  if( i < M ) {
3379  IntrinsicType xmm1;
3380  for( size_t k=0UL; k<K; ++k ) {
3381  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3382  }
3383  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3384  }
3385  }
3386  }
3387  //**********************************************************************************************
3388 
3389  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction-assignment kernel for a column-major target: C -= A * B * scalar.
// Only participates in overload resolution when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// No vector code is written here. Exactly one operand is copied into its OppositeType and the
// product is dispatched again through subAssign() with that temporary replacing the operand.
// Which operand is converted:
//   - only MT5 is resizable -> A is converted
//   - only MT4 is resizable -> B is converted
//   - otherwise             -> the operand with fewer elements is converted (A on a tie)
// NOTE(review): preferring the non-resizable operand presumably assumes such matrices are
// cheap to copy -- confirm against the IsResizable documentation.
3403  template< typename MT3 // Type of the left-hand side target matrix
3404  , typename MT4 // Type of the left-hand side matrix operand
3405  , typename MT5 // Type of the right-hand side matrix operand
3406  , typename ST2 > // Type of the scalar value
3407  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3408  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3409  {
3412 
      // Exactly one of the two operands is resizable: convert the other one.
3413  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3414  const typename MT4::OppositeType tmp( A );
3415  subAssign( ~C, tmp * B * scalar );
3416  }
3417  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3418  const typename MT5::OppositeType tmp( B );
3419  subAssign( ~C, A * tmp * scalar );
3420  }
      // Both or neither are resizable: convert the operand with the smaller element count.
3421  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3422  const typename MT4::OppositeType tmp( A );
3423  subAssign( ~C, tmp * B * scalar );
3424  }
3425  else {
3426  const typename MT5::OppositeType tmp( B );
3427  subAssign( ~C, A * tmp * scalar );
3428  }
3429  }
3430  //**********************************************************************************************
3431 
3432  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback "BLAS" subtraction-assignment kernel: C -= A * B * scalar.
// Chosen when UseDefaultKernel<MT3,MT4,MT5,ST2> holds; it performs no BLAS call and simply
// forwards all arguments to selectDefaultSubAssignKernel().
// NOTE(review): UseDefaultKernel is defined outside this excerpt; presumably it is true when
// none of the typed BLAS kernels apply -- confirm.
3446  template< typename MT3 // Type of the left-hand side target matrix
3447  , typename MT4 // Type of the left-hand side matrix operand
3448  , typename MT5 // Type of the right-hand side matrix operand
3449  , typename ST2 > // Type of the scalar value
3450  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3451  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3452  {
3453  selectDefaultSubAssignKernel( C, A, B, scalar );
3454  }
3455  //**********************************************************************************************
3456 
3457  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3458 #if BLAZE_BLAS_MODE
3459 
// Single precision BLAS subtraction-assignment kernel: C -= scalar * A * B via cblas_sgemm.
// Only participates in overload resolution when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - negated and passed to sgemm as alpha
3472  template< typename MT3 // Type of the left-hand side target matrix
3473  , typename MT4 // Type of the left-hand side matrix operand
3474  , typename MT5 // Type of the right-hand side matrix operand
3475  , typename ST2 > // Type of the scalar value
3476  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3477  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3478  {
3479  using boost::numeric_cast;
3480 
3484 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3485  const int M ( numeric_cast<int>( A.rows() ) );
3486  const int N ( numeric_cast<int>( B.columns() ) );
3487  const int K ( numeric_cast<int>( A.columns() ) );
3488  const int lda( numeric_cast<int>( A.spacing() ) );
3489  const int ldb( numeric_cast<int>( B.spacing() ) );
3490  const int ldc( numeric_cast<int>( C.spacing() ) );
3491 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed. The subtraction is expressed as an
      // accumulation with alpha = -scalar and beta = 1.
3492  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3493  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3494  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3495  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3496  }
3497 #endif
3498  //**********************************************************************************************
3499 
3500  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3501 #if BLAZE_BLAS_MODE
3502 
// Double precision BLAS subtraction-assignment kernel: C -= scalar * A * B via cblas_dgemm.
// Only participates in overload resolution when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - negated and passed to dgemm as alpha
3515  template< typename MT3 // Type of the left-hand side target matrix
3516  , typename MT4 // Type of the left-hand side matrix operand
3517  , typename MT5 // Type of the right-hand side matrix operand
3518  , typename ST2 > // Type of the scalar value
3519  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3520  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3521  {
3522  using boost::numeric_cast;
3523 
3527 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3528  const int M ( numeric_cast<int>( A.rows() ) );
3529  const int N ( numeric_cast<int>( B.columns() ) );
3530  const int K ( numeric_cast<int>( A.columns() ) );
3531  const int lda( numeric_cast<int>( A.spacing() ) );
3532  const int ldb( numeric_cast<int>( B.spacing() ) );
3533  const int ldc( numeric_cast<int>( C.spacing() ) );
3534 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed. The subtraction is expressed as an
      // accumulation with alpha = -scalar and beta = 1.
3535  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3536  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3537  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3538  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3539  }
3540 #endif
3541  //**********************************************************************************************
3542 
3543  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3544 #if BLAZE_BLAS_MODE
3545 
// Single precision complex BLAS subtraction-assignment kernel: C -= scalar * A * B via cblas_cgemm.
// Only participates in overload resolution when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds
// (the scalar type ST2 is not part of the selection condition).
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - negated, converted to complex<float> and passed to cgemm as alpha
3558  template< typename MT3 // Type of the left-hand side target matrix
3559  , typename MT4 // Type of the left-hand side matrix operand
3560  , typename MT5 // Type of the right-hand side matrix operand
3561  , typename ST2 > // Type of the scalar value
3562  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3563  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3564  {
3565  using boost::numeric_cast;
3566 
      // Compile-time guard: the component type of every complex element must be float.
3570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3572  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3573 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3574  const int M ( numeric_cast<int>( A.rows() ) );
3575  const int N ( numeric_cast<int>( B.columns() ) );
3576  const int K ( numeric_cast<int>( A.columns() ) );
3577  const int lda( numeric_cast<int>( A.spacing() ) );
3578  const int ldb( numeric_cast<int>( B.spacing() ) );
3579  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm routines take alpha and beta by address, hence the named locals.
      // alpha = -scalar with beta = (1,0) turns the accumulation into a subtraction.
3580  const complex<float> alpha( -scalar );
3581  const complex<float> beta ( 1.0F, 0.0F );
3582 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed.
3583  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3584  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3585  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3586  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3587  }
3588 #endif
3589  //**********************************************************************************************
3590 
3591  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3592 #if BLAZE_BLAS_MODE
3593 
// Double precision complex BLAS subtraction-assignment kernel: C -= scalar * A * B via cblas_zgemm.
// Only participates in overload resolution when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds
// (the scalar type ST2 is not part of the selection condition).
//   C      - target matrix, updated in place through C.data()
//   A, B   - left and right operand, read through data() with spacing() as leading dimension
//   scalar - negated, converted to complex<double> and passed to zgemm as alpha
3606  template< typename MT3 // Type of the left-hand side target matrix
3607  , typename MT4 // Type of the left-hand side matrix operand
3608  , typename MT5 // Type of the right-hand side matrix operand
3609  , typename ST2 > // Type of the scalar value
3610  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3611  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3612  {
3613  using boost::numeric_cast;
3614 
      // Compile-time guard: the component type of every complex element must be double.
3618  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3620  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3621 
      // CBLAS takes int extents; numeric_cast performs the size_t -> int conversion with a
      // range check instead of silently truncating.
3622  const int M ( numeric_cast<int>( A.rows() ) );
3623  const int N ( numeric_cast<int>( B.columns() ) );
3624  const int K ( numeric_cast<int>( A.columns() ) );
3625  const int lda( numeric_cast<int>( A.spacing() ) );
3626  const int ldb( numeric_cast<int>( B.spacing() ) );
3627  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm routines take alpha and beta by address, hence the named locals.
      // alpha = -scalar with beta = (1,0) turns the accumulation into a subtraction.
3628  const complex<double> alpha( -scalar );
3629  const complex<double> beta ( 1.0, 0.0 );
3630 
      // Row-major target: row-major call, operands untransposed. Any other target: column-major
      // call with both operands flagged as transposed.
3631  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3632  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3633  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3634  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3635  }
3636 #endif
3637  //**********************************************************************************************
3638 
3639  //**Subtraction assignment to sparse matrices***************************************************
3640  // No special implementation for the subtraction assignment to sparse matrices.
3641  //**********************************************************************************************
3642 
3643  //**Multiplication assignment to dense matrices*************************************************
3644  // No special implementation for the multiplication assignment to dense matrices.
3645  //**********************************************************************************************
3646 
3647  //**Multiplication assignment to sparse matrices************************************************
3648  // No special implementation for the multiplication assignment to sparse matrices.
3649  //**********************************************************************************************
3650 
3651  //**Compile time checks*************************************************************************
3660  //**********************************************************************************************
3661 };
3663 //*************************************************************************************************
3664 
3665 
3666 
3667 
3668 //=================================================================================================
3669 //
3670 // GLOBAL BINARY ARITHMETIC OPERATORS
3671 //
3672 //=================================================================================================
3673 
3674 //*************************************************************************************************
// Multiplication operator for two dense matrices.
// Throws std::invalid_argument ("Matrix sizes do not match") when the number of columns of lhs
// differs from the number of rows of rhs; otherwise returns a DMatDMatMultExpr<T1,T2> built from
// the two operands.
// NOTE(review): the declarator line (listing line 3703) is absent from this listing. According
// to the cross-reference index at the end of the file it reads
//   operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,false>& rhs )
// i.e. both operands are row-major -- confirm against the original header.
3700 template< typename T1 // Type of the left-hand side dense matrix
3701  , typename T2 > // Type of the right-hand side dense matrix
3702 inline const DMatDMatMultExpr<T1,T2>
3704 {
3706 
      // The inner dimensions must agree for the product to be defined.
3707  if( (~lhs).columns() != (~rhs).rows() )
3708  throw std::invalid_argument( "Matrix sizes do not match" );
3709 
3710  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3711 }
3712 //*************************************************************************************************
3713 
3714 
3715 
3716 
3717 //=================================================================================================
3718 //
3719 // EXPRESSION TRAIT SPECIALIZATIONS
3720 //
3721 //=================================================================================================
3722 
3723 //*************************************************************************************************
// Result type of (MT1 * MT2) * VT for a dense vector VT.
// When MT1 and MT2 are row-major dense matrices and VT is a dense column vector, Type is the
// type of the regrouped product MT1 * ( MT2 * VT ), obtained by nesting DMatDVecMultExprTrait.
// In every other case Type is INVALID_TYPE.
3725 template< typename MT1, typename MT2, typename VT >
3726 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3727 {
3728  public:
3729  //**********************************************************************************************
3730  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3731  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3732  IsDenseVector<VT>::value && IsColumnVector<VT>::value
3733  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
3734  , INVALID_TYPE >::Type Type;
3735  //**********************************************************************************************
3736 };
3738 //*************************************************************************************************
3739 
3740 
3741 //*************************************************************************************************
// Result type of (MT1 * MT2) * VT for a sparse vector VT.
// When MT1 and MT2 are row-major dense matrices and VT is a sparse column vector, Type is the
// type of the regrouped product MT1 * ( MT2 * VT ). The inner product uses DMatSVecMultExprTrait,
// the outer one DMatDVecMultExprTrait, so the inner result is treated as a dense vector.
// In every other case Type is INVALID_TYPE.
3743 template< typename MT1, typename MT2, typename VT >
3744 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3745 {
3746  public:
3747  //**********************************************************************************************
3748  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3749  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3750  IsSparseVector<VT>::value && IsColumnVector<VT>::value
3751  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
3752  , INVALID_TYPE >::Type Type;
3753  //**********************************************************************************************
3754 };
3756 //*************************************************************************************************
3757 
3758 
3759 //*************************************************************************************************
// Result type of VT * (MT1 * MT2) for a dense vector VT.
// When VT is a dense row vector and MT1 and MT2 are row-major dense matrices, Type is the type
// of the regrouped product ( VT * MT1 ) * MT2, obtained by nesting TDVecDMatMultExprTrait.
// In every other case Type is INVALID_TYPE.
3761 template< typename VT, typename MT1, typename MT2 >
3762 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3763 {
3764  public:
3765  //**********************************************************************************************
3766  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
3767  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3768  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3769  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3770  , INVALID_TYPE >::Type Type;
3771  //**********************************************************************************************
3772 };
3774 //*************************************************************************************************
3775 
3776 
3777 //*************************************************************************************************
// Result type of VT * (MT1 * MT2) for a sparse vector VT.
// When VT is a sparse row vector and MT1 and MT2 are row-major dense matrices, Type is the type
// of the regrouped product ( VT * MT1 ) * MT2. The inner product uses TSVecDMatMultExprTrait,
// the outer one TDVecDMatMultExprTrait, so the inner result is treated as a dense vector.
// In every other case Type is INVALID_TYPE.
3779 template< typename VT, typename MT1, typename MT2 >
3780 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3781 {
3782  public:
3783  //**********************************************************************************************
3784  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
3785  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3786  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3787  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3788  , INVALID_TYPE >::Type Type;
3789  //**********************************************************************************************
3790 };
3792 //*************************************************************************************************
3793 
3794 
3795 //*************************************************************************************************
// Type of a submatrix view on a dense matrix product.
// Type is the product type (via MultExprTrait) of the submatrix types of const MT1 and
// const MT2, i.e. the submatrix of the product is expressed through submatrices of both operands.
3797 template< typename MT1, typename MT2 >
3798 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2> >
3799 {
3800  public:
3801  //**********************************************************************************************
3802  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1>::Type
3803  , typename SubmatrixExprTrait<const MT2>::Type >::Type Type;
3804  //**********************************************************************************************
3805 };
3807 //*************************************************************************************************
3808 
3809 
3810 //*************************************************************************************************
// Type of a row view on a dense matrix product.
// Type is the product type (via MultExprTrait) of the row type of const MT1 and the complete
// right operand MT2, i.e. a row of the product is expressed as ( row of MT1 ) * MT2.
3812 template< typename MT1, typename MT2 >
3813 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
3814 {
3815  public:
3816  //**********************************************************************************************
3817  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3818  //**********************************************************************************************
3819 };
3821 //*************************************************************************************************
3822 
3823 
3824 //*************************************************************************************************
// Type of a column view on a dense matrix product.
// Type is the product type (via MultExprTrait) of the complete left operand MT1 and the column
// type of const MT2, i.e. a column of the product is expressed as MT1 * ( column of MT2 ).
3826 template< typename MT1, typename MT2 >
3827 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
3828 {
3829  public:
3830  //**********************************************************************************************
3831  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3832  //**********************************************************************************************
3833 };
3835 //*************************************************************************************************
3836 
3837 } // namespace blaze
3838 
3839 #endif
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:123
Data type constraint.
Constraint on the data type.
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:3703
Header file for the SparseVector base class.
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:232
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:297
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:219
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:223
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:246
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:229
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:114
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2371
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
SelectType< IsComputation< MT1 >::value, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:235
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:317
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:224
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold. This setting specifies the thr...
Definition: Thresholds.h:119
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:252
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:221
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:339
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:327
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:267
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:358
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:220
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:239
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:226
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:307
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:222
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:225
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:120
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
SelectType< IsComputation< MT2 >::value, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:238
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:122
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:121
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:359
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Operand matrix_
The dense matrix containing the submatrix.
Definition: DenseSubmatrix.h:2792
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:351
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.