All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
58 #include <blaze/system/BLAS.h>
60 #include <blaze/util/Assert.h>
61 #include <blaze/util/Complex.h>
67 #include <blaze/util/DisableIf.h>
68 #include <blaze/util/EnableIf.h>
69 #include <blaze/util/InvalidType.h>
71 #include <blaze/util/SelectType.h>
72 #include <blaze/util/Types.h>
78 
79 
80 namespace blaze {
81 
82 //=================================================================================================
83 //
84 // CLASS DMATDMATMULTEXPR
85 //
86 //=================================================================================================
87 
88 //*************************************************************************************************
95 template< typename MT1 // Type of the left-hand side dense matrix
96  , typename MT2 > // Type of the right-hand side dense matrix
97 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
98  , private Expression
99  , private Computation
100 {
101  private:
102  //**Type definitions****************************************************************************
103  typedef typename MT1::ResultType RT1;
104  typedef typename MT2::ResultType RT2;
105  typedef typename MT1::CompositeType CT1;
106  typedef typename MT2::CompositeType CT2;
107  //**********************************************************************************************
108 
109  //**********************************************************************************************
111 
112 
// Compile-time selector for the single precision BLAS kernel. Its nested 'value' is read by
// UseDefaultKernel and by the EnableIf guard of the cblas_sgemm-based selectBlasAssignKernel().
// NOTE(review): listing lines 116-118 (the struct body) are missing from this extract; presumably
// they define 'Type' as float and 'value' the way the complex selectors do -- confirm in the header.
114  template< typename T1, typename T2, typename T3 >
115  struct UseSinglePrecisionKernel {
119  };
121  //**********************************************************************************************
122 
123  //**********************************************************************************************
125 
126 
// Compile-time selector for the double precision BLAS kernel. Its nested 'value' is read by
// UseDefaultKernel and by the EnableIf guard of the cblas_dgemm-based selectBlasAssignKernel().
// NOTE(review): listing lines 130-132 (the struct body) are missing from this extract; presumably
// they define 'Type' as double and 'value' the way the complex selectors do -- confirm in the header.
128  template< typename T1, typename T2, typename T3 >
129  struct UseDoublePrecisionKernel {
133  };
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
139 
140 
// Compile-time selector for the single precision complex BLAS kernel (cblas_cgemm).
// 'value' is the conjunction of IsSame<ElementType,complex<float>> over T1, T2 and T3, i.e. it is
// nonzero only if all three matrix types report complex<float> as their element type.
143  template< typename T1, typename T2, typename T3 >
144  struct UseSinglePrecisionComplexKernel {
145  typedef complex<float> Type;
146  enum { value = IsSame<typename T1::ElementType,Type>::value &&
147  IsSame<typename T2::ElementType,Type>::value &&
148  IsSame<typename T3::ElementType,Type>::value };
149  };
151  //**********************************************************************************************
152 
153  //**********************************************************************************************
155 
156 
// Compile-time selector for the double precision complex BLAS kernel (cblas_zgemm).
// 'value' is the conjunction of IsSame<ElementType,complex<double>> over T1, T2 and T3, i.e. it is
// nonzero only if all three matrix types report complex<double> as their element type.
159  template< typename T1, typename T2, typename T3 >
160  struct UseDoublePrecisionComplexKernel {
161  typedef complex<double> Type;
162  enum { value = IsSame<typename T1::ElementType,Type>::value &&
163  IsSame<typename T2::ElementType,Type>::value &&
164  IsSame<typename T3::ElementType,Type>::value };
165  };
167  //**********************************************************************************************
168 
169  //**********************************************************************************************
171 
172 
// Compile-time selector for the non-BLAS fallback of selectBlasAssignKernel(): 'value' is zero
// only if BLAZE_BLAS_MODE is enabled and at least one of the four BLAS kernel selectors applies
// to (T1,T2,T3); in every other case the default kernel is chosen.
174  template< typename T1, typename T2, typename T3 >
175  struct UseDefaultKernel {
176  enum { value = !( BLAZE_BLAS_MODE && ( UseSinglePrecisionKernel<T1,T2,T3>::value ||
177  UseDoublePrecisionKernel<T1,T2,T3>::value ||
178  UseSinglePrecisionComplexKernel<T1,T2,T3>::value ||
179  UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) ) };
180  };
182  //**********************************************************************************************
183 
184  //**********************************************************************************************
186 
187 
// Compile-time selector between the scalar and the vectorized default kernels. 'value' is nonzero
// only if all three matrix types are flagged 'vectorizable', T2 and T3 have the same element type
// as T1, and IntrinsicTrait reports both 'addition' and 'multiplication' for that element type.
189  template< typename T1, typename T2, typename T3 >
190  struct UseVectorizedDefaultKernel {
191  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
192  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
193  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
194  IntrinsicTrait<typename T1::ElementType>::addition &&
195  IntrinsicTrait<typename T1::ElementType>::multiplication };
196  };
198  //**********************************************************************************************
199 
200  public:
201  //**Type definitions****************************************************************************
204  typedef typename ResultType::OppositeType OppositeType;
205  typedef typename ResultType::TransposeType TransposeType;
206  typedef typename ResultType::ElementType ElementType;
208  typedef const ElementType ReturnType;
209  typedef const ResultType CompositeType;
210 
212  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
213 
215  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
216 
218  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
219 
221  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
222  //**********************************************************************************************
223 
224  //**Compilation flags***************************************************************************
226  enum { vectorizable = 0 };
227  //**********************************************************************************************
228 
229  //**Constructor*********************************************************************************
// Builds the multiplication expression from its two operands, which are stored in lhs_ and rhs_.
// The inner dimensions (lhs.columns() vs. rhs.rows()) are only checked via BLAZE_INTERNAL_ASSERT.
// NOTE(review): the declarations of lhs_/rhs_ (listing lines 341-342) are missing from this extract.
235  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
236  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
237  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
238  {
239  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
240  }
241  //**********************************************************************************************
242 
243  //**Access operator*****************************************************************************
// Computes the single element (i,j) of the product on demand, i.e. the sum over k of
// lhs_(i,k) * rhs_(k,j). With an empty inner dimension the result is a reset() ElementType.
// The k=0 term seeds the sum, the following terms are added two per loop iteration in ascending
// order of k, and one leftover term is added if the inner dimension is even.
250  inline ReturnType operator()( size_t i, size_t j ) const {
251  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
252  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
253 
254  const size_t depth( lhs_.columns() );
255  ElementType sum;
256 
257  if( depth == 0UL ) {
258  reset( sum );
259  return sum;
260  }
261 
262  const size_t pairedEnd( ( ( depth-1UL ) & size_t(-2) ) + 1UL );
263  sum = lhs_(i,0UL) * rhs_(0UL,j);
264  for( size_t k=1UL; k<pairedEnd; k+=2UL ) {
265  sum += lhs_(i,k ) * rhs_(k ,j);
266  sum += lhs_(i,k+1UL) * rhs_(k+1UL,j);
267  }
268  if( pairedEnd < depth ) {
269  sum += lhs_(i,pairedEnd) * rhs_(pairedEnd,j);
270  }
271  return sum;
272  }
273  //**********************************************************************************************
274 
275  //**Rows function*******************************************************************************
// Returns the number of rows of the product, which is the row count of the left-hand operand.
280  inline size_t rows() const {
281  return lhs_.rows();
282  }
283  //**********************************************************************************************
284 
285  //**Columns function****************************************************************************
// Returns the number of columns of the product, which is the column count of the right-hand operand.
290  inline size_t columns() const {
291  return rhs_.columns();
292  }
293  //**********************************************************************************************
294 
295  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (by value or const reference, as LeftOperand dictates).
300  inline LeftOperand leftOperand() const {
301  return lhs_;
302  }
303  //**********************************************************************************************
304 
305  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (by value or const reference, as RightOperand dictates).
310  inline RightOperand rightOperand() const {
311  return rhs_;
312  }
313  //**********************************************************************************************
314 
315  //**********************************************************************************************
// Returns true if either operand reports via its own canAlias() that it can alias with the given
// address; the right-hand operand is only queried if the left-hand operand answers false.
321  template< typename T >
322  inline bool canAlias( const T* alias ) const {
323  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
324  }
325  //**********************************************************************************************
326 
327  //**********************************************************************************************
// Returns true if either operand reports via its own isAliased() that it is aliased with the given
// address; the right-hand operand is only queried if the left-hand operand answers false.
333  template< typename T >
334  inline bool isAliased( const T* alias ) const {
335  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
336  }
337  //**********************************************************************************************
338 
339  private:
340  //**Member variables****************************************************************************
343  //**********************************************************************************************
344 
345  //**Assignment to dense matrices****************************************************************
// Assigns the matrix product 'rhs' to the dense matrix 'lhs'.
// An empty target returns immediately, and an empty inner dimension resets the target. Otherwise
// both operands are bound as LT/RT (the result type for computations, the composite type
// otherwise) and the product is computed by the default kernel for small targets or by the
// BLAS kernel selection for large ones.
// NOTE(review): listing line 359 is missing from this extract, so one statement is not shown here.
355  template< typename MT3 // Type of the target dense matrix
356  , bool SO > // Storage order of the target dense matrix
357  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
358  {
360 
361  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
362  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
363 
     // Nothing to compute for an empty target; with no inner dimension every element is reset.
364  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
365  return;
366  }
367  else if( rhs.lhs_.columns() == 0UL ) {
368  reset( ~lhs );
369  return;
370  }
371 
372  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
373  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
374 
375  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
376  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
377  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
378  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
379  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
380  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
381 
     // Targets with fewer than DMATDMATMULT_THRESHOLD elements use the default kernel.
382  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
383  DMatDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
384  else
385  DMatDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
386  }
388  //**********************************************************************************************
389 
390  //**Default assignment to dense matrices********************************************************
// Scalar default kernel for C = A * B, selected when UseVectorizedDefaultKernel does not apply.
// For each row, the first inner index initializes the row of C and all remaining inner indices
// are accumulated onto it. The access to inner index 0 requires A.columns() > 0; the dense
// assign() function in this class handles the empty inner dimension before dispatching.
403  template< typename MT3 // Type of the left-hand side target matrix
404  , typename MT4 // Type of the left-hand side matrix operand
405  , typename MT5 > // Type of the right-hand side matrix operand
406  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
407  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
408  {
409  const size_t rowCount( A.rows() );
410  const size_t colCount( B.columns() );
411  const size_t innerDim( A.columns() );
412 
413  for( size_t row=0UL; row<rowCount; ++row ) {
414  for( size_t col=0UL; col<colCount; ++col ) {
415  C(row,col) = A(row,0UL) * B(0UL,col);
416  }
417  for( size_t inner=1UL; inner<innerDim; ++inner ) {
418  for( size_t col=0UL; col<colCount; ++col ) {
419  C(row,col) += A(row,inner) * B(inner,col);
420  }
421  }
422  }
423  }
425  //**********************************************************************************************
426 
427  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for C = A * B with a row-major target, selected when
// UseVectorizedDefaultKernel applies. The columns of C are processed in blocks of 8, 4, 2 and
// finally 1 intrinsic vector(s) of IT::size elements; within a block each accumulator sums
// set( A(i,k) ) * B.get(k,j...) over all k and is then stored to C.
// Note that N is B.spacing(), not B.columns().
// NOTE(review): the accumulators are default-constructed and immediately read
// ("xmm1 = xmm1 + ..."); this presumably relies on IntrinsicType default-initializing to zero --
// confirm. The IntrinsicType typedef itself is not visible in this extract.
441  template< typename MT3 // Type of the left-hand side target matrix
442  , typename MT4 // Type of the left-hand side matrix operand
443  , typename MT5 > // Type of the right-hand side matrix operand
444  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
445  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
446  {
447  typedef IntrinsicTrait<ElementType> IT;
448 
449  const size_t M( A.rows() );
450  const size_t N( B.spacing() );
451  const size_t K( A.columns() );
452 
453  size_t j( 0UL );
454 
     // Column blocks of 8 vectors: one row of C at a time, 8 accumulators.
455  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
456  for( size_t i=0UL; i<M; ++i ) {
457  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
458  for( size_t k=0UL; k<K; ++k ) {
459  const IntrinsicType a1( set( A(i,k) ) );
460  xmm1 = xmm1 + a1 * B.get(k,j );
461  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
462  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
463  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
464  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
465  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
466  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
467  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
468  }
469  store( &(~C)(i,j ), xmm1 );
470  store( &(~C)(i,j+IT::size ), xmm2 );
471  store( &(~C)(i,j+IT::size*2UL), xmm3 );
472  store( &(~C)(i,j+IT::size*3UL), xmm4 );
473  store( &(~C)(i,j+IT::size*4UL), xmm5 );
474  store( &(~C)(i,j+IT::size*5UL), xmm6 );
475  store( &(~C)(i,j+IT::size*6UL), xmm7 );
476  store( &(~C)(i,j+IT::size*7UL), xmm8 );
477  }
478  }
     // Column blocks of 4 vectors: two rows of C at a time (xmm1-4 for row i, xmm5-8 for row
     // i+1), followed by a single-row pass if M is odd.
479  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
480  size_t i( 0UL );
481  for( ; (i+2UL) <= M; i+=2UL ) {
482  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
483  for( size_t k=0UL; k<K; ++k ) {
484  const IntrinsicType a1( set( A(i ,k) ) );
485  const IntrinsicType a2( set( A(i+1UL,k) ) );
486  const IntrinsicType b1( B.get(k,j ) );
487  const IntrinsicType b2( B.get(k,j+IT::size ) );
488  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
489  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
490  xmm1 = xmm1 + a1 * b1;
491  xmm2 = xmm2 + a1 * b2;
492  xmm3 = xmm3 + a1 * b3;
493  xmm4 = xmm4 + a1 * b4;
494  xmm5 = xmm5 + a2 * b1;
495  xmm6 = xmm6 + a2 * b2;
496  xmm7 = xmm7 + a2 * b3;
497  xmm8 = xmm8 + a2 * b4;
498  }
499  store( &(~C)(i ,j ), xmm1 );
500  store( &(~C)(i ,j+IT::size ), xmm2 );
501  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
502  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
503  store( &(~C)(i+1UL,j ), xmm5 );
504  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
505  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
506  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
507  }
508  if( i < M ) {
509  IntrinsicType xmm1, xmm2, xmm3, xmm4;
510  for( size_t k=0UL; k<K; ++k ) {
511  const IntrinsicType a1( set( A(i,k) ) );
512  xmm1 = xmm1 + a1 * B.get(k,j );
513  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
514  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
515  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
516  }
517  store( &(~C)(i,j ), xmm1 );
518  store( &(~C)(i,j+IT::size ), xmm2 );
519  store( &(~C)(i,j+IT::size*2UL), xmm3 );
520  store( &(~C)(i,j+IT::size*3UL), xmm4 );
521  }
522  }
     // Column blocks of 2 vectors: two rows at a time, then a single-row pass if M is odd.
523  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
524  size_t i( 0UL );
525  for( ; (i+2UL) <= M; i+=2UL ) {
526  IntrinsicType xmm1, xmm2, xmm3, xmm4;
527  for( size_t k=0UL; k<K; ++k ) {
528  const IntrinsicType a1( set( A(i ,k) ) );
529  const IntrinsicType a2( set( A(i+1UL,k) ) );
530  const IntrinsicType b1( B.get(k,j ) );
531  const IntrinsicType b2( B.get(k,j+IT::size) );
532  xmm1 = xmm1 + a1 * b1;
533  xmm2 = xmm2 + a1 * b2;
534  xmm3 = xmm3 + a2 * b1;
535  xmm4 = xmm4 + a2 * b2;
536  }
537  store( &(~C)(i ,j ), xmm1 );
538  store( &(~C)(i ,j+IT::size), xmm2 );
539  store( &(~C)(i+1UL,j ), xmm3 );
540  store( &(~C)(i+1UL,j+IT::size), xmm4 );
541  }
542  if( i < M ) {
543  IntrinsicType xmm1, xmm2;
544  for( size_t k=0UL; k<K; ++k ) {
545  const IntrinsicType a1( set( A(i,k) ) );
546  xmm1 = xmm1 + a1 * B.get(k,j );
547  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
548  }
549  store( &(~C)(i,j ), xmm1 );
550  store( &(~C)(i,j+IT::size), xmm2 );
551  }
552  }
     // Remainder: exactly one more vector starting at column j is processed (not a loop).
     // NOTE(review): presumably N is a multiple of IT::size so nothing is left afterwards -- confirm.
553  if( j < N ) {
554  size_t i( 0UL );
555  for( ; (i+2UL) <= M; i+=2UL ) {
556  IntrinsicType xmm1, xmm2;
557  for( size_t k=0UL; k<K; ++k ) {
558  const IntrinsicType b1( B.get(k,j) );
559  xmm1 = xmm1 + set( A(i ,k) ) * b1;
560  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
561  }
562  store( &(~C)(i ,j), xmm1 );
563  store( &(~C)(i+1UL,j), xmm2 );
564  }
565  if( i < M ) {
566  IntrinsicType xmm1;
567  for( size_t k=0UL; k<K; ++k ) {
568  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
569  }
570  store( &(~C)(i,j), xmm1 );
571  }
572  }
573  }
575  //**********************************************************************************************
576 
577  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for a column-major target, selected when UseVectorizedDefaultKernel
// applies. One of the two operands is copied into a temporary of its OppositeType and the
// product of the temporary and the other operand is passed on to assign().
591  template< typename MT3 // Type of the left-hand side target matrix
592  , typename MT4 // Type of the left-hand side matrix operand
593  , typename MT5 > // Type of the right-hand side matrix operand
594  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
595  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
596  {
597  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
598  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
599 
600  const bool lhsResizable( IsResizable<MT4>::value );
601  const bool rhsResizable( IsResizable<MT5>::value );
602 
     // If exactly one operand is resizable, the non-resizable operand is the one converted.
     // Otherwise A is converted when it has no more elements than B, and B is converted if not.
604  const bool convertLhs( ( lhsResizable != rhsResizable )
605  ? rhsResizable
606  : ( A.rows() * A.columns() <= B.rows() * B.columns() ) );
607 
608  if( convertLhs ) {
609  const typename MT4::OppositeType tmp( A );
610  assign( ~C, tmp * B );
611  }
612  else {
613  const typename MT5::OppositeType tmp( B );
614  assign( ~C, A * tmp );
615  }
616  }
618  //**********************************************************************************************
619 
620  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback of the BLAS kernel selection, enabled when UseDefaultKernel applies (BLAS mode is
// off or no BLAS kernel matches the element types). It forwards to selectDefaultAssignKernel().
633  template< typename MT3 // Type of the left-hand side target matrix
634  , typename MT4 // Type of the left-hand side matrix operand
635  , typename MT5 > // Type of the right-hand side matrix operand
636  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
637  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
638  {
639  selectDefaultAssignKernel( C, A, B );
640  }
642  //**********************************************************************************************
643 
644  //**BLAS-based assignment to dense matrices (single precision)**********************************
645 #if BLAZE_BLAS_MODE
646 
// Single precision BLAS kernel for C = A * B, enabled when UseSinglePrecisionKernel applies and
// compiled only when BLAZE_BLAS_MODE is set. Sizes and leading dimensions (taken from spacing())
// are converted to int with boost::numeric_cast before being passed to cblas_sgemm.
659  template< typename MT3 // Type of the left-hand side target matrix
660  , typename MT4 // Type of the left-hand side matrix operand
661  , typename MT5 > // Type of the right-hand side matrix operand
662  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
663  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
664  {
665  using boost::numeric_cast;
666 
667  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
668  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
669  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
670 
671  const int M ( numeric_cast<int>( A.rows() ) );
672  const int N ( numeric_cast<int>( B.columns() ) );
673  const int K ( numeric_cast<int>( A.columns() ) );
674  const int lda( numeric_cast<int>( A.spacing() ) );
675  const int ldb( numeric_cast<int>( B.spacing() ) );
676  const int ldc( numeric_cast<int>( C.spacing() ) );
677 
     // The storage layout and both transpose flags are derived from the storage order of the
     // target type MT3 only. Scaling factors are 1.0F for the product and 0.0F for the old C.
678  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
679  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
680  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
681  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
682  }
684 #endif
685  //**********************************************************************************************
686 
687  //**BLAS-based assignment to dense matrices (double precision)**********************************
688 #if BLAZE_BLAS_MODE
689 
// Double precision BLAS kernel for C = A * B, enabled when UseDoublePrecisionKernel applies and
// compiled only when BLAZE_BLAS_MODE is set. Sizes and leading dimensions (taken from spacing())
// are converted to int with boost::numeric_cast before being passed to cblas_dgemm.
702  template< typename MT3 // Type of the left-hand side target matrix
703  , typename MT4 // Type of the left-hand side matrix operand
704  , typename MT5 > // Type of the right-hand side matrix operand
705  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
706  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
707  {
708  using boost::numeric_cast;
709 
710  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
711  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
712  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
713 
714  const int M ( numeric_cast<int>( A.rows() ) );
715  const int N ( numeric_cast<int>( B.columns() ) );
716  const int K ( numeric_cast<int>( A.columns() ) );
717  const int lda( numeric_cast<int>( A.spacing() ) );
718  const int ldb( numeric_cast<int>( B.spacing() ) );
719  const int ldc( numeric_cast<int>( C.spacing() ) );
720 
     // The storage layout and both transpose flags are derived from the storage order of the
     // target type MT3 only. Scaling factors are 1.0 for the product and 0.0 for the old C.
721  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
722  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
723  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
724  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
725  }
727 #endif
728  //**********************************************************************************************
729 
730  //**BLAS-based assignment to dense matrices (single precision complex)**************************
731 #if BLAZE_BLAS_MODE
732 
// Single precision complex BLAS kernel for C = A * B, enabled when
// UseSinglePrecisionComplexKernel applies and compiled only when BLAZE_BLAS_MODE is set.
// Sizes and leading dimensions (taken from spacing()) are converted to int with
// boost::numeric_cast; the complex scaling factors are passed to cblas_cgemm by address.
745  template< typename MT3 // Type of the left-hand side target matrix
746  , typename MT4 // Type of the left-hand side matrix operand
747  , typename MT5 > // Type of the right-hand side matrix operand
748  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
749  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
750  {
751  using boost::numeric_cast;
752 
753  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
754  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
755  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
756  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
757  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
758  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
759 
760  const int M ( numeric_cast<int>( A.rows() ) );
761  const int N ( numeric_cast<int>( B.columns() ) );
762  const int K ( numeric_cast<int>( A.columns() ) );
763  const int lda( numeric_cast<int>( A.spacing() ) );
764  const int ldb( numeric_cast<int>( B.spacing() ) );
765  const int ldc( numeric_cast<int>( C.spacing() ) );
     // alpha = 1 scales the product, beta = 0 scales the previous content of C.
766  const complex<float> alpha( 1.0F, 0.0F );
767  const complex<float> beta ( 0.0F, 0.0F );
768 
     // The storage layout and both transpose flags are derived from the storage order of the
     // target type MT3 only.
769  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
770  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
771  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
772  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
773  }
775 #endif
776  //**********************************************************************************************
777 
778  //**BLAS-based assignment to dense matrices (double precision complex)**************************
779 #if BLAZE_BLAS_MODE
780 
// Double precision complex BLAS kernel for C = A * B, enabled when
// UseDoublePrecisionComplexKernel applies and compiled only when BLAZE_BLAS_MODE is set.
// Sizes and leading dimensions (taken from spacing()) are converted to int with
// boost::numeric_cast; the complex scaling factors are passed to cblas_zgemm by address.
793  template< typename MT3 // Type of the left-hand side target matrix
794  , typename MT4 // Type of the left-hand side matrix operand
795  , typename MT5 > // Type of the right-hand side matrix operand
796  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
797  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
798  {
799  using boost::numeric_cast;
800 
801  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
802  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
803  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
804  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
805  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
806  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
807 
808  const int M ( numeric_cast<int>( A.rows() ) );
809  const int N ( numeric_cast<int>( B.columns() ) );
810  const int K ( numeric_cast<int>( A.columns() ) );
811  const int lda( numeric_cast<int>( A.spacing() ) );
812  const int ldb( numeric_cast<int>( B.spacing() ) );
813  const int ldc( numeric_cast<int>( C.spacing() ) );
     // alpha = 1 scales the product, beta = 0 scales the previous content of C.
814  const complex<double> alpha( 1.0, 0.0 );
815  const complex<double> beta ( 0.0, 0.0 );
816 
     // The storage layout and both transpose flags are derived from the storage order of the
     // target type MT3 only.
817  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
818  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
819  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
820  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
821  }
823 #endif
824  //**********************************************************************************************
825 
826  //**Assignment to sparse matrices***************************************************************
// Assigns the matrix product 'rhs' to the sparse matrix 'lhs'. The expression is first evaluated
// into a temporary of type TmpType (OppositeType if SO is true, ResultType otherwise), and that
// temporary is then assigned to the target.
// NOTE(review): listing lines 842 and 846-850 are missing from this extract, so some statements
// (presumably further compile-time constraints) are not shown here -- confirm in the header.
838  template< typename MT // Type of the target sparse matrix
839  , bool SO > // Storage order of the target sparse matrix
840  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
841  {
843 
844  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
845 
851  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
852 
853  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
854  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
855 
856  const TmpType tmp( rhs );
857  assign( ~lhs, tmp );
858  }
860  //**********************************************************************************************
861 
862  //**Addition assignment to dense matrices*******************************************************
// Adds the matrix product 'rhs' to the dense matrix 'lhs'.
// Nothing is done if the target is empty or the inner dimension is zero. Otherwise both operands
// are bound as LT/RT and the product is accumulated by the default kernel for small targets or
// by the BLAS kernel selection for large ones.
// NOTE(review): listing line 879 is missing from this extract, so one statement is not shown here.
875  template< typename MT3 // Type of the target dense matrix
876  , bool SO > // Storage order of the target dense matrix
877  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
878  {
880 
881  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
882  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
883 
884  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
885  return;
886  }
887 
888  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
889  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
890 
891  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
892  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
893  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
894  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
895  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
896  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
897 
     // Targets with fewer than DMATDMATMULT_THRESHOLD elements use the default kernel.
898  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
899  DMatDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
900  else
901  DMatDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
902  }
904  //**********************************************************************************************
905 
906  //**Default addition assignment to dense matrices***********************************************
// Scalar default kernel for C += A * B, selected when UseVectorizedDefaultKernel does not apply.
// For every row i and inner index k, A(i,k) * B(k,j) is added to C(i,j) for all columns j; the
// column loop handles two columns per iteration plus one leftover column if N is odd.
920  template< typename MT3 // Type of the left-hand side target matrix
921  , typename MT4 // Type of the left-hand side matrix operand
922  , typename MT5 > // Type of the right-hand side matrix operand
923  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
924  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
925  {
926  const size_t M( A.rows() );
927  const size_t N( B.columns() );
928  const size_t K( A.columns() );
929 
     // 'end' is N rounded down to an even number; the assert cross-checks the bit mask against
     // the equivalent N - N%2.
930  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
931  const size_t end( N & size_t(-2) );
932 
933  for( size_t i=0UL; i<M; ++i ) {
934  for( size_t k=0UL; k<K; ++k ) {
935  for( size_t j=0UL; j<end; j+=2UL ) {
936  C(i,j ) += A(i,k) * B(k,j );
937  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
938  }
939  if( end < N ) {
940  C(i,end) += A(i,k) * B(k,end);
941  }
942  }
943  }
944  }
946  //**********************************************************************************************
947 
948  //**Vectorized default addition assignment to row-major dense matrices**************************
962  template< typename MT3 // Type of the left-hand side target matrix
963  , typename MT4 // Type of the left-hand side matrix operand
964  , typename MT5 > // Type of the right-hand side matrix operand
965  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
966  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
967  {
968  typedef IntrinsicTrait<ElementType> IT;
969 
970  const size_t M( A.rows() );
971  const size_t N( B.spacing() );
972  const size_t K( A.columns() );
973 
974  size_t j( 0UL );
975 
976  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
977  for( size_t i=0UL; i<M; ++i ) {
978  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
979  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
980  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
981  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
982  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
983  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
984  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
985  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
986  for( size_t k=0UL; k<K; ++k ) {
987  const IntrinsicType a1( set( A(i,k) ) );
988  xmm1 = xmm1 + a1 * B.get(k,j );
989  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
990  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
991  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
992  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
993  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
994  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
995  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
996  }
997  store( &(~C)(i,j ), xmm1 );
998  store( &(~C)(i,j+IT::size ), xmm2 );
999  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1000  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1001  store( &(~C)(i,j+IT::size*4UL), xmm5 );
1002  store( &(~C)(i,j+IT::size*5UL), xmm6 );
1003  store( &(~C)(i,j+IT::size*6UL), xmm7 );
1004  store( &(~C)(i,j+IT::size*7UL), xmm8 );
1005  }
1006  }
1007  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1008  size_t i( 0UL );
1009  for( ; (i+2UL) <= M; i+=2UL ) {
1010  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1011  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
1012  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
1013  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
1014  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
1015  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
1016  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1017  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1018  for( size_t k=0UL; k<K; ++k ) {
1019  const IntrinsicType a1( set( A(i ,k) ) );
1020  const IntrinsicType a2( set( A(i+1UL,k) ) );
1021  const IntrinsicType b1( B.get(k,j ) );
1022  const IntrinsicType b2( B.get(k,j+IT::size ) );
1023  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1024  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1025  xmm1 = xmm1 + a1 * b1;
1026  xmm2 = xmm2 + a1 * b2;
1027  xmm3 = xmm3 + a1 * b3;
1028  xmm4 = xmm4 + a1 * b4;
1029  xmm5 = xmm5 + a2 * b1;
1030  xmm6 = xmm6 + a2 * b2;
1031  xmm7 = xmm7 + a2 * b3;
1032  xmm8 = xmm8 + a2 * b4;
1033  }
1034  store( &(~C)(i ,j ), xmm1 );
1035  store( &(~C)(i ,j+IT::size ), xmm2 );
1036  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1037  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1038  store( &(~C)(i+1UL,j ), xmm5 );
1039  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1040  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1041  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1042  }
1043  if( i < M ) {
1044  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1045  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1046  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1047  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1048  for( size_t k=0UL; k<K; ++k ) {
1049  const IntrinsicType a1( set( A(i,k) ) );
1050  xmm1 = xmm1 + a1 * B.get(k,j );
1051  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
1052  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
1053  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
1054  }
1055  store( &(~C)(i,j ), xmm1 );
1056  store( &(~C)(i,j+IT::size ), xmm2 );
1057  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1058  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1059  }
1060  }
1061  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1062  size_t i( 0UL );
1063  for( ; (i+2UL) <= M; i+=2UL ) {
1064  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1065  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1066  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1067  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1068  for( size_t k=0UL; k<K; ++k ) {
1069  const IntrinsicType a1( set( A(i ,k) ) );
1070  const IntrinsicType a2( set( A(i+1UL,k) ) );
1071  const IntrinsicType b1( B.get(k,j ) );
1072  const IntrinsicType b2( B.get(k,j+IT::size) );
1073  xmm1 = xmm1 + a1 * b1;
1074  xmm2 = xmm2 + a1 * b2;
1075  xmm3 = xmm3 + a2 * b1;
1076  xmm4 = xmm4 + a2 * b2;
1077  }
1078  store( &(~C)(i ,j ), xmm1 );
1079  store( &(~C)(i ,j+IT::size), xmm2 );
1080  store( &(~C)(i+1UL,j ), xmm3 );
1081  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1082  }
1083  if( i < M ) {
1084  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1085  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1086  for( size_t k=0UL; k<K; ++k ) {
1087  const IntrinsicType a1( set( A(i,k) ) );
1088  xmm1 = xmm1 + a1 * B.get(k,j );
1089  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
1090  }
1091  store( &(~C)(i,j ), xmm1 );
1092  store( &(~C)(i,j+IT::size), xmm2 );
1093  }
1094  }
1095  if( j < N ) {
1096  size_t i( 0UL );
1097  for( ; (i+2UL) <= M; i+=2UL ) {
1098  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1099  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1100  for( size_t k=0UL; k<K; ++k ) {
1101  const IntrinsicType b1( B.get(k,j) );
1102  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1103  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1104  }
1105  store( &(~C)(i ,j), xmm1 );
1106  store( &(~C)(i+1UL,j), xmm2 );
1107  }
1108  if( i < M ) {
1109  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1110  for( size_t k=0UL; k<K; ++k ) {
1111  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
1112  }
1113  store( &(~C)(i,j), xmm1 );
1114  }
1115  }
1116  }
1118  //**********************************************************************************************
1119 
1120  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment of a dense matrix product to a column-major
// target matrix (C += A * B).
//
// \param C The target left-hand side dense matrix (column-major, per DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is true. It does
// not compute the product itself: exactly one of the two operands is copied into a temporary
// of its OppositeType and the resulting product expression is handed to addAssign().
1134  template< typename MT3 // Type of the left-hand side target matrix
1135  , typename MT4 // Type of the left-hand side matrix operand
1136  , typename MT5 > // Type of the right-hand side matrix operand
1137  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1138  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1139  {
      // Compile time checks on the opposite storage order types of both operands.
1140  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1141  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1142 
      // If exactly one operand type is non-resizable, that operand is the one that gets copied.
      // NOTE(review): presumably because non-resizable matrices are cheap to copy - confirm.
1143  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1144  const typename MT4::OppositeType tmp( A );
1145  addAssign( ~C, tmp * B );
1146  }
1147  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1148  const typename MT5::OppositeType tmp( B );
1149  addAssign( ~C, A * tmp );
1150  }
      // Otherwise the operand with fewer elements is copied (A in case of a tie).
1151  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1152  const typename MT4::OppositeType tmp( A );
1153  addAssign( ~C, tmp * B );
1154  }
1155  else {
1156  const typename MT5::OppositeType tmp( B );
1157  addAssign( ~C, A * tmp );
1158  }
1159  }
1161  //**********************************************************************************************
1162 
1163  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment (C += A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDefaultKernel<MT3,MT4,MT5> is true. It performs no
// BLAS call and simply forwards to selectDefaultAddAssignKernel().
1177  template< typename MT3 // Type of the left-hand side target matrix
1178  , typename MT4 // Type of the left-hand side matrix operand
1179  , typename MT5 > // Type of the right-hand side matrix operand
1180  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1181  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1182  {
1183  selectDefaultAddAssignKernel( C, A, B );
1184  }
1186  //**********************************************************************************************
1187 
1188  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1189 #if BLAZE_BLAS_MODE
1190 
// BLAS-based addition assignment of a single precision dense matrix product (C += A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseSinglePrecisionKernel<MT3,MT4,MT5> is true. The
// operation is forwarded to cblas_sgemm with alpha = 1 and beta = 1, i.e. the product is
// accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1203  template< typename MT3 // Type of the left-hand side target matrix
1204  , typename MT4 // Type of the left-hand side matrix operand
1205  , typename MT5 > // Type of the right-hand side matrix operand
1206  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1207  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1208  {
1209  using boost::numeric_cast;
1210 
1211  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1212  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1213  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1214 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1215  const int M ( numeric_cast<int>( A.rows() ) );
1216  const int N ( numeric_cast<int>( B.columns() ) );
1217  const int K ( numeric_cast<int>( A.columns() ) );
1218  const int lda( numeric_cast<int>( A.spacing() ) );
1219  const int ldb( numeric_cast<int>( B.spacing() ) );
1220  const int ldc( numeric_cast<int>( C.spacing() ) );
1221 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1222  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1223  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1224  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1225  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1226  }
1228 #endif
1229  //**********************************************************************************************
1230 
1231  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1232 #if BLAZE_BLAS_MODE
1233 
// BLAS-based addition assignment of a double precision dense matrix product (C += A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDoublePrecisionKernel<MT3,MT4,MT5> is true. The
// operation is forwarded to cblas_dgemm with alpha = 1 and beta = 1, i.e. the product is
// accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1246  template< typename MT3 // Type of the left-hand side target matrix
1247  , typename MT4 // Type of the left-hand side matrix operand
1248  , typename MT5 > // Type of the right-hand side matrix operand
1249  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1250  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1251  {
1252  using boost::numeric_cast;
1253 
1254  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1255  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1256  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1257 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1258  const int M ( numeric_cast<int>( A.rows() ) );
1259  const int N ( numeric_cast<int>( B.columns() ) );
1260  const int K ( numeric_cast<int>( A.columns() ) );
1261  const int lda( numeric_cast<int>( A.spacing() ) );
1262  const int ldb( numeric_cast<int>( B.spacing() ) );
1263  const int ldc( numeric_cast<int>( C.spacing() ) );
1264 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1265  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1266  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1267  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1268  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1269  }
1271 #endif
1272  //**********************************************************************************************
1273 
1274  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1275 #if BLAZE_BLAS_MODE
1276 
// BLAS-based addition assignment of a single precision complex dense matrix product
// (C += A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is true.
// The operation is forwarded to cblas_cgemm with alpha = (1,0) and beta = (1,0), i.e. the
// product is accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1289  template< typename MT3 // Type of the left-hand side target matrix
1290  , typename MT4 // Type of the left-hand side matrix operand
1291  , typename MT5 > // Type of the right-hand side matrix operand
1292  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1293  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1294  {
1295  using boost::numeric_cast;
1296 
1297  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1298  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1299  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1300  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1301  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1302  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1303 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1304  const int M ( numeric_cast<int>( A.rows() ) );
1305  const int N ( numeric_cast<int>( B.columns() ) );
1306  const int K ( numeric_cast<int>( A.columns() ) );
1307  const int lda( numeric_cast<int>( A.spacing() ) );
1308  const int ldb( numeric_cast<int>( B.spacing() ) );
1309  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_cgemm by address.
1310  const complex<float> alpha( 1.0F, 0.0F );
1311  const complex<float> beta ( 1.0F, 0.0F );
1312 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1313  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1314  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1315  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1316  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1317  }
1319 #endif
1320  //**********************************************************************************************
1321 
1322  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1323 #if BLAZE_BLAS_MODE
1324 
// BLAS-based addition assignment of a double precision complex dense matrix product
// (C += A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is true.
// The operation is forwarded to cblas_zgemm with alpha = (1,0) and beta = (1,0), i.e. the
// product is accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1337  template< typename MT3 // Type of the left-hand side target matrix
1338  , typename MT4 // Type of the left-hand side matrix operand
1339  , typename MT5 > // Type of the right-hand side matrix operand
1340  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1341  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1342  {
1343  using boost::numeric_cast;
1344 
1345  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1346  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1347  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1348  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1349  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1350  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1351 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1352  const int M ( numeric_cast<int>( A.rows() ) );
1353  const int N ( numeric_cast<int>( B.columns() ) );
1354  const int K ( numeric_cast<int>( A.columns() ) );
1355  const int lda( numeric_cast<int>( A.spacing() ) );
1356  const int ldb( numeric_cast<int>( B.spacing() ) );
1357  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_zgemm by address.
1358  const complex<double> alpha( 1.0, 0.0 );
1359  const complex<double> beta ( 1.0, 0.0 );
1360 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1361  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1362  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1363  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1364  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1365  }
1367 #endif
1368  //**********************************************************************************************
1369 
1370  //**Addition assignment to sparse matrices******************************************************
1371  // No special implementation for the addition assignment to sparse matrices.
1372  //**********************************************************************************************
1373 
1374  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a dense matrix-dense matrix multiplication to a dense matrix
// (lhs -= rhs.lhs_ * rhs.rhs_).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Both operands of the expression are evaluated into the local objects A and B (of the types
// LT and RT, which are defined outside this function). Depending on the number of elements of
// the target matrix, the work is then passed to the default kernel or to the BLAS kernel.
1387  template< typename MT3 // Type of the target dense matrix
1388  , bool SO > // Storage order of the target dense matrix
1389  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
1390  {
1392 
1393  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1394  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1395 
      // Nothing to do if the target is empty or the inner dimension of the product is zero.
1396  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1397  return;
1398  }
1399 
1400  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1401  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1402 
      // The evaluated operands must have the sizes of the original operands and fit the target.
1403  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1404  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1405  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1406  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1407  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1408  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1409 
      // Targets with fewer than DMATDMATMULT_THRESHOLD elements use the default kernel, all
      // other targets use the BLAS kernel selection.
1410  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
1411  DMatDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1412  else
1413  DMatDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1414  }
1416  //**********************************************************************************************
1417 
1418  //**Default subtraction assignment to dense matrices********************************************
// Default (non-vectorized) subtraction assignment of a dense matrix product (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is false
// (DisableIf). It uses an i-k-j loop nest in which the innermost loop over the columns of
// the result is unrolled by a factor of two.
1432  template< typename MT3 // Type of the left-hand side target matrix
1433  , typename MT4 // Type of the left-hand side matrix operand
1434  , typename MT5 > // Type of the right-hand side matrix operand
1435  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1436  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1437  {
1438  const size_t M( A.rows() );
1439  const size_t N( B.columns() );
1440  const size_t K( A.columns() );
1441 
      // 'end' is N rounded down to an even number (the lowest bit is masked out).
1442  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1443  const size_t end( N & size_t(-2) );
1444 
1445  for( size_t i=0UL; i<M; ++i ) {
1446  for( size_t k=0UL; k<K; ++k ) {
      // Two columns per iteration.
1447  for( size_t j=0UL; j<end; j+=2UL ) {
1448  C(i,j ) -= A(i,k) * B(k,j );
1449  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1450  }
      // Last column in case N is odd.
1451  if( end < N ) {
1452  C(i,end) -= A(i,k) * B(k,end);
1453  }
1454  }
1455  }
1456  }
1458  //**********************************************************************************************
1459 
1460  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment of a dense matrix product to a row-major
// target matrix (C -= A * B).
//
// \param C The target left-hand side dense matrix (row-major, per DenseMatrix<MT3,false>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is true. The
// columns of the result are processed in blocks of 8, 4, 2 and finally 1 IntrinsicType
// values (each block step advances j by a multiple of IT::size). For every block the
// accumulators are initialized from C via load(), updated as xmm - a * b over the complete
// inner dimension K, and written back to C via store().
1474  template< typename MT3 // Type of the left-hand side target matrix
1475  , typename MT4 // Type of the left-hand side matrix operand
1476  , typename MT5 > // Type of the right-hand side matrix operand
1477  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1478  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1479  {
1480  typedef IntrinsicTrait<ElementType> IT;
1481 
      // N is taken from B.spacing(), not from B.columns().
      // NOTE(review): presumably spacing() includes the row padding, so that complete
      // IntrinsicType values can be processed; this requires C to provide at least the same
      // spacing - confirm.
1482  const size_t M( A.rows() );
1483  const size_t N( B.spacing() );
1484  const size_t K( A.columns() );
1485 
1486  size_t j( 0UL );
1487 
      // Column blocks of 8*IT::size elements, one row of C per iteration.
1488  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1489  for( size_t i=0UL; i<M; ++i ) {
1490  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1491  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1492  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1493  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1494  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
1495  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
1496  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
1497  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
1498  for( size_t k=0UL; k<K; ++k ) {
1499  const IntrinsicType a1( set( A(i,k) ) );
1500  xmm1 = xmm1 - a1 * B.get(k,j );
1501  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1502  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1503  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1504  xmm5 = xmm5 - a1 * B.get(k,j+IT::size*4UL);
1505  xmm6 = xmm6 - a1 * B.get(k,j+IT::size*5UL);
1506  xmm7 = xmm7 - a1 * B.get(k,j+IT::size*6UL);
1507  xmm8 = xmm8 - a1 * B.get(k,j+IT::size*7UL);
1508  }
1509  store( &(~C)(i,j ), xmm1 );
1510  store( &(~C)(i,j+IT::size ), xmm2 );
1511  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1512  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1513  store( &(~C)(i,j+IT::size*4UL), xmm5 );
1514  store( &(~C)(i,j+IT::size*5UL), xmm6 );
1515  store( &(~C)(i,j+IT::size*6UL), xmm7 );
1516  store( &(~C)(i,j+IT::size*7UL), xmm8 );
1517  }
1518  }
      // Column blocks of 4*IT::size elements, two rows of C per iteration.
1519  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1520  size_t i( 0UL );
1521  for( ; (i+2UL) <= M; i+=2UL ) {
1522  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1523  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
1524  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
1525  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
1526  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
1527  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
1528  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1529  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1530  for( size_t k=0UL; k<K; ++k ) {
1531  const IntrinsicType a1( set( A(i ,k) ) );
1532  const IntrinsicType a2( set( A(i+1UL,k) ) );
1533  const IntrinsicType b1( B.get(k,j ) );
1534  const IntrinsicType b2( B.get(k,j+IT::size ) );
1535  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1536  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1537  xmm1 = xmm1 - a1 * b1;
1538  xmm2 = xmm2 - a1 * b2;
1539  xmm3 = xmm3 - a1 * b3;
1540  xmm4 = xmm4 - a1 * b4;
1541  xmm5 = xmm5 - a2 * b1;
1542  xmm6 = xmm6 - a2 * b2;
1543  xmm7 = xmm7 - a2 * b3;
1544  xmm8 = xmm8 - a2 * b4;
1545  }
1546  store( &(~C)(i ,j ), xmm1 );
1547  store( &(~C)(i ,j+IT::size ), xmm2 );
1548  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1549  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1550  store( &(~C)(i+1UL,j ), xmm5 );
1551  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1552  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1553  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1554  }
      // Remaining single row in case M is odd.
1555  if( i < M ) {
1556  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1557  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1558  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1559  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1560  for( size_t k=0UL; k<K; ++k ) {
1561  const IntrinsicType a1( set( A(i,k) ) );
1562  xmm1 = xmm1 - a1 * B.get(k,j );
1563  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1564  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1565  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1566  }
1567  store( &(~C)(i,j ), xmm1 );
1568  store( &(~C)(i,j+IT::size ), xmm2 );
1569  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1570  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1571  }
1572  }
      // Column blocks of 2*IT::size elements, two rows of C per iteration.
1573  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1574  size_t i( 0UL );
1575  for( ; (i+2UL) <= M; i+=2UL ) {
1576  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1577  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1578  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1579  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1580  for( size_t k=0UL; k<K; ++k ) {
1581  const IntrinsicType a1( set( A(i ,k) ) );
1582  const IntrinsicType a2( set( A(i+1UL,k) ) );
1583  const IntrinsicType b1( B.get(k,j ) );
1584  const IntrinsicType b2( B.get(k,j+IT::size) );
1585  xmm1 = xmm1 - a1 * b1;
1586  xmm2 = xmm2 - a1 * b2;
1587  xmm3 = xmm3 - a2 * b1;
1588  xmm4 = xmm4 - a2 * b2;
1589  }
1590  store( &(~C)(i ,j ), xmm1 );
1591  store( &(~C)(i ,j+IT::size), xmm2 );
1592  store( &(~C)(i+1UL,j ), xmm3 );
1593  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1594  }
      // Remaining single row in case M is odd.
1595  if( i < M ) {
1596  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1597  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1598  for( size_t k=0UL; k<K; ++k ) {
1599  const IntrinsicType a1( set( A(i,k) ) );
1600  xmm1 = xmm1 - a1 * B.get(k,j );
1601  xmm2 = xmm2 - a1 * B.get(k,j+IT::size);
1602  }
1603  store( &(~C)(i,j ), xmm1 );
1604  store( &(~C)(i,j+IT::size), xmm2 );
1605  }
1606  }
      // Remaining columns, processed as one IntrinsicType value per row.
      // NOTE(review): assumes that at most IT::size columns are left at this point, i.e. that
      // N is a multiple of IT::size - confirm.
1607  if( j < N ) {
1608  size_t i( 0UL );
1609  for( ; (i+2UL) <= M; i+=2UL ) {
1610  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1611  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1612  for( size_t k=0UL; k<K; ++k ) {
1613  const IntrinsicType b1( B.get(k,j) );
1614  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1615  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1616  }
1617  store( &(~C)(i ,j), xmm1 );
1618  store( &(~C)(i+1UL,j), xmm2 );
1619  }
      // Remaining single row in case M is odd.
1620  if( i < M ) {
1621  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1622  for( size_t k=0UL; k<K; ++k ) {
1623  xmm1 = xmm1 - set( A(i,k) ) * B.get(k,j);
1624  }
1625  store( &(~C)(i,j), xmm1 );
1626  }
1627  }
1628  }
1630  //**********************************************************************************************
1631 
1632  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment of a dense matrix product to a column-major
// target matrix (C -= A * B).
//
// \param C The target left-hand side dense matrix (column-major, per DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is true. It does
// not compute the product itself: exactly one of the two operands is copied into a temporary
// of its OppositeType and the resulting product expression is handed to subAssign().
1646  template< typename MT3 // Type of the left-hand side target matrix
1647  , typename MT4 // Type of the left-hand side matrix operand
1648  , typename MT5 > // Type of the right-hand side matrix operand
1649  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1650  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1651  {
      // Compile time checks on the opposite storage order types of both operands.
1652  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1653  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1654 
      // If exactly one operand type is non-resizable, that operand is the one that gets copied.
      // NOTE(review): presumably because non-resizable matrices are cheap to copy - confirm.
1655  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1656  const typename MT4::OppositeType tmp( A );
1657  subAssign( ~C, tmp * B );
1658  }
1659  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1660  const typename MT5::OppositeType tmp( B );
1661  subAssign( ~C, A * tmp );
1662  }
      // Otherwise the operand with fewer elements is copied (A in case of a tie).
1663  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1664  const typename MT4::OppositeType tmp( A );
1665  subAssign( ~C, tmp * B );
1666  }
1667  else {
1668  const typename MT5::OppositeType tmp( B );
1669  subAssign( ~C, A * tmp );
1670  }
1671  }
1673  //**********************************************************************************************
1674 
1675  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback of the BLAS-based subtraction assignment (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDefaultKernel<MT3,MT4,MT5> is true. It performs no
// BLAS call and simply forwards to selectDefaultSubAssignKernel().
1689  template< typename MT3 // Type of the left-hand side target matrix
1690  , typename MT4 // Type of the left-hand side matrix operand
1691  , typename MT5 > // Type of the right-hand side matrix operand
1692  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1693  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1694  {
1695  selectDefaultSubAssignKernel( C, A, B );
1696  }
1698  //**********************************************************************************************
1699 
1700  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1701 #if BLAZE_BLAS_MODE
1702 
// BLAS-based subtraction assignment of a single precision dense matrix product (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseSinglePrecisionKernel<MT3,MT4,MT5> is true. The
// operation is forwarded to cblas_sgemm with alpha = -1 and beta = 1, i.e. the negated
// product is accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1715  template< typename MT3 // Type of the left-hand side target matrix
1716  , typename MT4 // Type of the left-hand side matrix operand
1717  , typename MT5 > // Type of the right-hand side matrix operand
1718  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1719  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1720  {
1721  using boost::numeric_cast;
1722 
1723  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1724  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1725  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1726 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1727  const int M ( numeric_cast<int>( A.rows() ) );
1728  const int N ( numeric_cast<int>( B.columns() ) );
1729  const int K ( numeric_cast<int>( A.columns() ) );
1730  const int lda( numeric_cast<int>( A.spacing() ) );
1731  const int ldb( numeric_cast<int>( B.spacing() ) );
1732  const int ldc( numeric_cast<int>( C.spacing() ) );
1733 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1734  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1735  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1736  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1737  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1738  }
1740 #endif
1741  //**********************************************************************************************
1742 
1743  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1744 #if BLAZE_BLAS_MODE
1745 
// BLAS-based subtraction assignment of a double precision dense matrix product (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDoublePrecisionKernel<MT3,MT4,MT5> is true. The
// operation is forwarded to cblas_dgemm with alpha = -1 and beta = 1, i.e. the negated
// product is accumulated onto the current contents of C. All sizes are converted to int via
// boost::numeric_cast, which is a range-checked conversion.
1758  template< typename MT3 // Type of the left-hand side target matrix
1759  , typename MT4 // Type of the left-hand side matrix operand
1760  , typename MT5 > // Type of the right-hand side matrix operand
1761  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1762  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1763  {
1764  using boost::numeric_cast;
1765 
1766  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1767  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1768  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1769 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1770  const int M ( numeric_cast<int>( A.rows() ) );
1771  const int N ( numeric_cast<int>( B.columns() ) );
1772  const int K ( numeric_cast<int>( A.columns() ) );
1773  const int lda( numeric_cast<int>( A.spacing() ) );
1774  const int ldb( numeric_cast<int>( B.spacing() ) );
1775  const int ldc( numeric_cast<int>( C.spacing() ) );
1776 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1777  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1778  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1779  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1780  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1781  }
1783 #endif
1784  //**********************************************************************************************
1785 
1786  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1787 #if BLAZE_BLAS_MODE
1788 
// BLAS-based subtraction assignment of a single precision complex dense matrix product
// (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is true.
// The operation is forwarded to cblas_cgemm with alpha = (-1,0) and beta = (1,0), i.e. the
// negated product is accumulated onto the current contents of C. All sizes are converted to
// int via boost::numeric_cast, which is a range-checked conversion.
1801  template< typename MT3 // Type of the left-hand side target matrix
1802  , typename MT4 // Type of the left-hand side matrix operand
1803  , typename MT5 > // Type of the right-hand side matrix operand
1804  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1805  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1806  {
1807  using boost::numeric_cast;
1808 
1809  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1810  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1811  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1812  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1813  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1814  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1815 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1816  const int M ( numeric_cast<int>( A.rows() ) );
1817  const int N ( numeric_cast<int>( B.columns() ) );
1818  const int K ( numeric_cast<int>( A.columns() ) );
1819  const int lda( numeric_cast<int>( A.spacing() ) );
1820  const int ldb( numeric_cast<int>( B.spacing() ) );
1821  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_cgemm by address.
1822  const complex<float> alpha( -1.0F, 0.0F );
1823  const complex<float> beta ( 1.0F, 0.0F );
1824 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1825  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1826  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1827  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1828  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1829  }
1831 #endif
1832  //**********************************************************************************************
1833 
1834  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1835 #if BLAZE_BLAS_MODE
1836 
// BLAS-based subtraction assignment of a double precision complex dense matrix product
// (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is only selected when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is true.
// The operation is forwarded to cblas_zgemm with alpha = (-1,0) and beta = (1,0), i.e. the
// negated product is accumulated onto the current contents of C. All sizes are converted to
// int via boost::numeric_cast, which is a range-checked conversion.
1849  template< typename MT3 // Type of the left-hand side target matrix
1850  , typename MT4 // Type of the left-hand side matrix operand
1851  , typename MT5 > // Type of the right-hand side matrix operand
1852  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1853  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1854  {
1855  using boost::numeric_cast;
1856 
1857  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1858  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1859  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1860  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1861  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1862  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1863 
      // M x K times K x N product; the leading dimensions are taken from spacing().
1864  const int M ( numeric_cast<int>( A.rows() ) );
1865  const int N ( numeric_cast<int>( B.columns() ) );
1866  const int K ( numeric_cast<int>( A.columns() ) );
1867  const int lda( numeric_cast<int>( A.spacing() ) );
1868  const int ldb( numeric_cast<int>( B.spacing() ) );
1869  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_zgemm by address.
1870  const complex<double> alpha( -1.0, 0.0 );
1871  const complex<double> beta ( 1.0, 0.0 );
1872 
      // Row-major target: operands are passed untransposed. Column-major target: both operands
      // are flagged as transposed.
      // NOTE(review): this presumably relies on A and B being stored row-major - confirm.
1873  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1874  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1875  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1876  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1877  }
1879 #endif
1880  //**********************************************************************************************
1881 
1882  //**Subtraction assignment to sparse matrices***************************************************
1883  // No special implementation for the subtraction assignment to sparse matrices.
1884  //**********************************************************************************************
1885 
1886  //**Multiplication assignment to dense matrices*************************************************
1887  // No special implementation for the multiplication assignment to dense matrices.
1888  //**********************************************************************************************
1889 
1890  //**Multiplication assignment to sparse matrices************************************************
1891  // No special implementation for the multiplication assignment to sparse matrices.
1892  //**********************************************************************************************
1893 
1894  //**Compile time checks*************************************************************************
1901  //**********************************************************************************************
1902 };
1903 //*************************************************************************************************
1904 
1905 
1906 
1907 
1908 //=================================================================================================
1909 //
1910 // DMATSCALARMULTEXPR SPECIALIZATION
1911 //
1912 //=================================================================================================
1913 
1914 //*************************************************************************************************
1922 template< typename MT1 // Type of the left-hand side dense matrix
1923  , typename MT2 // Type of the right-hand side dense matrix
1924  , typename ST > // Type of the right-hand side scalar value
1925 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
1926  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
1927  , private Expression
1928  , private Computation
1929 {
1930  private:
1931  //**Type definitions****************************************************************************
// Private shorthand types for the wrapped matrix product expression and its two operands.
1932  typedef DMatDMatMultExpr<MT1,MT2> MMM; // Type of the dense matrix product expression being scaled
1933  typedef typename MMM::ResultType RES; // ResultType declared by the matrix product expression
1934  typedef typename MT1::ResultType RT1; // ResultType declared by the left-hand side matrix type
1935  typedef typename MT2::ResultType RT2; // ResultType declared by the right-hand side matrix type
1936  typedef typename MT1::CompositeType CT1; // CompositeType declared by the left-hand side matrix type
1937  typedef typename MT2::CompositeType CT2; // CompositeType declared by the right-hand side matrix type
1938  //**********************************************************************************************
1939 
1940  //**********************************************************************************************
1942 
1945  template< typename T1, typename T2, typename T3, typename T4 >
1946  struct UseSinglePrecisionKernel {
1947  enum { value = IsFloat<typename T1::ElementType>::value &&
1948  IsFloat<typename T2::ElementType>::value &&
1949  IsFloat<typename T3::ElementType>::value &&
1950  !IsComplex<T4>::value };
1951  };
1952  //**********************************************************************************************
1953 
1954  //**********************************************************************************************
1956 
1959  template< typename T1, typename T2, typename T3, typename T4 >
1960  struct UseDoublePrecisionKernel {
1961  enum { value = IsDouble<typename T1::ElementType>::value &&
1962  IsDouble<typename T2::ElementType>::value &&
1963  IsDouble<typename T3::ElementType>::value &&
1964  !IsComplex<T4>::value };
1965  };
1966  //**********************************************************************************************
1967 
1968  //**********************************************************************************************
1970 
1973  template< typename T1, typename T2, typename T3 >
1974  struct UseSinglePrecisionComplexKernel {
1975  typedef complex<float> Type;
1976  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1977  IsSame<typename T2::ElementType,Type>::value &&
1978  IsSame<typename T3::ElementType,Type>::value };
1979  };
1980  //**********************************************************************************************
1981 
1982  //**********************************************************************************************
1984 
1987  template< typename T1, typename T2, typename T3 >
1988  struct UseDoublePrecisionComplexKernel {
1989  typedef complex<double> Type;
1990  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1991  IsSame<typename T2::ElementType,Type>::value &&
1992  IsSame<typename T3::ElementType,Type>::value };
1993  };
1994  //**********************************************************************************************
1995 
1996  //**********************************************************************************************
1998 
2000  template< typename T1, typename T2, typename T3, typename T4 >
2001  struct UseDefaultKernel {
2002  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2003  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2004  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2005  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2006  };
2007  //**********************************************************************************************
2008 
2009  //**********************************************************************************************
2011 
2013  template< typename T1, typename T2, typename T3, typename T4 >
2014  struct UseVectorizedDefaultKernel {
2015  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2016  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2017  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2018  IsSame<typename T1::ElementType,T4>::value &&
2019  IntrinsicTrait<typename T1::ElementType>::addition &&
2020  IntrinsicTrait<typename T1::ElementType>::multiplication };
2021  };
2022  //**********************************************************************************************
2023 
2024  public:
2025  //**Type definitions****************************************************************************
// Public type interface of the scaled matrix product expression.
2026  typedef DMatScalarMultExpr<MMM,ST,false> This; // Type of this expression specialization
2027  typedef typename MultTrait<RES,ST>::Type ResultType; // Type MultTrait yields for RES multiplied by ST
2028  typedef typename ResultType::OppositeType OppositeType; // OppositeType declared by ResultType
2029  typedef typename ResultType::TransposeType TransposeType; // TransposeType declared by ResultType
2030  typedef typename ResultType::ElementType ElementType; // Element type of ResultType
2031  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type IntrinsicTrait associates with ElementType
2032  typedef const ElementType ReturnType; // operator() returns elements by (const) value
2033  typedef const ResultType CompositeType; // NOTE(review): a by-value ResultType; presumably makes enclosing expressions evaluate this one into a temporary - confirm
2034 
// Type of the stored left-hand side operand: the dense matrix product, held by value.
2036  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2037 
// Type of the stored scalar: ElementType if IsNumeric<ElementType> holds, otherwise ST.
2039  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
2040 
// Type used to hold the left matrix operand inside the kernels: a const RT1 (i.e. an evaluated
// copy) if MT1 is a computation, otherwise MT1's CompositeType.
2042  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2043 
// Type used to hold the right matrix operand inside the kernels: a const RT2 (i.e. an evaluated
// copy) if MT2 is a computation, otherwise MT2's CompositeType.
2045  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2046  //**********************************************************************************************
2047 
2048  //**Compilation flags***************************************************************************
// Compilation flag, fixed to 0 for this expression.
// NOTE(review): presumably signals that the expression itself offers no intrinsic element
// access; the kernels in this class test the flags of the operand types instead - confirm.
2050  enum { vectorizable = 0 };
2051  //**********************************************************************************************
2052 
2053  //**Constructor*********************************************************************************
2059  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2060  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2061  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2062  {}
2063  //**********************************************************************************************
2064 
2065  //**Access operator*****************************************************************************
2072  inline ReturnType operator()( size_t i, size_t j ) const {
2073  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2074  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2075  return matrix_(i,j) * scalar_;
2076  }
2077  //**********************************************************************************************
2078 
2079  //**Rows function*******************************************************************************
2084  inline size_t rows() const {
2085  return matrix_.rows();
2086  }
2087  //**********************************************************************************************
2088 
2089  //**Columns function****************************************************************************
2094  inline size_t columns() const {
2095  return matrix_.columns();
2096  }
2097  //**********************************************************************************************
2098 
2099  //**Left operand access*************************************************************************
2104  inline LeftOperand leftOperand() const {
2105  return matrix_;
2106  }
2107  //**********************************************************************************************
2108 
2109  //**Right operand access************************************************************************
2114  inline RightOperand rightOperand() const {
2115  return scalar_;
2116  }
2117  //**********************************************************************************************
2118 
2119  //**********************************************************************************************
2125  template< typename T >
2126  inline bool canAlias( const T* alias ) const {
2127  return matrix_.canAlias( alias );
2128  }
2129  //**********************************************************************************************
2130 
2131  //**********************************************************************************************
2137  template< typename T >
2138  inline bool isAliased( const T* alias ) const {
2139  return matrix_.isAliased( alias );
2140  }
2141  //**********************************************************************************************
2142 
2143  private:
2144  //**Member variables****************************************************************************
// State of the expression: the matrix product and the factor it is multiplied with.
2145  LeftOperand matrix_; // Left-hand side operand: the dense matrix product (held by value, see LeftOperand)
2146  RightOperand scalar_; // Right-hand side operand: the scalar factor
2147  //**********************************************************************************************
2148 
2149  //**Assignment to dense matrices****************************************************************
2158  template< typename MT3 // Type of the target dense matrix
2159  , bool SO > // Storage order of the target dense matrix
2160  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2161  {
2163 
2164  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2165  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2166 
2167  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2168  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2169 
2170  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2171  return;
2172  }
2173  else if( left.columns() == 0UL ) {
2174  reset( ~lhs );
2175  return;
2176  }
2177 
2178  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2179  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2180 
2181  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2182  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2183  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2184  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2185  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2186  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2187 
2188  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2189  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2190  else
2191  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2192  }
2193  //**********************************************************************************************
2194 
2195  //**Default assignment to dense matrices********************************************************
2209  template< typename MT3 // Type of the left-hand side target matrix
2210  , typename MT4 // Type of the left-hand side matrix operand
2211  , typename MT5 // Type of the right-hand side matrix operand
2212  , typename ST2 > // Type of the scalar value
2213  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2214  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2215  {
2216  const size_t M( A.rows() );
2217  const size_t N( B.columns() );
2218  const size_t K( A.columns() );
2219 
2220  for( size_t i=0UL; i<M; ++i ) {
2221  for( size_t j=0UL; j<N; ++j ) {
2222  C(i,j) = A(i,0UL) * B(0UL,j);
2223  }
2224  for( size_t k=1UL; k<K; ++k ) {
2225  for( size_t j=0UL; j<N; ++j ) {
2226  C(i,j) += A(i,k) * B(k,j);
2227  }
2228  }
2229  for( size_t j=0UL; j<N; ++j ) {
2230  C(i,j) *= scalar;
2231  }
2232  }
2233  }
2234  //**********************************************************************************************
2235 
2236  //**Vectorized default assignment to row-major dense matrices***********************************
/*!\brief Vectorized default kernel for the assignment of a scaled dense matrix product to a
//        row-major dense matrix (\f$ C=s*A*B \f$).
//
// \param C The row-major target matrix; every visited element is overwritten via store().
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. In the 4-, 2- and 1-vector blocks two rows of C are computed per
// iteration, with a single-row fallback for a remaining odd row. Every accumulator is
// multiplied by 'factor' right before it is stored.
//
// NOTE(review): the accumulators are declared without an initializer, so the kernel relies on
// IntrinsicType's default constructor yielding zero - confirm. N is taken from B.spacing()
// instead of B.columns(); presumably this is the padded row length and a multiple of IT::size,
// which the final 'if( j < N )' block (one vector step, not a loop) depends on - confirm.
*/
2250  template< typename MT3 // Type of the left-hand side target matrix
2251  , typename MT4 // Type of the left-hand side matrix operand
2252  , typename MT5 // Type of the right-hand side matrix operand
2253  , typename ST2 > // Type of the scalar value
2254  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2255  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2256  {
2257  typedef IntrinsicTrait<ElementType> IT;
2258 
2259  const size_t M( A.rows() );
2260  const size_t N( B.spacing() );
2261  const size_t K( A.columns() );
2262 
      // Scalar converted to an intrinsic value once (presumably a broadcast to all lanes - confirm)
2263  const IntrinsicType factor( set( scalar ) );
2264 
      // Column position; shared by all block loops below so each continues where the last stopped
2265  size_t j( 0UL );
2266 
      // Column blocks of 8 vectors, one row of C at a time
2267  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2268  for( size_t i=0UL; i<M; ++i ) {
2269  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2270  for( size_t k=0UL; k<K; ++k ) {
2271  const IntrinsicType a1( set( A(i,k) ) );
2272  xmm1 = xmm1 + a1 * B.get(k,j );
2273  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2274  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2275  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2276  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2277  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2278  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2279  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2280  }
2281  store( &(~C)(i,j ), xmm1 * factor );
2282  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2283  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2284  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2285  store( &(~C)(i,j+IT::size*4UL), xmm5 * factor );
2286  store( &(~C)(i,j+IT::size*5UL), xmm6 * factor );
2287  store( &(~C)(i,j+IT::size*6UL), xmm7 * factor );
2288  store( &(~C)(i,j+IT::size*7UL), xmm8 * factor );
2289  }
2290  }
      // Column blocks of 4 vectors, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1)
2291  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2292  size_t i( 0UL );
2293  for( ; (i+2UL) <= M; i+=2UL ) {
2294  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2295  for( size_t k=0UL; k<K; ++k ) {
2296  const IntrinsicType a1( set( A(i ,k) ) );
2297  const IntrinsicType a2( set( A(i+1UL,k) ) );
2298  const IntrinsicType b1( B.get(k,j ) );
2299  const IntrinsicType b2( B.get(k,j+IT::size ) );
2300  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2301  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2302  xmm1 = xmm1 + a1 * b1;
2303  xmm2 = xmm2 + a1 * b2;
2304  xmm3 = xmm3 + a1 * b3;
2305  xmm4 = xmm4 + a1 * b4;
2306  xmm5 = xmm5 + a2 * b1;
2307  xmm6 = xmm6 + a2 * b2;
2308  xmm7 = xmm7 + a2 * b3;
2309  xmm8 = xmm8 + a2 * b4;
2310  }
2311  store( &(~C)(i ,j ), xmm1 * factor );
2312  store( &(~C)(i ,j+IT::size ), xmm2 * factor );
2313  store( &(~C)(i ,j+IT::size*2UL), xmm3 * factor );
2314  store( &(~C)(i ,j+IT::size*3UL), xmm4 * factor );
2315  store( &(~C)(i+1UL,j ), xmm5 * factor );
2316  store( &(~C)(i+1UL,j+IT::size ), xmm6 * factor );
2317  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 * factor );
2318  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 * factor );
2319  }
      // Remaining single row if M is odd
2320  if( i < M ) {
2321  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2322  for( size_t k=0UL; k<K; ++k ) {
2323  const IntrinsicType a1( set( A(i,k) ) );
2324  xmm1 = xmm1 + a1 * B.get(k,j );
2325  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2326  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2327  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2328  }
2329  store( &(~C)(i,j ), xmm1 * factor );
2330  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2331  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2332  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2333  }
2334  }
      // Column blocks of 2 vectors, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1)
2335  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2336  size_t i( 0UL );
2337  for( ; (i+2UL) <= M; i+=2UL ) {
2338  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2339  for( size_t k=0UL; k<K; ++k ) {
2340  const IntrinsicType a1( set( A(i ,k) ) );
2341  const IntrinsicType a2( set( A(i+1UL,k) ) );
2342  const IntrinsicType b1( B.get(k,j ) );
2343  const IntrinsicType b2( B.get(k,j+IT::size) );
2344  xmm1 = xmm1 + a1 * b1;
2345  xmm2 = xmm2 + a1 * b2;
2346  xmm3 = xmm3 + a2 * b1;
2347  xmm4 = xmm4 + a2 * b2;
2348  }
2349  store( &(~C)(i ,j ), xmm1 * factor );
2350  store( &(~C)(i ,j+IT::size), xmm2 * factor );
2351  store( &(~C)(i+1UL,j ), xmm3 * factor );
2352  store( &(~C)(i+1UL,j+IT::size), xmm4 * factor );
2353  }
      // Remaining single row if M is odd
2354  if( i < M ) {
2355  IntrinsicType xmm1, xmm2;
2356  for( size_t k=0UL; k<K; ++k ) {
2357  const IntrinsicType a1( set( A(i,k) ) );
2358  xmm1 = xmm1 + a1 * B.get(k,j );
2359  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2360  }
2361  store( &(~C)(i,j ), xmm1 * factor );
2362  store( &(~C)(i,j+IT::size), xmm2 * factor );
2363  }
2364  }
      // Last column block of a single vector, two rows of C at a time
2365  if( j < N ) {
2366  size_t i( 0UL );
2367  for( ; (i+2UL) <= M; i+=2UL ) {
2368  IntrinsicType xmm1, xmm2;
2369  for( size_t k=0UL; k<K; ++k ) {
2370  const IntrinsicType b1( B.get(k,j) );
2371  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2372  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2373  }
2374  store( &(~C)(i ,j), xmm1 * factor );
2375  store( &(~C)(i+1UL,j), xmm2 * factor );
2376  }
      // Remaining single row if M is odd
2377  if( i < M ) {
2378  IntrinsicType xmm1;
2379  for( size_t k=0UL; k<K; ++k ) {
2380  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2381  }
2382  store( &(~C)(i,j), xmm1 * factor );
2383  }
2384  }
2385  }
2386  //**********************************************************************************************
2387 
2388  //**Vectorized default assignment to column-major dense matrices********************************
2402  template< typename MT3 // Type of the left-hand side target matrix
2403  , typename MT4 // Type of the left-hand side matrix operand
2404  , typename MT5 // Type of the right-hand side matrix operand
2405  , typename ST2 > // Type of the scalar value
2406  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2407  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2408  {
2409  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2410  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2411 
2412  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2413  const typename MT4::OppositeType tmp( A );
2414  assign( ~C, tmp * B * scalar );
2415  }
2416  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2417  const typename MT5::OppositeType tmp( B );
2418  assign( ~C, A * tmp * scalar );
2419  }
2420  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2421  const typename MT4::OppositeType tmp( A );
2422  assign( ~C, tmp * B * scalar );
2423  }
2424  else {
2425  const typename MT5::OppositeType tmp( B );
2426  assign( ~C, A * tmp * scalar );
2427  }
2428  }
2429  //**********************************************************************************************
2430 
2431  //**BLAS-based assignment to dense matrices (default)*******************************************
2445  template< typename MT3 // Type of the left-hand side target matrix
2446  , typename MT4 // Type of the left-hand side matrix operand
2447  , typename MT5 // Type of the right-hand side matrix operand
2448  , typename ST2 > // Type of the scalar value
2449  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2450  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2451  {
2452  selectDefaultAssignKernel( C, A, B, scalar );
2453  }
2454  //**********************************************************************************************
2455 
2456  //**BLAS-based assignment to dense matrices (single precision)**********************************
2457 #if BLAZE_BLAS_MODE
2458 
2471  template< typename MT3 // Type of the left-hand side target matrix
2472  , typename MT4 // Type of the left-hand side matrix operand
2473  , typename MT5 // Type of the right-hand side matrix operand
2474  , typename ST2 > // Type of the scalar value
2475  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2476  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2477  {
2478  using boost::numeric_cast;
2479 
2480  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2481  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2483 
2484  const int M ( numeric_cast<int>( A.rows() ) );
2485  const int N ( numeric_cast<int>( B.columns() ) );
2486  const int K ( numeric_cast<int>( A.columns() ) );
2487  const int lda( numeric_cast<int>( A.spacing() ) );
2488  const int ldb( numeric_cast<int>( B.spacing() ) );
2489  const int ldc( numeric_cast<int>( C.spacing() ) );
2490 
2491  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2492  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2493  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2494  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2495  }
2496 #endif
2497  //**********************************************************************************************
2498 
2499  //**BLAS-based assignment to dense matrices (double precision)**********************************
2500 #if BLAZE_BLAS_MODE
2501 
2514  template< typename MT3 // Type of the left-hand side target matrix
2515  , typename MT4 // Type of the left-hand side matrix operand
2516  , typename MT5 // Type of the right-hand side matrix operand
2517  , typename ST2 > // Type of the scalar value
2518  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2519  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2520  {
2521  using boost::numeric_cast;
2522 
2523  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2524  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2525  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2526 
2527  const int M ( numeric_cast<int>( A.rows() ) );
2528  const int N ( numeric_cast<int>( B.columns() ) );
2529  const int K ( numeric_cast<int>( A.columns() ) );
2530  const int lda( numeric_cast<int>( A.spacing() ) );
2531  const int ldb( numeric_cast<int>( B.spacing() ) );
2532  const int ldc( numeric_cast<int>( C.spacing() ) );
2533 
2534  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2535  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2536  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2537  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2538  }
2539 #endif
2540  //**********************************************************************************************
2541 
2542  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2543 #if BLAZE_BLAS_MODE
2544 
2557  template< typename MT3 // Type of the left-hand side target matrix
2558  , typename MT4 // Type of the left-hand side matrix operand
2559  , typename MT5 // Type of the right-hand side matrix operand
2560  , typename ST2 > // Type of the scalar value
2561  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2562  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2563  {
2564  using boost::numeric_cast;
2565 
2566  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2567  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2568  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2572  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2573 
2574  const int M ( numeric_cast<int>( A.rows() ) );
2575  const int N ( numeric_cast<int>( B.columns() ) );
2576  const int K ( numeric_cast<int>( A.columns() ) );
2577  const int lda( numeric_cast<int>( A.spacing() ) );
2578  const int ldb( numeric_cast<int>( B.spacing() ) );
2579  const int ldc( numeric_cast<int>( C.spacing() ) );
2580  const complex<float> alpha( scalar );
2581  const complex<float> beta ( 0.0F, 0.0F );
2582 
2583  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2584  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2585  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2586  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2587  }
2588 #endif
2589  //**********************************************************************************************
2590 
2591  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2592 #if BLAZE_BLAS_MODE
2593 
2606  template< typename MT3 // Type of the left-hand side target matrix
2607  , typename MT4 // Type of the left-hand side matrix operand
2608  , typename MT5 // Type of the right-hand side matrix operand
2609  , typename ST2 > // Type of the scalar
2610  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2611  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2612  {
2613  using boost::numeric_cast;
2614 
2615  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2616  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2617  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2620  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2621  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2622 
2623  const int M ( numeric_cast<int>( A.rows() ) );
2624  const int N ( numeric_cast<int>( B.columns() ) );
2625  const int K ( numeric_cast<int>( A.columns() ) );
2626  const int lda( numeric_cast<int>( A.spacing() ) );
2627  const int ldb( numeric_cast<int>( B.spacing() ) );
2628  const int ldc( numeric_cast<int>( C.spacing() ) );
2629  const complex<double> alpha( scalar );
2630  const complex<double> beta ( 0.0, 0.0 );
2631 
2632  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2633  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2634  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2635  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2636  }
2637 #endif
2638  //**********************************************************************************************
2639 
2640  //**Assignment to sparse matrices***************************************************************
2651  template< typename MT // Type of the target sparse matrix
2652  , bool SO > // Storage order of the target sparse matrix
2653  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2654  {
2656 
2657  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2658 
2664  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2665 
2666  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2667  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2668 
2669  const TmpType tmp( rhs );
2670  assign( ~lhs, tmp );
2671  }
2672  //**********************************************************************************************
2673 
2674  //**Addition assignment to dense matrices*******************************************************
2686  template< typename MT3 // Type of the target dense matrix
2687  , bool SO > // Storage order of the target dense matrix
2688  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2689  {
2691 
2692  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2693  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2694 
2695  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2696  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2697 
2698  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2699  return;
2700  }
2701 
2702  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2703  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2704 
2705  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2706  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2707  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2708  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2709  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2710  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2711 
2712  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2713  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2714  else
2715  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2716  }
2717  //**********************************************************************************************
2718 
2719  //**Default addition assignment to dense matrices***********************************************
2733  template< typename MT3 // Type of the left-hand side target matrix
2734  , typename MT4 // Type of the left-hand side matrix operand
2735  , typename MT5 // Type of the right-hand side matrix operand
2736  , typename ST2 > // Type of the scalar value
2737  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2738  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2739  {
2740  const ResultType tmp( A * B * scalar );
2741  addAssign( C, tmp );
2742  }
2743  //**********************************************************************************************
2744 
2745  //**Vectorized default addition assignment to row-major dense matrices**************************
/*!\brief Vectorized default addition assignment of a scaled dense matrix-dense matrix
//        multiplication to a row-major dense matrix (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The kernel walks the columns of the result in blocks of 8, 4, 2 and finally 1 intrinsic
// vector(s) (IT::size elements each). For every block the products A(i,k)*B(k,j..) are summed
// over k in local accumulators; afterwards each accumulator is multiplied by the broadcast
// scalar and added to the values currently stored in C.
//
// NOTE(review): the column bound is B.spacing(), not B.columns(). This presumably relies on the
// rows of B and C being padded to a multiple of IT::size - confirm against the matrix types.
// NOTE(review): the accumulators are only default-constructed; this assumes the default
// constructor of IntrinsicType yields a zero vector - confirm.
*/
2759  template< typename MT3 // Type of the left-hand side target matrix
2760  , typename MT4 // Type of the left-hand side matrix operand
2761  , typename MT5 // Type of the right-hand side matrix operand
2762  , typename ST2 > // Type of the scalar value
2763  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2764  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2765  {
2766  typedef IntrinsicTrait<ElementType> IT;
2767 
      // M: rows of the result, N: column bound (spacing of B), K: summation length
2768  const size_t M( A.rows() );
2769  const size_t N( B.spacing() );
2770  const size_t K( A.columns() );
2771 
      // Scaling factor converted once into an intrinsic vector via set()
2772  const IntrinsicType factor( set( scalar ) );
2773 
      // Column index; shared by all block loops below so each loop continues where the previous stopped
2774  size_t j( 0UL );
2775 
      // Column blocks of 8 intrinsic vectors, one row of C per iteration
2776  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2777  for( size_t i=0UL; i<M; ++i ) {
2778  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2779  for( size_t k=0UL; k<K; ++k ) {
2780  const IntrinsicType a1( set( A(i,k) ) );
2781  xmm1 = xmm1 + a1 * B.get(k,j );
2782  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2783  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2784  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2785  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2786  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2787  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2788  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2789  }
2790  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2791  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2792  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2793  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2794  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) + xmm5 * factor );
2795  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) + xmm6 * factor );
2796  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) + xmm7 * factor );
2797  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) + xmm8 * factor );
2798  }
2799  }
      // Column blocks of 4 intrinsic vectors, two rows of C per iteration (xmm1-4: row i, xmm5-8: row i+1)
2800  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2801  size_t i( 0UL );
2802  for( ; (i+2UL) <= M; i+=2UL ) {
2803  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2804  for( size_t k=0UL; k<K; ++k ) {
2805  const IntrinsicType a1( set( A(i ,k) ) );
2806  const IntrinsicType a2( set( A(i+1UL,k) ) );
2807  const IntrinsicType b1( B.get(k,j ) );
2808  const IntrinsicType b2( B.get(k,j+IT::size ) );
2809  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2810  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2811  xmm1 = xmm1 + a1 * b1;
2812  xmm2 = xmm2 + a1 * b2;
2813  xmm3 = xmm3 + a1 * b3;
2814  xmm4 = xmm4 + a1 * b4;
2815  xmm5 = xmm5 + a2 * b1;
2816  xmm6 = xmm6 + a2 * b2;
2817  xmm7 = xmm7 + a2 * b3;
2818  xmm8 = xmm8 + a2 * b4;
2819  }
2820  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2821  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) + xmm2 * factor );
2822  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) + xmm3 * factor );
2823  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) + xmm4 * factor );
2824  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm5 * factor );
2825  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) + xmm6 * factor );
2826  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) + xmm7 * factor );
2827  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) + xmm8 * factor );
2828  }
      // Remaining single row when M is odd
2829  if( i < M ) {
2830  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2831  for( size_t k=0UL; k<K; ++k ) {
2832  const IntrinsicType a1( set( A(i,k) ) );
2833  xmm1 = xmm1 + a1 * B.get(k,j );
2834  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2835  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2836  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2837  }
2838  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2839  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2840  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2841  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2842  }
2843  }
      // Column blocks of 2 intrinsic vectors, two rows of C per iteration (xmm1-2: row i, xmm3-4: row i+1)
2844  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2845  size_t i( 0UL );
2846  for( ; (i+2UL) <= M; i+=2UL ) {
2847  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2848  for( size_t k=0UL; k<K; ++k ) {
2849  const IntrinsicType a1( set( A(i ,k) ) );
2850  const IntrinsicType a2( set( A(i+1UL,k) ) );
2851  const IntrinsicType b1( B.get(k,j ) );
2852  const IntrinsicType b2( B.get(k,j+IT::size) );
2853  xmm1 = xmm1 + a1 * b1;
2854  xmm2 = xmm2 + a1 * b2;
2855  xmm3 = xmm3 + a2 * b1;
2856  xmm4 = xmm4 + a2 * b2;
2857  }
2858  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2859  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) + xmm2 * factor );
2860  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm3 * factor );
2861  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) + xmm4 * factor );
2862  }
      // Remaining single row when M is odd
2863  if( i < M ) {
2864  IntrinsicType xmm1, xmm2;
2865  for( size_t k=0UL; k<K; ++k ) {
2866  const IntrinsicType a1( set( A(i,k) ) );
2867  xmm1 = xmm1 + a1 * B.get(k,j );
2868  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2869  }
2870  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2871  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) + xmm2 * factor );
2872  }
2873  }
      // Final column block of a single intrinsic vector, handled at most once
      // NOTE(review): only one vector is processed here, so N-j is presumably exactly IT::size - confirm
2874  if( j < N ) {
2875  size_t i( 0UL );
2876  for( ; (i+2UL) <= M; i+=2UL ) {
2877  IntrinsicType xmm1, xmm2;
2878  for( size_t k=0UL; k<K; ++k ) {
2879  const IntrinsicType b1( B.get(k,j) );
2880  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2881  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2882  }
2883  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2884  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) + xmm2 * factor );
2885  }
      // Remaining single row when M is odd
2886  if( i < M ) {
2887  IntrinsicType xmm1;
2888  for( size_t k=0UL; k<K; ++k ) {
2889  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2890  }
2891  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2892  }
2893  }
2894  }
2895  //**********************************************************************************************
2896 
2897  //**Vectorized default addition assignment to column-major dense matrices***********************
2911  template< typename MT3 // Type of the left-hand side target matrix
2912  , typename MT4 // Type of the left-hand side matrix operand
2913  , typename MT5 // Type of the right-hand side matrix operand
2914  , typename ST2 > // Type of the scalar value
2915  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2916  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2917  {
2918  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2919  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2920 
2921  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2922  const typename MT4::OppositeType tmp( A );
2923  addAssign( ~C, tmp * B * scalar );
2924  }
2925  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2926  const typename MT5::OppositeType tmp( B );
2927  addAssign( ~C, A * tmp * scalar );
2928  }
2929  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2930  const typename MT4::OppositeType tmp( A );
2931  addAssign( ~C, tmp * B * scalar );
2932  }
2933  else {
2934  const typename MT5::OppositeType tmp( B );
2935  addAssign( ~C, A * tmp * scalar );
2936  }
2937  }
2938  //**********************************************************************************************
2939 
2940  //**BLAS-based addition assignment to dense matrices (default)**********************************
2954  template< typename MT3 // Type of the left-hand side target matrix
2955  , typename MT4 // Type of the left-hand side matrix operand
2956  , typename MT5 // Type of the right-hand side matrix operand
2957  , typename ST2 > // Type of the scalar value
2958  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2959  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2960  {
2961  selectDefaultAddAssignKernel( C, A, B, scalar );
2962  }
2963  //**********************************************************************************************
2964 
2965  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2966 #if BLAZE_BLAS_MODE
2967 
2980  template< typename MT3 // Type of the left-hand side target matrix
2981  , typename MT4 // Type of the left-hand side matrix operand
2982  , typename MT5 // Type of the right-hand side matrix operand
2983  , typename ST2 > // Type of the scalar value
2984  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2985  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2986  {
2987  using boost::numeric_cast;
2988 
2989  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2990  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2991  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2992 
2993  const int M ( numeric_cast<int>( A.rows() ) );
2994  const int N ( numeric_cast<int>( B.columns() ) );
2995  const int K ( numeric_cast<int>( A.columns() ) );
2996  const int lda( numeric_cast<int>( A.spacing() ) );
2997  const int ldb( numeric_cast<int>( B.spacing() ) );
2998  const int ldc( numeric_cast<int>( C.spacing() ) );
2999 
3000  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3001  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3002  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3003  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3004  }
3005 #endif
3006  //**********************************************************************************************
3007 
3008  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3009 #if BLAZE_BLAS_MODE
3010 
3023  template< typename MT3 // Type of the left-hand side target matrix
3024  , typename MT4 // Type of the left-hand side matrix operand
3025  , typename MT5 // Type of the right-hand side matrix operand
3026  , typename ST2 > // Type of the scalar value
3027  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3028  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3029  {
3030  using boost::numeric_cast;
3031 
3032  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3033  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3034  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3035 
3036  const int M ( numeric_cast<int>( A.rows() ) );
3037  const int N ( numeric_cast<int>( B.columns() ) );
3038  const int K ( numeric_cast<int>( A.columns() ) );
3039  const int lda( numeric_cast<int>( A.spacing() ) );
3040  const int ldb( numeric_cast<int>( B.spacing() ) );
3041  const int ldc( numeric_cast<int>( C.spacing() ) );
3042 
3043  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3044  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3045  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3046  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3047  }
3048 #endif
3049  //**********************************************************************************************
3050 
3051  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3052 #if BLAZE_BLAS_MODE
3053 
3066  template< typename MT3 // Type of the left-hand side target matrix
3067  , typename MT4 // Type of the left-hand side matrix operand
3068  , typename MT5 // Type of the right-hand side matrix operand
3069  , typename ST2 > // Type of the scalar value
3070  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3071  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3072  {
3073  using boost::numeric_cast;
3074 
3075  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3076  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3077  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3079  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3080  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3081  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3082 
3083  const int M ( numeric_cast<int>( A.rows() ) );
3084  const int N ( numeric_cast<int>( B.columns() ) );
3085  const int K ( numeric_cast<int>( A.columns() ) );
3086  const int lda( numeric_cast<int>( A.spacing() ) );
3087  const int ldb( numeric_cast<int>( B.spacing() ) );
3088  const int ldc( numeric_cast<int>( C.spacing() ) );
3089  const complex<float> alpha( scalar );
3090  const complex<float> beta ( 1.0F, 0.0F );
3091 
3092  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3093  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3094  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3095  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3096  }
3097 #endif
3098  //**********************************************************************************************
3099 
3100  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3101 #if BLAZE_BLAS_MODE
3102 
3115  template< typename MT3 // Type of the left-hand side target matrix
3116  , typename MT4 // Type of the left-hand side matrix operand
3117  , typename MT5 // Type of the right-hand side matrix operand
3118  , typename ST2 > // Type of the scalar value
3119  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3120  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3121  {
3122  using boost::numeric_cast;
3123 
3124  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3125  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3126  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3128  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3129  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3130  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3131 
3132  const int M ( numeric_cast<int>( A.rows() ) );
3133  const int N ( numeric_cast<int>( B.columns() ) );
3134  const int K ( numeric_cast<int>( A.columns() ) );
3135  const int lda( numeric_cast<int>( A.spacing() ) );
3136  const int ldb( numeric_cast<int>( B.spacing() ) );
3137  const int ldc( numeric_cast<int>( C.spacing() ) );
3138  const complex<double> alpha( scalar );
3139  const complex<double> beta ( 1.0, 0.0 );
3140 
3141  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3142  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3143  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3144  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3145  }
3146 #endif
3147  //**********************************************************************************************
3148 
3149  //**Addition assignment to sparse matrices******************************************************
3150  // No special implementation for the addition assignment to sparse matrices.
3151  //**********************************************************************************************
3152 
3153  //**Subtraction assignment to dense matrices****************************************************
3165  template< typename MT3 // Type of the target dense matrix
3166  , bool SO > // Storage order of the target dense matrix
3167  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3168  {
3170 
3171  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3172  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3173 
3174  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3175  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3176 
3177  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3178  return;
3179  }
3180 
3181  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3182  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3183 
3184  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3185  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3186  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3187  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3188  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3189  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3190 
3191  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
3192  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3193  else
3194  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3195  }
3196  //**********************************************************************************************
3197 
3198  //**Default subtraction assignment to dense matrices********************************************
3212  template< typename MT3 // Type of the left-hand side target matrix
3213  , typename MT4 // Type of the left-hand side matrix operand
3214  , typename MT5 // Type of the right-hand side matrix operand
3215  , typename ST2 > // Type of the scalar value
3216  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3217  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3218  {
3219  const ResultType tmp( A * B * scalar );
3220  subAssign( C, tmp );
3221  }
3222  //**********************************************************************************************
3223 
3224  //**Vectorized default subtraction assignment to row-major dense matrices***********************
/*!\brief Vectorized default subtraction assignment of a scaled dense matrix-dense matrix
//        multiplication to a row-major dense matrix (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The kernel walks the columns of the result in blocks of 8, 4, 2 and finally 1 intrinsic
// vector(s) (IT::size elements each). For every block the products A(i,k)*B(k,j..) are summed
// over k in local accumulators; afterwards each accumulator is multiplied by the broadcast
// scalar and subtracted from the values currently stored in C.
//
// NOTE(review): the column bound is B.spacing(), not B.columns(). This presumably relies on the
// rows of B and C being padded to a multiple of IT::size - confirm against the matrix types.
// NOTE(review): the accumulators are only default-constructed; this assumes the default
// constructor of IntrinsicType yields a zero vector - confirm.
*/
3238  template< typename MT3 // Type of the left-hand side target matrix
3239  , typename MT4 // Type of the left-hand side matrix operand
3240  , typename MT5 // Type of the right-hand side matrix operand
3241  , typename ST2 > // Type of the scalar value
3242  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3243  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3244  {
3245  typedef IntrinsicTrait<ElementType> IT;
3246 
      // M: rows of the result, N: column bound (spacing of B), K: summation length
3247  const size_t M( A.rows() );
3248  const size_t N( B.spacing() );
3249  const size_t K( A.columns() );
3250 
      // Scaling factor converted once into an intrinsic vector via set()
3251  const IntrinsicType factor( set( scalar ) );
3252 
      // Column index; shared by all block loops below so each loop continues where the previous stopped
3253  size_t j( 0UL );
3254 
      // Column blocks of 8 intrinsic vectors, one row of C per iteration
3255  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
3256  for( size_t i=0UL; i<M; ++i ) {
3257  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3258  for( size_t k=0UL; k<K; ++k ) {
3259  const IntrinsicType a1( set( A(i,k) ) );
3260  xmm1 = xmm1 + a1 * B.get(k,j );
3261  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3262  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3263  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3264  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
3265  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
3266  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
3267  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
3268  }
3269  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3270  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3271  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3272  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3273  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) - xmm5 * factor );
3274  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) - xmm6 * factor );
3275  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) - xmm7 * factor );
3276  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) - xmm8 * factor );
3277  }
3278  }
      // Column blocks of 4 intrinsic vectors, two rows of C per iteration (xmm1-4: row i, xmm5-8: row i+1)
3279  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
3280  size_t i( 0UL );
3281  for( ; (i+2UL) <= M; i+=2UL ) {
3282  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3283  for( size_t k=0UL; k<K; ++k ) {
3284  const IntrinsicType a1( set( A(i ,k) ) );
3285  const IntrinsicType a2( set( A(i+1UL,k) ) );
3286  const IntrinsicType b1( B.get(k,j ) );
3287  const IntrinsicType b2( B.get(k,j+IT::size ) );
3288  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
3289  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
3290  xmm1 = xmm1 + a1 * b1;
3291  xmm2 = xmm2 + a1 * b2;
3292  xmm3 = xmm3 + a1 * b3;
3293  xmm4 = xmm4 + a1 * b4;
3294  xmm5 = xmm5 + a2 * b1;
3295  xmm6 = xmm6 + a2 * b2;
3296  xmm7 = xmm7 + a2 * b3;
3297  xmm8 = xmm8 + a2 * b4;
3298  }
3299  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3300  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) - xmm2 * factor );
3301  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) - xmm3 * factor );
3302  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) - xmm4 * factor );
3303  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm5 * factor );
3304  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) - xmm6 * factor );
3305  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) - xmm7 * factor );
3306  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) - xmm8 * factor );
3307  }
      // Remaining single row when M is odd
3308  if( i < M ) {
3309  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3310  for( size_t k=0UL; k<K; ++k ) {
3311  const IntrinsicType a1( set( A(i,k) ) );
3312  xmm1 = xmm1 + a1 * B.get(k,j );
3313  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3314  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3315  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3316  }
3317  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3318  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3319  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3320  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3321  }
3322  }
      // Column blocks of 2 intrinsic vectors, two rows of C per iteration (xmm1-2: row i, xmm3-4: row i+1)
3323  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
3324  size_t i( 0UL );
3325  for( ; (i+2UL) <= M; i+=2UL ) {
3326  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3327  for( size_t k=0UL; k<K; ++k ) {
3328  const IntrinsicType a1( set( A(i ,k) ) );
3329  const IntrinsicType a2( set( A(i+1UL,k) ) );
3330  const IntrinsicType b1( B.get(k,j ) );
3331  const IntrinsicType b2( B.get(k,j+IT::size) );
3332  xmm1 = xmm1 + a1 * b1;
3333  xmm2 = xmm2 + a1 * b2;
3334  xmm3 = xmm3 + a2 * b1;
3335  xmm4 = xmm4 + a2 * b2;
3336  }
3337  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3338  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) - xmm2 * factor );
3339  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm3 * factor );
3340  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) - xmm4 * factor );
3341  }
      // Remaining single row when M is odd
3342  if( i < M ) {
3343  IntrinsicType xmm1, xmm2;
3344  for( size_t k=0UL; k<K; ++k ) {
3345  const IntrinsicType a1( set( A(i,k) ) );
3346  xmm1 = xmm1 + a1 * B.get(k,j );
3347  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
3348  }
3349  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3350  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) - xmm2 * factor );
3351  }
3352  }
      // Final column block of a single intrinsic vector, handled at most once
      // NOTE(review): only one vector is processed here, so N-j is presumably exactly IT::size - confirm
3353  if( j < N ) {
3354  size_t i( 0UL );
3355  for( ; (i+2UL) <= M; i+=2UL ) {
3356  IntrinsicType xmm1, xmm2;
3357  for( size_t k=0UL; k<K; ++k ) {
3358  const IntrinsicType b1( B.get(k,j) );
3359  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3360  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3361  }
3362  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3363  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) - xmm2 * factor );
3364  }
      // Remaining single row when M is odd
3365  if( i < M ) {
3366  IntrinsicType xmm1;
3367  for( size_t k=0UL; k<K; ++k ) {
3368  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
3369  }
3370  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3371  }
3372  }
3373  }
3374  //**********************************************************************************************
3375 
3376  //**Vectorized default subtraction assignment to column-major dense matrices********************
3390  template< typename MT3 // Type of the left-hand side target matrix
3391  , typename MT4 // Type of the left-hand side matrix operand
3392  , typename MT5 // Type of the right-hand side matrix operand
3393  , typename ST2 > // Type of the scalar value
3394  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3395  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3396  {
3397  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3398  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3399 
3400  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3401  const typename MT4::OppositeType tmp( A );
3402  subAssign( ~C, tmp * B * scalar );
3403  }
3404  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3405  const typename MT5::OppositeType tmp( B );
3406  subAssign( ~C, A * tmp * scalar );
3407  }
3408  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3409  const typename MT4::OppositeType tmp( A );
3410  subAssign( ~C, tmp * B * scalar );
3411  }
3412  else {
3413  const typename MT5::OppositeType tmp( B );
3414  subAssign( ~C, A * tmp * scalar );
3415  }
3416  }
3417  //**********************************************************************************************
3418 
3419  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3433  template< typename MT3 // Type of the left-hand side target matrix
3434  , typename MT4 // Type of the left-hand side matrix operand
3435  , typename MT5 // Type of the right-hand side matrix operand
3436  , typename ST2 > // Type of the scalar value
3437  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3438  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3439  {
3440  selectDefaultSubAssignKernel( C, A, B, scalar );
3441  }
3442  //**********************************************************************************************
3443 
3444  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3445 #if BLAZE_BLAS_MODE
3446 
3459  template< typename MT3 // Type of the left-hand side target matrix
3460  , typename MT4 // Type of the left-hand side matrix operand
3461  , typename MT5 // Type of the right-hand side matrix operand
3462  , typename ST2 > // Type of the scalar value
3463  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3464  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3465  {
3466  using boost::numeric_cast;
3467 
3468  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
3469  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
3470  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
3471 
3472  const int M ( numeric_cast<int>( A.rows() ) );
3473  const int N ( numeric_cast<int>( B.columns() ) );
3474  const int K ( numeric_cast<int>( A.columns() ) );
3475  const int lda( numeric_cast<int>( A.spacing() ) );
3476  const int ldb( numeric_cast<int>( B.spacing() ) );
3477  const int ldc( numeric_cast<int>( C.spacing() ) );
3478 
3479  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3480  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3481  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3482  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3483  }
3484 #endif
3485  //**********************************************************************************************
3486 
3487  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3488 #if BLAZE_BLAS_MODE
3489 
3502  template< typename MT3 // Type of the left-hand side target matrix
3503  , typename MT4 // Type of the left-hand side matrix operand
3504  , typename MT5 // Type of the right-hand side matrix operand
3505  , typename ST2 > // Type of the scalar value
3506  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3507  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3508  {
3509  using boost::numeric_cast;
3510 
3511  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3512  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3513  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3514 
3515  const int M ( numeric_cast<int>( A.rows() ) );
3516  const int N ( numeric_cast<int>( B.columns() ) );
3517  const int K ( numeric_cast<int>( A.columns() ) );
3518  const int lda( numeric_cast<int>( A.spacing() ) );
3519  const int ldb( numeric_cast<int>( B.spacing() ) );
3520  const int ldc( numeric_cast<int>( C.spacing() ) );
3521 
3522  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3523  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3524  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3525  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3526  }
3527 #endif
3528  //**********************************************************************************************
3529 
3530  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3531 #if BLAZE_BLAS_MODE
3532 
3545  template< typename MT3 // Type of the left-hand side target matrix
3546  , typename MT4 // Type of the left-hand side matrix operand
3547  , typename MT5 // Type of the right-hand side matrix operand
3548  , typename ST2 > // Type of the scalar value
3549  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3550  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3551  {
3552  using boost::numeric_cast;
3553 
3554  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3555  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3556  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3558  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3559  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3560  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3561 
3562  const int M ( numeric_cast<int>( A.rows() ) );
3563  const int N ( numeric_cast<int>( B.columns() ) );
3564  const int K ( numeric_cast<int>( A.columns() ) );
3565  const int lda( numeric_cast<int>( A.spacing() ) );
3566  const int ldb( numeric_cast<int>( B.spacing() ) );
3567  const int ldc( numeric_cast<int>( C.spacing() ) );
3568  const complex<float> alpha( -scalar );
3569  const complex<float> beta ( 1.0F, 0.0F );
3570 
3571  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3572  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3573  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3574  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3575  }
3576 #endif
3577  //**********************************************************************************************
3578 
3579  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3580 #if BLAZE_BLAS_MODE
3581 
// BLAS kernel for the subtraction assignment of a scaled dense matrix product to a dense
// matrix with double precision complex elements.
//
// The overload is enabled (via EnableIf) only if UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. It issues a single cblas_zgemm call with alpha = -scalar and beta = 1, i.e. it
// evaluates C = (-scalar)*A*B + C, which is C -= scalar*A*B.
//
// C      : target matrix, updated in place through C.data()
// A      : left-hand side matrix operand
// B      : right-hand side matrix operand
// scalar : scaling factor of the product; it is negated to form the GEMM alpha
3594  template< typename MT3 // Type of the left-hand side target matrix
3595  , typename MT4 // Type of the left-hand side matrix operand
3596  , typename MT5 // Type of the right-hand side matrix operand
3597  , typename ST2 > // Type of the scalar value
3598  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3599  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3600  {
 // boost::numeric_cast is used for the range-checked conversion of all sizes to 'int'.
3601  using boost::numeric_cast;
3602 
 // Compile time constraints (going by the macro names): all three element types must be
 // complex types ...
3603  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3604  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3605  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
 // ... whose underlying value_type is double.
3607  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3608  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3609  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3610 
 // GEMM dimensions: the product is M x N, K is the inner dimension shared by A and B.
3611  const int M ( numeric_cast<int>( A.rows() ) );
3612  const int N ( numeric_cast<int>( B.columns() ) );
3613  const int K ( numeric_cast<int>( A.columns() ) );
 // The leading dimensions handed to BLAS are taken from the spacing() of each matrix.
3614  const int lda( numeric_cast<int>( A.spacing() ) );
3615  const int ldb( numeric_cast<int>( B.spacing() ) );
3616  const int ldc( numeric_cast<int>( C.spacing() ) );
 // alpha = -scalar turns the accumulating GEMM into a subtraction; beta = 1 keeps the
 // current content of C.
3617  const complex<double> alpha( -scalar );
3618  const complex<double> beta ( 1.0, 0.0 );
3619 
 // The storage order argument follows the target matrix MT3. For a row-major target both
 // operands are passed untransposed; otherwise column-major order is requested and both
 // operands are flagged as transposed.
 // NOTE(review): this presumably relies on A and B being stored in row-major order -
 // confirm against the callers of this kernel.
3620  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3621  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3622  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3623  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3624  }
3625 #endif
3626  //**********************************************************************************************
3627 
3628  //**Subtraction assignment to sparse matrices***************************************************
3629  // No special implementation for the subtraction assignment to sparse matrices.
3630  //**********************************************************************************************
3631 
3632  //**Multiplication assignment to dense matrices*************************************************
3633  // No special implementation for the multiplication assignment to dense matrices.
3634  //**********************************************************************************************
3635 
3636  //**Multiplication assignment to sparse matrices************************************************
3637  // No special implementation for the multiplication assignment to sparse matrices.
3638  //**********************************************************************************************
3639 
3640  //**Compile time checks*************************************************************************
3648  //**********************************************************************************************
3649 };
3651 //*************************************************************************************************
3652 
3653 
3654 
3655 
3656 //=================================================================================================
3657 //
3658 // GLOBAL BINARY ARITHMETIC OPERATORS
3659 //
3660 //=================================================================================================
3661 
3662 //*************************************************************************************************
// Multiplication operator for the product of two dense matrices (lhs * rhs).
//
// lhs : left-hand side dense matrix of the multiplication
// rhs : right-hand side dense matrix of the multiplication
//
// Returns a DMatDMatMultExpr<T1,T2> expression object built from the two operands; the
// function itself only checks the sizes and constructs that object.
// Throws std::invalid_argument ("Matrix sizes do not match") if the number of columns of
// lhs differs from the number of rows of rhs.
//
// NOTE(review): the declarator line (listing line 3691) was missing from this listing and
// has been restored from context: the return type, the use of ~lhs / ~rhs in the body and
// the DenseMatrix<...,false> base class of DMatDMatMultExpr. Confirm against the upstream
// header.
3688 template< typename T1 // Type of the left-hand side dense matrix
3689  , typename T2 > // Type of the right-hand side dense matrix
3690 inline const DMatDMatMultExpr<T1,T2>
3691  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,false>& rhs )
3692 {
3694 
 // The inner dimensions of the two operands have to agree.
3695  if( (~lhs).columns() != (~rhs).rows() )
3696  throw std::invalid_argument( "Matrix sizes do not match" );
3697 
 // operator~ is applied to both operands before they are handed to the expression object.
3698  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3699 }
3700 //*************************************************************************************************
3701 
3702 
3703 
3704 
3705 //=================================================================================================
3706 //
3707 // GLOBAL OPERATORS
3708 //
3709 //=================================================================================================
3710 
3711 //*************************************************************************************************
// Creates a view on a specific row of the given dense matrix/dense matrix multiplication.
//
// dm    : the matrix multiplication expression
// index : index of the requested row
//
// Instead of taking a row of the product itself, the result is formed as the row 'index'
// of the left-hand side operand multiplied with the right-hand side operand. The return
// type is provided by RowExprTrait< DMatDMatMultExpr<MT1,MT2> >.
3723 template< typename MT1 // Type of the left-hand side dense matrix
3724  , typename MT2 > // Type of the right-hand side dense matrix
3725 inline typename RowExprTrait< DMatDMatMultExpr<MT1,MT2> >::Type
3726  row( const DMatDMatMultExpr<MT1,MT2>& dm, size_t index )
3727 {
3729 
 // row( A*B, i ) is expressed as row( A, i ) * B.
3730  return row( dm.leftOperand(), index ) * dm.rightOperand();
3731 }
3733 //*************************************************************************************************
3734 
3735 
3736 //*************************************************************************************************
// Creates a view on a specific column of the given dense matrix/dense matrix multiplication.
//
// dm    : the matrix multiplication expression
// index : index of the requested column
//
// Instead of taking a column of the product itself, the result is formed as the left-hand
// side operand multiplied with the column 'index' of the right-hand side operand. The
// return type is provided by ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >.
3748 template< typename MT1 // Type of the left-hand side dense matrix
3749  , typename MT2 > // Type of the right-hand side dense matrix
3750 inline typename ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >::Type
3751  column( const DMatDMatMultExpr<MT1,MT2>& dm, size_t index )
3752 {
3754 
 // column( A*B, j ) is expressed as A * column( B, j ).
3755  return dm.leftOperand() * column( dm.rightOperand(), index );
3756 }
3758 //*************************************************************************************************
3759 
3760 
3761 
3762 
3763 //=================================================================================================
3764 //
3765 // EXPRESSION TRAIT SPECIALIZATIONS
3766 //
3767 //=================================================================================================
3768 
3769 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for a DMatDMatMultExpr as left-hand side operand,
// i.e. for the expression type of (A*B)*v with a dense vector v.
//
// If MT1 and MT2 are row-major dense matrices and VT is a non-transpose dense vector, 'Type'
// is the type of the restructured expression A*(B*v): the inner trait gives the type of
// B*v, the outer trait the type of A times that result. Otherwise 'Type' is INVALID_TYPE.
3771 template< typename MT1, typename MT2, typename VT >
3772 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3773 {
3774  public:
3775  //**********************************************************************************************
 // Condition: both matrices dense and row-major, vector dense and not transposed.
3776  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3777  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3778  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3779  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
3780  , INVALID_TYPE >::Type Type;
3781  //**********************************************************************************************
3782 };
3784 //*************************************************************************************************
3785 
3786 
3787 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for a DMatDMatMultExpr as left-hand side operand,
// i.e. for the expression type of (A*B)*v with a sparse vector v.
//
// If MT1 and MT2 are row-major dense matrices and VT is a non-transpose sparse vector,
// 'Type' is the type of the restructured expression A*(B*v): the inner
// DMatSVecMultExprTrait gives the type of B*v, which is then used as the vector operand of
// DMatDVecMultExprTrait together with MT1. Otherwise 'Type' is INVALID_TYPE.
3789 template< typename MT1, typename MT2, typename VT >
3790 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3791 {
3792  public:
3793  //**********************************************************************************************
 // Condition: both matrices dense and row-major, vector sparse and not transposed.
3794  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3795  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3796  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3797  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
3798  , INVALID_TYPE >::Type Type;
3799  //**********************************************************************************************
3800 };
3802 //*************************************************************************************************
3803 
3804 
3805 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for a DMatDMatMultExpr as right-hand side
// operand, i.e. for the expression type of v*(A*B) with a transpose dense vector v.
//
// If VT is a transpose dense vector and MT1 and MT2 are row-major dense matrices, 'Type' is
// the type of the restructured expression (v*A)*B: the inner trait gives the type of v*A,
// the outer trait the type of that result times B. Otherwise 'Type' is INVALID_TYPE.
3807 template< typename VT, typename MT1, typename MT2 >
3808 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3809 {
3810  public:
3811  //**********************************************************************************************
 // Condition: vector dense and transposed, both matrices dense and row-major.
3812  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3813  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3814  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3815  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3816  , INVALID_TYPE >::Type Type;
3817  //**********************************************************************************************
3818 };
3820 //*************************************************************************************************
3821 
3822 
3823 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for a DMatDMatMultExpr as right-hand side
// operand, i.e. for the expression type of v*(A*B) with a transpose sparse vector v.
//
// If VT is a transpose sparse vector and MT1 and MT2 are row-major dense matrices, 'Type'
// is the type of the restructured expression (v*A)*B: the inner TSVecDMatMultExprTrait
// gives the type of v*A, which is then used as the vector operand of
// TDVecDMatMultExprTrait together with MT2. Otherwise 'Type' is INVALID_TYPE.
3825 template< typename VT, typename MT1, typename MT2 >
3826 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3827 {
3828  public:
3829  //**********************************************************************************************
 // Condition: vector sparse and transposed, both matrices dense and row-major.
3830  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3831  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3832  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3833  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3834  , INVALID_TYPE >::Type Type;
3835  //**********************************************************************************************
3836 };
3838 //*************************************************************************************************
3839 
3840 
3841 //*************************************************************************************************
// Specialization of RowExprTrait for DMatDMatMultExpr: the type of a row of the product
// A*B. 'Type' is the multiplication type (MultExprTrait) of the row type of 'const MT1'
// and MT2, which matches the row() overload for DMatDMatMultExpr that multiplies a row of
// the left-hand side operand with the right-hand side operand.
3843 template< typename MT1, typename MT2 >
3844 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
3845 {
3846  public:
3847  //**********************************************************************************************
3848  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3849  //**********************************************************************************************
3850 };
3852 //*************************************************************************************************
3853 
3854 
3855 //*************************************************************************************************
// Specialization of ColumnExprTrait for DMatDMatMultExpr: the type of a column of the
// product A*B. 'Type' is the multiplication type (MultExprTrait) of MT1 and the column type
// of 'const MT2', which matches the column() overload for DMatDMatMultExpr that multiplies
// the left-hand side operand with a column of the right-hand side operand.
3857 template< typename MT1, typename MT2 >
3858 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
3859 {
3860  public:
3861  //**********************************************************************************************
3862  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3863  //**********************************************************************************************
3864 };
3866 //*************************************************************************************************
3867 
3868 } // namespace blaze
3869 
3870 #endif