All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
40 #include <blaze/math/Intrinsics.h>
41 #include <blaze/math/shims/Reset.h>
56 #include <blaze/system/BLAS.h>
58 #include <blaze/util/Assert.h>
59 #include <blaze/util/Complex.h>
65 #include <blaze/util/DisableIf.h>
66 #include <blaze/util/EnableIf.h>
67 #include <blaze/util/InvalidType.h>
68 #include <blaze/util/SelectType.h>
69 #include <blaze/util/Types.h>
75 
76 
77 namespace blaze {
78 
79 //=================================================================================================
80 //
81 // CLASS DMATDMATMULTEXPR
82 //
83 //=================================================================================================
84 
85 //*************************************************************************************************
92 template< typename MT1 // Type of the left-hand side dense matrix
93  , typename MT2 > // Type of the right-hand side dense matrix
94 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
95  , private Expression
96  , private Computation
97 {
98  private:
99  //**Type definitions****************************************************************************
// Abbreviations for the result and composite types exported by the two operand types.
100  typedef typename MT1::ResultType RT1; // ResultType of the left-hand side operand type MT1
101  typedef typename MT2::ResultType RT2; // ResultType of the right-hand side operand type MT2
102  typedef typename MT1::CompositeType CT1; // CompositeType of the left-hand side operand type MT1
103  typedef typename MT2::CompositeType CT2; // CompositeType of the right-hand side operand type MT2
104  //**********************************************************************************************
105 
106  //**********************************************************************************************
108 
109 
// Compile-time switch for the single precision BLAS kernel. Its nested 'value' is read by
// UseDefaultKernel and by the EnableIf condition of the single precision selectBlasAssignKernel.
// NOTE(review): the struct body (original lines 113-115) is missing from this listing, so the
// exact condition cannot be seen here - confirm against the original header.
111  template< typename T1, typename T2, typename T3 >
112  struct UseSinglePrecisionKernel {
116  };
118  //**********************************************************************************************
119 
120  //**********************************************************************************************
122 
123 
// Compile-time switch for the double precision BLAS kernel. Its nested 'value' is read by
// UseDefaultKernel and by the EnableIf condition of the double precision selectBlasAssignKernel.
// NOTE(review): the struct body (original lines 127-129) is missing from this listing, so the
// exact condition cannot be seen here - confirm against the original header.
125  template< typename T1, typename T2, typename T3 >
126  struct UseDoublePrecisionKernel {
130  };
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
136 
137 
140  template< typename T1, typename T2, typename T3 >
141  struct UseSinglePrecisionComplexKernel {
142  typedef complex<float> Type;
143  enum { value = IsSame<typename T1::ElementType,Type>::value &&
144  IsSame<typename T2::ElementType,Type>::value &&
145  IsSame<typename T3::ElementType,Type>::value };
146  };
148  //**********************************************************************************************
149 
150  //**********************************************************************************************
152 
153 
156  template< typename T1, typename T2, typename T3 >
157  struct UseDoublePrecisionComplexKernel {
158  typedef complex<double> Type;
159  enum { value = IsSame<typename T1::ElementType,Type>::value &&
160  IsSame<typename T2::ElementType,Type>::value &&
161  IsSame<typename T3::ElementType,Type>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
169 
171  template< typename T1, typename T2, typename T3 >
172  struct UseDefaultKernel {
173  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
174  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
175  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
176  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
177  };
179  //**********************************************************************************************
180 
181  //**********************************************************************************************
183 
184 
186  template< typename T1, typename T2, typename T3 >
187  struct UseVectorizedDefaultKernel {
188  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
189  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
190  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
191  IntrinsicTrait<typename T1::ElementType>::addition &&
192  IntrinsicTrait<typename T1::ElementType>::multiplication };
193  };
195  //**********************************************************************************************
196 
197  public:
198  //**Type definitions****************************************************************************
// Public type definitions of the expression.
// NOTE(review): the typedef of ResultType itself (original lines 199-200) is not part of this
// listing. The comments below also assume that SelectType<C,A,B>::Type is A if C is true and
// B otherwise - confirm against blaze/util/SelectType.h.
201  typedef typename ResultType::OppositeType OppositeType; // OppositeType exported by ResultType
202  typedef typename ResultType::TransposeType TransposeType; // TransposeType exported by ResultType
203  typedef typename ResultType::ElementType ElementType; // ElementType exported by ResultType
205  typedef const ElementType ReturnType; // Type returned by operator(): a constant element by value
206  typedef const ResultType CompositeType; // A constant ResultType
207 
// Member type of the left-hand side operand: selected between 'const MT1' and 'const MT1&'
// depending on IsExpression<MT1>.
209  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
210 
// Member type of the right-hand side operand: selected between 'const MT2' and 'const MT2&'
// depending on IsExpression<MT2>.
212  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
213 
// Type of the local left-hand side operand 'A' in the assignment functions: selected between
// 'const RT1' and 'CT1' depending on IsComputation<MT1>.
215  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
216 
// Type of the local right-hand side operand 'B' in the assignment functions: selected between
// 'const RT2' and 'CT2' depending on IsComputation<MT2>.
218  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
219  //**********************************************************************************************
220 
221  //**Compilation flags***************************************************************************
// Compilation flag: the multiplication expression itself reports vectorizable == 0.
223  enum { vectorizable = 0 };
224 
// Compilation flag: non-zero as soon as at least one of the two operand types is not a
// computation (compare isAliased(), which only queries non-computation operands).
226  enum { canAlias = !IsComputation<MT1>::value || !IsComputation<MT2>::value };
227  //**********************************************************************************************
228 
229  //**Constructor*********************************************************************************
235  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
236  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
237  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
238  {
239  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
240  }
241  //**********************************************************************************************
242 
243  //**Access operator*****************************************************************************
250  inline ReturnType operator()( size_t i, size_t j ) const {
251  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
252  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
253 
254  ElementType tmp;
255 
256  if( lhs_.columns() != 0UL ) {
257  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
258  tmp = lhs_(i,0UL) * rhs_(0UL,j);
259  for( size_t k=1UL; k<end; k+=2UL ) {
260  tmp += lhs_(i,k ) * rhs_(k ,j);
261  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
262  }
263  if( end < lhs_.columns() ) {
264  tmp += lhs_(i,end) * rhs_(end,j);
265  }
266  }
267  else {
268  reset( tmp );
269  }
270 
271  return tmp;
272  }
273  //**********************************************************************************************
274 
275  //**Rows function*******************************************************************************
280  inline size_t rows() const {
281  return lhs_.rows();
282  }
283  //**********************************************************************************************
284 
285  //**Columns function****************************************************************************
290  inline size_t columns() const {
291  return rhs_.columns();
292  }
293  //**********************************************************************************************
294 
295  //**Left operand access*************************************************************************
300  inline LeftOperand leftOperand() const {
301  return lhs_;
302  }
303  //**********************************************************************************************
304 
305  //**Right operand access************************************************************************
310  inline RightOperand rightOperand() const {
311  return rhs_;
312  }
313  //**********************************************************************************************
314 
315  //**********************************************************************************************
321  template< typename T >
322  inline bool isAliased( const T* alias ) const {
323  return ( !IsComputation<MT1>::value && lhs_.isAliased( alias ) ) ||
324  ( !IsComputation<MT2>::value && rhs_.isAliased( alias ) );
325  }
326  //**********************************************************************************************
327 
328  private:
329  //**Member variables****************************************************************************
332  //**********************************************************************************************
333 
334  //**Assignment to dense matrices****************************************************************
344  template< typename MT3 // Type of the target dense matrix
345  , bool SO > // Storage order of the target dense matrix
346  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
347  {
348  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
349  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
350 
351  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
352  return;
353  }
354  else if( rhs.lhs_.columns() == 0UL ) {
355  reset( ~lhs );
356  return;
357  }
358 
359  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
360  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
361 
362  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
363  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
364  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
365  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
366  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
367  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
368 
369  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
370  DMatDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
371  else
372  DMatDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
373  }
375  //**********************************************************************************************
376 
377  //**Default assignment to dense matrices********************************************************
390  template< typename MT3 // Type of the left-hand side target matrix
391  , typename MT4 // Type of the left-hand side matrix operand
392  , typename MT5 > // Type of the right-hand side matrix operand
393  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
394  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
395  {
396  const size_t M( A.rows() );
397  const size_t N( B.columns() );
398  const size_t K( A.columns() );
399 
400  for( size_t i=0UL; i<M; ++i ) {
401  for( size_t j=0UL; j<N; ++j ) {
402  C(i,j) = A(i,0UL) * B(0UL,j);
403  }
404  for( size_t k=1UL; k<K; ++k ) {
405  for( size_t j=0UL; j<N; ++j ) {
406  C(i,j) += A(i,k) * B(k,j);
407  }
408  }
409  }
410  }
412  //**********************************************************************************************
413 
414  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for the assignment C = A * B to a row-major target matrix.
// This overload is enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// The columns are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size
// elements each. For every block the products are accumulated over all k in local IntrinsicType
// variables and then written to C via store(), i.e. previous contents of C are overwritten.
// NOTE(review): the accumulators are default-constructed and never explicitly zeroed; this
// assumes IntrinsicType's default constructor yields zero - confirm.
428  template< typename MT3 // Type of the left-hand side target matrix
429  , typename MT4 // Type of the left-hand side matrix operand
430  , typename MT5 > // Type of the right-hand side matrix operand
431  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
432  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
433  {
434  typedef IntrinsicTrait<ElementType> IT;
435 
436  const size_t M( A.rows() );
// The column bound is B.spacing(), not B.columns().
// NOTE(review): presumably spacing() includes padding so that whole vectors can be processed - confirm.
437  const size_t N( B.spacing() );
438  const size_t K( A.columns() );
439 
// Column index shared by all block loops below; each loop continues where the previous one stopped
440  size_t j( 0UL );
441 
// Blocks of 8 vectors per row, one row of C per iteration
442  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
443  for( size_t i=0UL; i<M; ++i ) {
444  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
445  for( size_t k=0UL; k<K; ++k ) {
446  const IntrinsicType a1( set( A(i,k) ) );
447  xmm1 = xmm1 + a1 * B.get(k,j );
448  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
449  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
450  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
451  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
452  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
453  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
454  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
455  }
456  store( &(~C)(i,j ), xmm1 );
457  store( &(~C)(i,j+IT::size ), xmm2 );
458  store( &(~C)(i,j+IT::size*2UL), xmm3 );
459  store( &(~C)(i,j+IT::size*3UL), xmm4 );
460  store( &(~C)(i,j+IT::size*4UL), xmm5 );
461  store( &(~C)(i,j+IT::size*5UL), xmm6 );
462  store( &(~C)(i,j+IT::size*6UL), xmm7 );
463  store( &(~C)(i,j+IT::size*7UL), xmm8 );
464  }
465  }
// Blocks of 4 vectors, two rows of C per iteration (xmm1-4: row i, xmm5-8: row i+1)
466  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
467  size_t i( 0UL );
468  for( ; (i+2UL) <= M; i+=2UL ) {
469  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
470  for( size_t k=0UL; k<K; ++k ) {
471  const IntrinsicType a1( set( A(i ,k) ) );
472  const IntrinsicType a2( set( A(i+1UL,k) ) );
473  const IntrinsicType b1( B.get(k,j ) );
474  const IntrinsicType b2( B.get(k,j+IT::size ) );
475  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
476  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
477  xmm1 = xmm1 + a1 * b1;
478  xmm2 = xmm2 + a1 * b2;
479  xmm3 = xmm3 + a1 * b3;
480  xmm4 = xmm4 + a1 * b4;
481  xmm5 = xmm5 + a2 * b1;
482  xmm6 = xmm6 + a2 * b2;
483  xmm7 = xmm7 + a2 * b3;
484  xmm8 = xmm8 + a2 * b4;
485  }
486  store( &(~C)(i ,j ), xmm1 );
487  store( &(~C)(i ,j+IT::size ), xmm2 );
488  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
489  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
490  store( &(~C)(i+1UL,j ), xmm5 );
491  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
492  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
493  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
494  }
// Remaining single row in case M is odd
495  if( i < M ) {
496  IntrinsicType xmm1, xmm2, xmm3, xmm4;
497  for( size_t k=0UL; k<K; ++k ) {
498  const IntrinsicType a1( set( A(i,k) ) );
499  xmm1 = xmm1 + a1 * B.get(k,j );
500  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
501  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
502  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
503  }
504  store( &(~C)(i,j ), xmm1 );
505  store( &(~C)(i,j+IT::size ), xmm2 );
506  store( &(~C)(i,j+IT::size*2UL), xmm3 );
507  store( &(~C)(i,j+IT::size*3UL), xmm4 );
508  }
509  }
// Blocks of 2 vectors, two rows of C per iteration (xmm1-2: row i, xmm3-4: row i+1)
510  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
511  size_t i( 0UL );
512  for( ; (i+2UL) <= M; i+=2UL ) {
513  IntrinsicType xmm1, xmm2, xmm3, xmm4;
514  for( size_t k=0UL; k<K; ++k ) {
515  const IntrinsicType a1( set( A(i ,k) ) );
516  const IntrinsicType a2( set( A(i+1UL,k) ) );
517  const IntrinsicType b1( B.get(k,j ) );
518  const IntrinsicType b2( B.get(k,j+IT::size) );
519  xmm1 = xmm1 + a1 * b1;
520  xmm2 = xmm2 + a1 * b2;
521  xmm3 = xmm3 + a2 * b1;
522  xmm4 = xmm4 + a2 * b2;
523  }
524  store( &(~C)(i ,j ), xmm1 );
525  store( &(~C)(i ,j+IT::size), xmm2 );
526  store( &(~C)(i+1UL,j ), xmm3 );
527  store( &(~C)(i+1UL,j+IT::size), xmm4 );
528  }
// Remaining single row in case M is odd
529  if( i < M ) {
530  IntrinsicType xmm1, xmm2;
531  for( size_t k=0UL; k<K; ++k ) {
532  const IntrinsicType a1( set( A(i,k) ) );
533  xmm1 = xmm1 + a1 * B.get(k,j );
534  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
535  }
536  store( &(~C)(i,j ), xmm1 );
537  store( &(~C)(i,j+IT::size), xmm2 );
538  }
539  }
// Final single vector (if any), two rows of C per iteration plus a single-row remainder
540  if( j < N ) {
541  size_t i( 0UL );
542  for( ; (i+2UL) <= M; i+=2UL ) {
543  IntrinsicType xmm1, xmm2;
544  for( size_t k=0UL; k<K; ++k ) {
545  const IntrinsicType b1( B.get(k,j) );
546  xmm1 = xmm1 + set( A(i ,k) ) * b1;
547  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
548  }
549  store( &(~C)(i ,j), xmm1 );
550  store( &(~C)(i+1UL,j), xmm2 );
551  }
552  if( i < M ) {
553  IntrinsicType xmm1;
554  for( size_t k=0UL; k<K; ++k ) {
555  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
556  }
557  store( &(~C)(i,j), xmm1 );
558  }
559  }
560  }
562  //**********************************************************************************************
563 
564  //**Vectorized default assignment to column-major dense matrices********************************
578  template< typename MT3 // Type of the left-hand side target matrix
579  , typename MT4 // Type of the left-hand side matrix operand
580  , typename MT5 > // Type of the right-hand side matrix operand
581  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
582  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
583  {
584  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
585  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
586 
587  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
588  const typename MT4::OppositeType tmp( A );
589  assign( ~C, tmp * B );
590  }
591  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
592  const typename MT5::OppositeType tmp( B );
593  assign( ~C, A * tmp );
594  }
595  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
596  const typename MT4::OppositeType tmp( A );
597  assign( ~C, tmp * B );
598  }
599  else {
600  const typename MT5::OppositeType tmp( B );
601  assign( ~C, A * tmp );
602  }
603  }
605  //**********************************************************************************************
606 
607  //**BLAS-based assignment to dense matrices (default)*******************************************
620  template< typename MT3 // Type of the left-hand side target matrix
621  , typename MT4 // Type of the left-hand side matrix operand
622  , typename MT5 > // Type of the right-hand side matrix operand
623  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
624  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
625  {
626  selectDefaultAssignKernel( C, A, B );
627  }
629  //**********************************************************************************************
630 
631  //**BLAS-based assignment to dense matrices (single precision)**********************************
632 #if BLAZE_BLAS_MODE
633 
646  template< typename MT3 // Type of the left-hand side target matrix
647  , typename MT4 // Type of the left-hand side matrix operand
648  , typename MT5 > // Type of the right-hand side matrix operand
649  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
650  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
651  {
652  using boost::numeric_cast;
653 
654  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
655  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
656  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
657 
658  const int M ( numeric_cast<int>( A.rows() ) );
659  const int N ( numeric_cast<int>( B.columns() ) );
660  const int K ( numeric_cast<int>( A.columns() ) );
661  const int lda( numeric_cast<int>( A.spacing() ) );
662  const int ldb( numeric_cast<int>( B.spacing() ) );
663  const int ldc( numeric_cast<int>( C.spacing() ) );
664 
665  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
666  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
667  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
668  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
669  }
671 #endif
672  //**********************************************************************************************
673 
674  //**BLAS-based assignment to dense matrices (double precision)**********************************
675 #if BLAZE_BLAS_MODE
676 
689  template< typename MT3 // Type of the left-hand side target matrix
690  , typename MT4 // Type of the left-hand side matrix operand
691  , typename MT5 > // Type of the right-hand side matrix operand
692  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
693  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
694  {
695  using boost::numeric_cast;
696 
697  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
700 
701  const int M ( numeric_cast<int>( A.rows() ) );
702  const int N ( numeric_cast<int>( B.columns() ) );
703  const int K ( numeric_cast<int>( A.columns() ) );
704  const int lda( numeric_cast<int>( A.spacing() ) );
705  const int ldb( numeric_cast<int>( B.spacing() ) );
706  const int ldc( numeric_cast<int>( C.spacing() ) );
707 
708  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
709  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
710  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
711  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
712  }
714 #endif
715  //**********************************************************************************************
716 
717  //**BLAS-based assignment to dense matrices (single precision complex)**************************
718 #if BLAZE_BLAS_MODE
719 
732  template< typename MT3 // Type of the left-hand side target matrix
733  , typename MT4 // Type of the left-hand side matrix operand
734  , typename MT5 > // Type of the right-hand side matrix operand
735  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
736  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
737  {
738  using boost::numeric_cast;
739 
740  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
741  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
742  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
743  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
744  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
745  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
746 
747  const int M ( numeric_cast<int>( A.rows() ) );
748  const int N ( numeric_cast<int>( B.columns() ) );
749  const int K ( numeric_cast<int>( A.columns() ) );
750  const int lda( numeric_cast<int>( A.spacing() ) );
751  const int ldb( numeric_cast<int>( B.spacing() ) );
752  const int ldc( numeric_cast<int>( C.spacing() ) );
753  const complex<float> alpha( 1.0F, 0.0F );
754  const complex<float> beta ( 0.0F, 0.0F );
755 
756  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
757  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
758  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
759  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
760  }
762 #endif
763  //**********************************************************************************************
764 
765  //**BLAS-based assignment to dense matrices (double precision complex)**************************
766 #if BLAZE_BLAS_MODE
767 
780  template< typename MT3 // Type of the left-hand side target matrix
781  , typename MT4 // Type of the left-hand side matrix operand
782  , typename MT5 > // Type of the right-hand side matrix operand
783  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
784  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
785  {
786  using boost::numeric_cast;
787 
788  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
789  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
790  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
791  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
792  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
793  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
794 
795  const int M ( numeric_cast<int>( A.rows() ) );
796  const int N ( numeric_cast<int>( B.columns() ) );
797  const int K ( numeric_cast<int>( A.columns() ) );
798  const int lda( numeric_cast<int>( A.spacing() ) );
799  const int ldb( numeric_cast<int>( B.spacing() ) );
800  const int ldc( numeric_cast<int>( C.spacing() ) );
801  const complex<double> alpha( 1.0, 0.0 );
802  const complex<double> beta ( 0.0, 0.0 );
803 
804  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
805  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
806  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
807  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
808  }
810 #endif
811  //**********************************************************************************************
812 
813  //**Assignment to sparse matrices***************************************************************
// Assigns the dense matrix product to a sparse matrix of arbitrary storage order.
// The expression is first evaluated into a dense temporary (tmp), which is then assigned to
// the sparse target via assign().
// NOTE(review): original lines 831-835 of this function are missing from this listing.
825  template< typename MT // Type of the target sparse matrix
826  , bool SO > // Storage order of the target sparse matrix
827  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
828  {
// Type of the temporary: chosen between OppositeType and ResultType based on the storage
// order SO of the target (presumably OppositeType for SO == true - confirm SelectType semantics).
829  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
830 
836  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
837 
838  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
839  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
840 
// Evaluate the product into the temporary and assign the temporary to the sparse target
841  const TmpType tmp( rhs );
842  assign( ~lhs, tmp );
843  }
845  //**********************************************************************************************
846 
847  //**Addition assignment to dense matrices*******************************************************
860  template< typename MT3 // Type of the target dense matrix
861  , bool SO > // Storage order of the target dense matrix
862  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
863  {
864  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
865  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
866 
867  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
868  return;
869  }
870 
871  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
872  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
873 
874  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
875  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
876  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
877  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
878  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
879  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
880 
881  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
882  DMatDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
883  else
884  DMatDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
885  }
887  //**********************************************************************************************
888 
889  //**Default addition assignment to dense matrices***********************************************
903  template< typename MT3 // Type of the left-hand side target matrix
904  , typename MT4 // Type of the left-hand side matrix operand
905  , typename MT5 > // Type of the right-hand side matrix operand
906  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
907  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
908  {
909  const size_t M( A.rows() );
910  const size_t N( B.columns() );
911  const size_t K( A.columns() );
912 
913  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
914  const size_t end( N & size_t(-2) );
915 
916  for( size_t i=0UL; i<M; ++i ) {
917  for( size_t k=0UL; k<K; ++k ) {
918  for( size_t j=0UL; j<end; j+=2UL ) {
919  C(i,j ) += A(i,k) * B(k,j );
920  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
921  }
922  if( end < N ) {
923  C(i,end) += A(i,k) * B(k,end);
924  }
925  }
926  }
927  }
929  //**********************************************************************************************
930 
931  //**Vectorized default addition assignment to row-major dense matrices**************************
945  template< typename MT3 // Type of the left-hand side target matrix
946  , typename MT4 // Type of the left-hand side matrix operand
947  , typename MT5 > // Type of the right-hand side matrix operand
948  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
949  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
950  {
951  typedef IntrinsicTrait<ElementType> IT;
952 
953  const size_t M( A.rows() );
954  const size_t N( B.spacing() );
955  const size_t K( A.columns() );
956 
957  size_t j( 0UL );
958 
959  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
960  for( size_t i=0UL; i<M; ++i ) {
961  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
962  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
963  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
964  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
965  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
966  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
967  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
968  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
969  for( size_t k=0UL; k<K; ++k ) {
970  const IntrinsicType a1( set( A(i,k) ) );
971  xmm1 = xmm1 + a1 * B.get(k,j );
972  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
973  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
974  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
975  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
976  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
977  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
978  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
979  }
980  store( &(~C)(i,j ), xmm1 );
981  store( &(~C)(i,j+IT::size ), xmm2 );
982  store( &(~C)(i,j+IT::size*2UL), xmm3 );
983  store( &(~C)(i,j+IT::size*3UL), xmm4 );
984  store( &(~C)(i,j+IT::size*4UL), xmm5 );
985  store( &(~C)(i,j+IT::size*5UL), xmm6 );
986  store( &(~C)(i,j+IT::size*6UL), xmm7 );
987  store( &(~C)(i,j+IT::size*7UL), xmm8 );
988  }
989  }
990  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
991  size_t i( 0UL );
992  for( ; (i+2UL) <= M; i+=2UL ) {
993  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
994  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
995  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
996  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
997  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
998  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
999  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1000  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1001  for( size_t k=0UL; k<K; ++k ) {
1002  const IntrinsicType a1( set( A(i ,k) ) );
1003  const IntrinsicType a2( set( A(i+1UL,k) ) );
1004  const IntrinsicType b1( B.get(k,j ) );
1005  const IntrinsicType b2( B.get(k,j+IT::size ) );
1006  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1007  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1008  xmm1 = xmm1 + a1 * b1;
1009  xmm2 = xmm2 + a1 * b2;
1010  xmm3 = xmm3 + a1 * b3;
1011  xmm4 = xmm4 + a1 * b4;
1012  xmm5 = xmm5 + a2 * b1;
1013  xmm6 = xmm6 + a2 * b2;
1014  xmm7 = xmm7 + a2 * b3;
1015  xmm8 = xmm8 + a2 * b4;
1016  }
1017  store( &(~C)(i ,j ), xmm1 );
1018  store( &(~C)(i ,j+IT::size ), xmm2 );
1019  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1020  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1021  store( &(~C)(i+1UL,j ), xmm5 );
1022  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1023  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1024  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1025  }
1026  if( i < M ) {
1027  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1028  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1029  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1030  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1031  for( size_t k=0UL; k<K; ++k ) {
1032  const IntrinsicType a1( set( A(i,k) ) );
1033  xmm1 = xmm1 + a1 * B.get(k,j );
1034  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
1035  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
1036  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
1037  }
1038  store( &(~C)(i,j ), xmm1 );
1039  store( &(~C)(i,j+IT::size ), xmm2 );
1040  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1041  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1042  }
1043  }
1044  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1045  size_t i( 0UL );
1046  for( ; (i+2UL) <= M; i+=2UL ) {
1047  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1048  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1049  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1050  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1051  for( size_t k=0UL; k<K; ++k ) {
1052  const IntrinsicType a1( set( A(i ,k) ) );
1053  const IntrinsicType a2( set( A(i+1UL,k) ) );
1054  const IntrinsicType b1( B.get(k,j ) );
1055  const IntrinsicType b2( B.get(k,j+IT::size) );
1056  xmm1 = xmm1 + a1 * b1;
1057  xmm2 = xmm2 + a1 * b2;
1058  xmm3 = xmm3 + a2 * b1;
1059  xmm4 = xmm4 + a2 * b2;
1060  }
1061  store( &(~C)(i ,j ), xmm1 );
1062  store( &(~C)(i ,j+IT::size), xmm2 );
1063  store( &(~C)(i+1UL,j ), xmm3 );
1064  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1065  }
1066  if( i < M ) {
1067  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1068  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1069  for( size_t k=0UL; k<K; ++k ) {
1070  const IntrinsicType a1( set( A(i,k) ) );
1071  xmm1 = xmm1 + a1 * B.get(k,j );
1072  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
1073  }
1074  store( &(~C)(i,j ), xmm1 );
1075  store( &(~C)(i,j+IT::size), xmm2 );
1076  }
1077  }
1078  if( j < N ) {
1079  size_t i( 0UL );
1080  for( ; (i+2UL) <= M; i+=2UL ) {
1081  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1082  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1083  for( size_t k=0UL; k<K; ++k ) {
1084  const IntrinsicType b1( B.get(k,j) );
1085  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1086  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1087  }
1088  store( &(~C)(i ,j), xmm1 );
1089  store( &(~C)(i+1UL,j), xmm2 );
1090  }
1091  if( i < M ) {
1092  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1093  for( size_t k=0UL; k<K; ++k ) {
1094  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
1095  }
1096  store( &(~C)(i,j), xmm1 );
1097  }
1098  }
1099  }
1101  //**********************************************************************************************
1102 
1103  //**Vectorized default addition assignment to column-major dense matrices***********************
1117  template< typename MT3 // Type of the left-hand side target matrix
1118  , typename MT4 // Type of the left-hand side matrix operand
1119  , typename MT5 > // Type of the right-hand side matrix operand
1120  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1121  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1122  {
1123  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1124  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1125 
1126  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1127  const typename MT4::OppositeType tmp( A );
1128  addAssign( ~C, tmp * B );
1129  }
1130  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1131  const typename MT5::OppositeType tmp( B );
1132  addAssign( ~C, A * tmp );
1133  }
1134  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1135  const typename MT4::OppositeType tmp( A );
1136  addAssign( ~C, tmp * B );
1137  }
1138  else {
1139  const typename MT5::OppositeType tmp( B );
1140  addAssign( ~C, A * tmp );
1141  }
1142  }
1144  //**********************************************************************************************
1145 
1146  //**BLAS-based addition assignment to dense matrices (default)**********************************
1160  template< typename MT3 // Type of the left-hand side target matrix
1161  , typename MT4 // Type of the left-hand side matrix operand
1162  , typename MT5 > // Type of the right-hand side matrix operand
1163  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1164  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1165  {
1166  selectDefaultAddAssignKernel( C, A, B );
1167  }
1169  //**********************************************************************************************
1170 
1171  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1172 #if BLAZE_BLAS_MODE
1173 
1186  template< typename MT3 // Type of the left-hand side target matrix
1187  , typename MT4 // Type of the left-hand side matrix operand
1188  , typename MT5 > // Type of the right-hand side matrix operand
1189  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1190  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1191  {
1192  using boost::numeric_cast;
1193 
1194  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1195  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1196  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1197 
1198  const int M ( numeric_cast<int>( A.rows() ) );
1199  const int N ( numeric_cast<int>( B.columns() ) );
1200  const int K ( numeric_cast<int>( A.columns() ) );
1201  const int lda( numeric_cast<int>( A.spacing() ) );
1202  const int ldb( numeric_cast<int>( B.spacing() ) );
1203  const int ldc( numeric_cast<int>( C.spacing() ) );
1204 
1205  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1206  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1207  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1208  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1209  }
1211 #endif
1212  //**********************************************************************************************
1213 
1214  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1215 #if BLAZE_BLAS_MODE
1216 
1229  template< typename MT3 // Type of the left-hand side target matrix
1230  , typename MT4 // Type of the left-hand side matrix operand
1231  , typename MT5 > // Type of the right-hand side matrix operand
1232  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1233  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1234  {
1235  using boost::numeric_cast;
1236 
1237  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1238  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1239  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1240 
1241  const int M ( numeric_cast<int>( A.rows() ) );
1242  const int N ( numeric_cast<int>( B.columns() ) );
1243  const int K ( numeric_cast<int>( A.columns() ) );
1244  const int lda( numeric_cast<int>( A.spacing() ) );
1245  const int ldb( numeric_cast<int>( B.spacing() ) );
1246  const int ldc( numeric_cast<int>( C.spacing() ) );
1247 
1248  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1249  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1250  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1251  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1252  }
1254 #endif
1255  //**********************************************************************************************
1256 
1257  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1258 #if BLAZE_BLAS_MODE
1259 
1272  template< typename MT3 // Type of the left-hand side target matrix
1273  , typename MT4 // Type of the left-hand side matrix operand
1274  , typename MT5 > // Type of the right-hand side matrix operand
1275  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1276  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1277  {
1278  using boost::numeric_cast;
1279 
1280  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1281  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1282  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1283  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1284  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1285  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1286 
1287  const int M ( numeric_cast<int>( A.rows() ) );
1288  const int N ( numeric_cast<int>( B.columns() ) );
1289  const int K ( numeric_cast<int>( A.columns() ) );
1290  const int lda( numeric_cast<int>( A.spacing() ) );
1291  const int ldb( numeric_cast<int>( B.spacing() ) );
1292  const int ldc( numeric_cast<int>( C.spacing() ) );
1293  const complex<float> alpha( 1.0F, 0.0F );
1294  const complex<float> beta ( 1.0F, 0.0F );
1295 
1296  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1297  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1298  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1299  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1300  }
1302 #endif
1303  //**********************************************************************************************
1304 
1305  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1306 #if BLAZE_BLAS_MODE
1307 
1320  template< typename MT3 // Type of the left-hand side target matrix
1321  , typename MT4 // Type of the left-hand side matrix operand
1322  , typename MT5 > // Type of the right-hand side matrix operand
1323  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1324  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1325  {
1326  using boost::numeric_cast;
1327 
1328  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1329  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1330  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1331  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1332  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1333  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1334 
1335  const int M ( numeric_cast<int>( A.rows() ) );
1336  const int N ( numeric_cast<int>( B.columns() ) );
1337  const int K ( numeric_cast<int>( A.columns() ) );
1338  const int lda( numeric_cast<int>( A.spacing() ) );
1339  const int ldb( numeric_cast<int>( B.spacing() ) );
1340  const int ldc( numeric_cast<int>( C.spacing() ) );
1341  const complex<double> alpha( 1.0, 0.0 );
1342  const complex<double> beta ( 1.0, 0.0 );
1343 
1344  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1345  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1346  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1347  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1348  }
1350 #endif
1351  //**********************************************************************************************
1352 
1353  //**Addition assignment to sparse matrices******************************************************
1354  // No special implementation for the addition assignment to sparse matrices.
1355  //**********************************************************************************************
1356 
1357  //**Subtraction assignment to dense matrices****************************************************
1370  template< typename MT3 // Type of the target dense matrix
1371  , bool SO > // Storage order of the target dense matrix
1372  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
1373  {
1374  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1375  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1376 
1377  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1378  return;
1379  }
1380 
1381  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1382  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1383 
1384  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1385  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1386  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1387  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1388  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1389  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1390 
1391  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
1392  DMatDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1393  else
1394  DMatDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1395  }
1397  //**********************************************************************************************
1398 
1399  //**Default subtraction assignment to dense matrices********************************************
1413  template< typename MT3 // Type of the left-hand side target matrix
1414  , typename MT4 // Type of the left-hand side matrix operand
1415  , typename MT5 > // Type of the right-hand side matrix operand
1416  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1417  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1418  {
1419  const size_t M( A.rows() );
1420  const size_t N( B.columns() );
1421  const size_t K( A.columns() );
1422 
1423  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1424  const size_t end( N & size_t(-2) );
1425 
1426  for( size_t i=0UL; i<M; ++i ) {
1427  for( size_t k=0UL; k<K; ++k ) {
1428  for( size_t j=0UL; j<end; j+=2UL ) {
1429  C(i,j ) -= A(i,k) * B(k,j );
1430  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1431  }
1432  if( end < N ) {
1433  C(i,end) -= A(i,k) * B(k,end);
1434  }
1435  }
1436  }
1437  }
1439  //**********************************************************************************************
1440 
1441  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized subtraction assignment kernel for row-major targets: computes C -= A * B by
// holding blocks of C in IntrinsicType variables while looping over the inner dimension K.
// Only enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//   C: target matrix; blocks are read with load() and written back with store().
//   A: left-hand side operand; each element is passed through set() before the multiply
//      (presumably a broadcast to all vector lanes -- TODO confirm against the intrinsics layer).
//   B: right-hand side operand; accessed in vector-sized chunks via B.get().
// The column range is processed in stages of decreasing width (8, 4, 2, 1 vectors).
1455  template< typename MT3 // Type of the left-hand side target matrix
1456  , typename MT4 // Type of the left-hand side matrix operand
1457  , typename MT5 > // Type of the right-hand side matrix operand
1458  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1459  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1460  {
1461  typedef IntrinsicTrait<ElementType> IT;
1462 
1463  const size_t M( A.rows() );
// NOTE(review): N is B.spacing(), not B.columns() -- presumably the padded row length, so the
// loops below would also touch padding elements of C and B; confirm against the matrix classes.
1464  const size_t N( B.spacing() );
1465  const size_t K( A.columns() );
1466 
1467  size_t j( 0UL );
1468 
// Stage 1: column blocks of 8 vectors (IT::size*8 elements), one row of C at a time.
1469  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1470  for( size_t i=0UL; i<M; ++i ) {
1471  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1472  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1473  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1474  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1475  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
1476  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
1477  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
1478  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
1479  for( size_t k=0UL; k<K; ++k ) {
1480  const IntrinsicType a1( set( A(i,k) ) );
1481  xmm1 = xmm1 - a1 * B.get(k,j );
1482  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1483  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1484  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1485  xmm5 = xmm5 - a1 * B.get(k,j+IT::size*4UL);
1486  xmm6 = xmm6 - a1 * B.get(k,j+IT::size*5UL);
1487  xmm7 = xmm7 - a1 * B.get(k,j+IT::size*6UL);
1488  xmm8 = xmm8 - a1 * B.get(k,j+IT::size*7UL);
1489  }
1490  store( &(~C)(i,j ), xmm1 );
1491  store( &(~C)(i,j+IT::size ), xmm2 );
1492  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1493  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1494  store( &(~C)(i,j+IT::size*4UL), xmm5 );
1495  store( &(~C)(i,j+IT::size*5UL), xmm6 );
1496  store( &(~C)(i,j+IT::size*6UL), xmm7 );
1497  store( &(~C)(i,j+IT::size*7UL), xmm8 );
1498  }
1499  }
// Stage 2: column blocks of 4 vectors, two rows of C per iteration (each B vector is loaded
// once and used for both rows); a trailing odd row is handled by the 'if( i < M )' branch.
1500  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1501  size_t i( 0UL );
1502  for( ; (i+2UL) <= M; i+=2UL ) {
1503  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1504  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
1505  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
1506  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
1507  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
1508  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
1509  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1510  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1511  for( size_t k=0UL; k<K; ++k ) {
1512  const IntrinsicType a1( set( A(i ,k) ) );
1513  const IntrinsicType a2( set( A(i+1UL,k) ) );
1514  const IntrinsicType b1( B.get(k,j ) );
1515  const IntrinsicType b2( B.get(k,j+IT::size ) );
1516  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1517  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1518  xmm1 = xmm1 - a1 * b1;
1519  xmm2 = xmm2 - a1 * b2;
1520  xmm3 = xmm3 - a1 * b3;
1521  xmm4 = xmm4 - a1 * b4;
1522  xmm5 = xmm5 - a2 * b1;
1523  xmm6 = xmm6 - a2 * b2;
1524  xmm7 = xmm7 - a2 * b3;
1525  xmm8 = xmm8 - a2 * b4;
1526  }
1527  store( &(~C)(i ,j ), xmm1 );
1528  store( &(~C)(i ,j+IT::size ), xmm2 );
1529  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1530  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1531  store( &(~C)(i+1UL,j ), xmm5 );
1532  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1533  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1534  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1535  }
1536  if( i < M ) {
1537  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1538  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1539  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1540  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1541  for( size_t k=0UL; k<K; ++k ) {
1542  const IntrinsicType a1( set( A(i,k) ) );
1543  xmm1 = xmm1 - a1 * B.get(k,j );
1544  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1545  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1546  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1547  }
1548  store( &(~C)(i,j ), xmm1 );
1549  store( &(~C)(i,j+IT::size ), xmm2 );
1550  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1551  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1552  }
1553  }
// Stage 3: column blocks of 2 vectors, again two rows at a time plus an optional last row.
1554  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1555  size_t i( 0UL );
1556  for( ; (i+2UL) <= M; i+=2UL ) {
1557  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1558  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1559  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1560  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1561  for( size_t k=0UL; k<K; ++k ) {
1562  const IntrinsicType a1( set( A(i ,k) ) );
1563  const IntrinsicType a2( set( A(i+1UL,k) ) );
1564  const IntrinsicType b1( B.get(k,j ) );
1565  const IntrinsicType b2( B.get(k,j+IT::size) );
1566  xmm1 = xmm1 - a1 * b1;
1567  xmm2 = xmm2 - a1 * b2;
1568  xmm3 = xmm3 - a2 * b1;
1569  xmm4 = xmm4 - a2 * b2;
1570  }
1571  store( &(~C)(i ,j ), xmm1 );
1572  store( &(~C)(i ,j+IT::size), xmm2 );
1573  store( &(~C)(i+1UL,j ), xmm3 );
1574  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1575  }
1576  if( i < M ) {
1577  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1578  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1579  for( size_t k=0UL; k<K; ++k ) {
1580  const IntrinsicType a1( set( A(i,k) ) );
1581  xmm1 = xmm1 - a1 * B.get(k,j );
1582  xmm2 = xmm2 - a1 * B.get(k,j+IT::size);
1583  }
1584  store( &(~C)(i,j ), xmm1 );
1585  store( &(~C)(i,j+IT::size), xmm2 );
1586  }
1587  }
// Stage 4: remainder. Exactly one vector-wide block starting at j is processed.
// NOTE(review): this assumes the remaining N-j columns equal IT::size, i.e. that N is a
// multiple of IT::size -- confirm that spacing() guarantees this.
1588  if( j < N ) {
1589  size_t i( 0UL );
1590  for( ; (i+2UL) <= M; i+=2UL ) {
1591  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1592  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1593  for( size_t k=0UL; k<K; ++k ) {
1594  const IntrinsicType b1( B.get(k,j) );
1595  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1596  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1597  }
1598  store( &(~C)(i ,j), xmm1 );
1599  store( &(~C)(i+1UL,j), xmm2 );
1600  }
1601  if( i < M ) {
1602  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1603  for( size_t k=0UL; k<K; ++k ) {
1604  xmm1 = xmm1 - set( A(i,k) ) * B.get(k,j);
1605  }
1606  store( &(~C)(i,j), xmm1 );
1607  }
1608  }
1609  }
1611  //**********************************************************************************************
1612 
1613  //**Vectorized default subtraction assignment to column-major dense matrices********************
1627  template< typename MT3 // Type of the left-hand side target matrix
1628  , typename MT4 // Type of the left-hand side matrix operand
1629  , typename MT5 > // Type of the right-hand side matrix operand
1630  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1631  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1632  {
1633  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1634  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1635 
1636  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1637  const typename MT4::OppositeType tmp( A );
1638  subAssign( ~C, tmp * B );
1639  }
1640  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1641  const typename MT5::OppositeType tmp( B );
1642  subAssign( ~C, A * tmp );
1643  }
1644  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1645  const typename MT4::OppositeType tmp( A );
1646  subAssign( ~C, tmp * B );
1647  }
1648  else {
1649  const typename MT5::OppositeType tmp( B );
1650  subAssign( ~C, A * tmp );
1651  }
1652  }
1654  //**********************************************************************************************
1655 
1656  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1670  template< typename MT3 // Type of the left-hand side target matrix
1671  , typename MT4 // Type of the left-hand side matrix operand
1672  , typename MT5 > // Type of the right-hand side matrix operand
1673  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1674  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1675  {
1676  selectDefaultSubAssignKernel( C, A, B );
1677  }
1679  //**********************************************************************************************
1680 
1681  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1682 #if BLAZE_BLAS_MODE
1683 
1696  template< typename MT3 // Type of the left-hand side target matrix
1697  , typename MT4 // Type of the left-hand side matrix operand
1698  , typename MT5 > // Type of the right-hand side matrix operand
1699  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1700  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1701  {
1702  using boost::numeric_cast;
1703 
1704  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1705  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1706  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1707 
1708  const int M ( numeric_cast<int>( A.rows() ) );
1709  const int N ( numeric_cast<int>( B.columns() ) );
1710  const int K ( numeric_cast<int>( A.columns() ) );
1711  const int lda( numeric_cast<int>( A.spacing() ) );
1712  const int ldb( numeric_cast<int>( B.spacing() ) );
1713  const int ldc( numeric_cast<int>( C.spacing() ) );
1714 
1715  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1716  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1717  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1718  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1719  }
1721 #endif
1722  //**********************************************************************************************
1723 
1724  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1725 #if BLAZE_BLAS_MODE
1726 
1739  template< typename MT3 // Type of the left-hand side target matrix
1740  , typename MT4 // Type of the left-hand side matrix operand
1741  , typename MT5 > // Type of the right-hand side matrix operand
1742  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1743  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1744  {
1745  using boost::numeric_cast;
1746 
1747  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1748  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1749  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1750 
1751  const int M ( numeric_cast<int>( A.rows() ) );
1752  const int N ( numeric_cast<int>( B.columns() ) );
1753  const int K ( numeric_cast<int>( A.columns() ) );
1754  const int lda( numeric_cast<int>( A.spacing() ) );
1755  const int ldb( numeric_cast<int>( B.spacing() ) );
1756  const int ldc( numeric_cast<int>( C.spacing() ) );
1757 
1758  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1759  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1760  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1761  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1762  }
1764 #endif
1765  //**********************************************************************************************
1766 
1767  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1768 #if BLAZE_BLAS_MODE
1769 
1782  template< typename MT3 // Type of the left-hand side target matrix
1783  , typename MT4 // Type of the left-hand side matrix operand
1784  , typename MT5 > // Type of the right-hand side matrix operand
1785  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1786  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1787  {
1788  using boost::numeric_cast;
1789 
1790  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1791  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1792  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1793  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1794  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1795  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1796 
1797  const int M ( numeric_cast<int>( A.rows() ) );
1798  const int N ( numeric_cast<int>( B.columns() ) );
1799  const int K ( numeric_cast<int>( A.columns() ) );
1800  const int lda( numeric_cast<int>( A.spacing() ) );
1801  const int ldb( numeric_cast<int>( B.spacing() ) );
1802  const int ldc( numeric_cast<int>( C.spacing() ) );
1803  const complex<float> alpha( -1.0F, 0.0F );
1804  const complex<float> beta ( 1.0F, 0.0F );
1805 
1806  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1807  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1808  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1809  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1810  }
1812 #endif
1813  //**********************************************************************************************
1814 
1815  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1816 #if BLAZE_BLAS_MODE
1817 
1830  template< typename MT3 // Type of the left-hand side target matrix
1831  , typename MT4 // Type of the left-hand side matrix operand
1832  , typename MT5 > // Type of the right-hand side matrix operand
1833  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1834  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1835  {
1836  using boost::numeric_cast;
1837 
1838  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1839  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1840  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1841  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1842  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1843  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1844 
1845  const int M ( numeric_cast<int>( A.rows() ) );
1846  const int N ( numeric_cast<int>( B.columns() ) );
1847  const int K ( numeric_cast<int>( A.columns() ) );
1848  const int lda( numeric_cast<int>( A.spacing() ) );
1849  const int ldb( numeric_cast<int>( B.spacing() ) );
1850  const int ldc( numeric_cast<int>( C.spacing() ) );
1851  const complex<double> alpha( -1.0, 0.0 );
1852  const complex<double> beta ( 1.0, 0.0 );
1853 
1854  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1855  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1856  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1857  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1858  }
1860 #endif
1861  //**********************************************************************************************
1862 
1863  //**Subtraction assignment to sparse matrices***************************************************
1864  // No special implementation for the subtraction assignment to sparse matrices.
1865  //**********************************************************************************************
1866 
1867  //**Multiplication assignment to dense matrices*************************************************
1868  // No special implementation for the multiplication assignment to dense matrices.
1869  //**********************************************************************************************
1870 
1871  //**Multiplication assignment to sparse matrices************************************************
1872  // No special implementation for the multiplication assignment to sparse matrices.
1873  //**********************************************************************************************
1874 
1875  //**Compile time checks*************************************************************************
1882  //**********************************************************************************************
1883 };
1884 //*************************************************************************************************
1885 
1886 
1887 
1888 
1889 //=================================================================================================
1890 //
1891 // DMATSCALARMULTEXPR SPECIALIZATION
1892 //
1893 //=================================================================================================
1894 
1895 //*************************************************************************************************
1903 template< typename MT1 // Type of the left-hand side dense matrix
1904  , typename MT2 // Type of the right-hand side dense matrix
1905  , typename ST > // Type of the right-hand side scalar value
1906 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
1907  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
1908  , private Expression
1909  , private Computation
1910 {
1911  private:
1912  //**Type definitions****************************************************************************
// Private shorthand types used throughout this specialization.
1913  typedef DMatDMatMultExpr<MT1,MT2> MMM;  //!< Type of the wrapped dense matrix/dense matrix multiplication expression.
1914  typedef typename MMM::ResultType RES;  //!< Result type nested in the multiplication expression.
1915  typedef typename MT1::ResultType RT1;  //!< Result type nested in the left-hand side matrix type MT1.
1916  typedef typename MT2::ResultType RT2;  //!< Result type nested in the right-hand side matrix type MT2.
1917  typedef typename MT1::CompositeType CT1;  //!< Composite type nested in the left-hand side matrix type MT1.
1918  typedef typename MT2::CompositeType CT2;  //!< Composite type nested in the right-hand side matrix type MT2.
1919  //**********************************************************************************************
1920 
1921  //**********************************************************************************************
1923 
1926  template< typename T1, typename T2, typename T3, typename T4 >
1927  struct UseSinglePrecisionKernel {
1928  enum { value = IsFloat<typename T1::ElementType>::value &&
1929  IsFloat<typename T2::ElementType>::value &&
1930  IsFloat<typename T3::ElementType>::value &&
1931  !IsComplex<T4>::value };
1932  };
1933  //**********************************************************************************************
1934 
1935  //**********************************************************************************************
1937 
1940  template< typename T1, typename T2, typename T3, typename T4 >
1941  struct UseDoublePrecisionKernel {
1942  enum { value = IsDouble<typename T1::ElementType>::value &&
1943  IsDouble<typename T2::ElementType>::value &&
1944  IsDouble<typename T3::ElementType>::value &&
1945  !IsComplex<T4>::value };
1946  };
1947  //**********************************************************************************************
1948 
1949  //**********************************************************************************************
1951 
1954  template< typename T1, typename T2, typename T3 >
1955  struct UseSinglePrecisionComplexKernel {
1956  typedef complex<float> Type;
1957  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1958  IsSame<typename T2::ElementType,Type>::value &&
1959  IsSame<typename T3::ElementType,Type>::value };
1960  };
1961  //**********************************************************************************************
1962 
1963  //**********************************************************************************************
1965 
1968  template< typename T1, typename T2, typename T3 >
1969  struct UseDoublePrecisionComplexKernel {
1970  typedef complex<double> Type;
1971  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1972  IsSame<typename T2::ElementType,Type>::value &&
1973  IsSame<typename T3::ElementType,Type>::value };
1974  };
1975  //**********************************************************************************************
1976 
1977  //**********************************************************************************************
1979 
1981  template< typename T1, typename T2, typename T3, typename T4 >
1982  struct UseDefaultKernel {
1983  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1984  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1985  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1986  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1987  };
1988  //**********************************************************************************************
1989 
1990  //**********************************************************************************************
1992 
1994  template< typename T1, typename T2, typename T3, typename T4 >
1995  struct UseVectorizedDefaultKernel {
1996  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1997  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
1998  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
1999  IsSame<typename T1::ElementType,T4>::value &&
2000  IntrinsicTrait<typename T1::ElementType>::addition &&
2001  IntrinsicTrait<typename T1::ElementType>::multiplication };
2002  };
2003  //**********************************************************************************************
2004 
2005  public:
2006  //**Type definitions****************************************************************************
// Public type interface of the scaled matrix multiplication expression.
2007  typedef DMatScalarMultExpr<MMM,ST,false> This;  //!< Type of this DMatScalarMultExpr specialization.
2008  typedef typename MultTrait<RES,ST>::Type ResultType;  //!< Result type: MultTrait of the product's result type and the scalar type.
2009  typedef typename ResultType::OppositeType OppositeType;  //!< OppositeType nested in ResultType (presumably the opposite storage order - confirm).
2010  typedef typename ResultType::TransposeType TransposeType;  //!< TransposeType nested in ResultType.
2011  typedef typename ResultType::ElementType ElementType;  //!< Element type nested in ResultType.
2012  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  //!< Intrinsic type associated with ElementType via IntrinsicTrait.
2013  typedef const ElementType ReturnType;  //!< Return type of the access operator.
2014  typedef const ResultType CompositeType;  //!< Composite type: the expression is represented by its (const) result type.
2015 
//! Type of the left-hand side operand: the wrapped multiplication expression, held by value.
2017  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2018 
//! Type of the right-hand side scalar: ElementType if ElementType is numeric, otherwise ST.
2020  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
2021 
//! Type used to evaluate the left matrix operand: const RT1 if MT1 is a computation, CT1 otherwise.
2023  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2024 
//! Type used to evaluate the right matrix operand: const RT2 if MT2 is a computation, CT2 otherwise.
2026  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2027  //**********************************************************************************************
2028 
2029  //**Compilation flags***************************************************************************
//! Compilation switch for vectorization; fixed to 0 for this expression.
2031  enum { vectorizable = 0 };
2032 
//! Compilation switch for aliasing; taken over from CanAlias of the wrapped multiplication expression.
2034  enum { canAlias = CanAlias<MMM>::value };
2035  //**********************************************************************************************
2036 
2037  //**Constructor*********************************************************************************
2043  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2044  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2045  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2046  {}
2047  //**********************************************************************************************
2048 
2049  //**Access operator*****************************************************************************
2056  inline ReturnType operator()( size_t i, size_t j ) const {
2057  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2058  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2059  return matrix_(i,j) * scalar_;
2060  }
2061  //**********************************************************************************************
2062 
2063  //**Rows function*******************************************************************************
2068  inline size_t rows() const {
2069  return matrix_.rows();
2070  }
2071  //**********************************************************************************************
2072 
2073  //**Columns function****************************************************************************
2078  inline size_t columns() const {
2079  return matrix_.columns();
2080  }
2081  //**********************************************************************************************
2082 
2083  //**Left operand access*************************************************************************
2088  inline LeftOperand leftOperand() const {
2089  return matrix_;
2090  }
2091  //**********************************************************************************************
2092 
2093  //**Right operand access************************************************************************
2098  inline RightOperand rightOperand() const {
2099  return scalar_;
2100  }
2101  //**********************************************************************************************
2102 
2103  //**********************************************************************************************
2109  template< typename T >
2110  inline bool isAliased( const T* alias ) const {
2111  return matrix_.isAliased( alias );
2112  }
2113  //**********************************************************************************************
2114 
2115  private:
2116  //**Member variables****************************************************************************
2117  LeftOperand matrix_;  //!< Left-hand side dense matrix multiplication expression (set by the constructor).
2118  RightOperand scalar_;  //!< Right-hand side scalar of the multiplication expression (set by the constructor).
2119  //**********************************************************************************************
2120 
2121  //**Assignment to dense matrices****************************************************************
2130  template< typename MT3 // Type of the target dense matrix
2131  , bool SO > // Storage order of the target dense matrix
2132  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2133  {
2134  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2135  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2136 
2137  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2138  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2139 
2140  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2141  return;
2142  }
2143  else if( left.columns() == 0UL ) {
2144  reset( ~lhs );
2145  return;
2146  }
2147 
2148  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2149  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2150 
2151  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2152  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2153  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2154  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2155  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2156  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2157 
2158  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2159  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2160  else
2161  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2162  }
2163  //**********************************************************************************************
2164 
2165  //**Default assignment to dense matrices********************************************************
2179  template< typename MT3 // Type of the left-hand side target matrix
2180  , typename MT4 // Type of the left-hand side matrix operand
2181  , typename MT5 // Type of the right-hand side matrix operand
2182  , typename ST2 > // Type of the scalar value
2183  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2184  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2185  {
2186  const size_t M( A.rows() );
2187  const size_t N( B.columns() );
2188  const size_t K( A.columns() );
2189 
2190  for( size_t i=0UL; i<M; ++i ) {
2191  for( size_t j=0UL; j<N; ++j ) {
2192  C(i,j) = A(i,0UL) * B(0UL,j);
2193  }
2194  for( size_t k=1UL; k<K; ++k ) {
2195  for( size_t j=0UL; j<N; ++j ) {
2196  C(i,j) += A(i,k) * B(k,j);
2197  }
2198  }
2199  for( size_t j=0UL; j<N; ++j ) {
2200  C(i,j) *= scalar;
2201  }
2202  }
2203  }
2204  //**********************************************************************************************
2205 
2206  //**Vectorized default assignment to row-major dense matrices***********************************
/*!\brief Vectorized default assignment of a scaled dense matrix-dense matrix multiplication
//        to a row-major dense matrix.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Selected when UseVectorizedDefaultKernel applies to the involved types. The columns are
// processed in panels of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements; in
// all but the 8-vector panel two rows are handled per iteration, with a separate pass for a
// remaining last row. Every accumulated vector is multiplied by the broadcast scalar before
// it is stored into C.
//
// NOTE(review): the accumulators (xmm1, ...) are declared without an initializer, so the
// kernel presumably relies on the default constructor of IntrinsicType zeroing them - confirm.
// NOTE(review): the column bound is B.spacing(), so stores to C may extend beyond B.columns();
// this assumes C provides at least the same row spacing as B - confirm.
*/
2220  template< typename MT3 // Type of the left-hand side target matrix
2221  , typename MT4 // Type of the left-hand side matrix operand
2222  , typename MT5 // Type of the right-hand side matrix operand
2223  , typename ST2 > // Type of the scalar value
2224  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2225  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2226  {
2227  typedef IntrinsicTrait<ElementType> IT;
2228 
2229  const size_t M( A.rows() );
2230  const size_t N( B.spacing() ); // Column bound: spacing of B rather than B.columns()
2231  const size_t K( A.columns() );
2232 
// Scalar broadcast into an intrinsic vector; applied to every result vector before the store
2233  const IntrinsicType factor( set( scalar ) );
2234 
// Column index shared by all panel loops below; each loop continues where the previous stopped
2235  size_t j( 0UL );
2236 
// Panel of 8 intrinsic vectors per row, one row at a time
2237  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2238  for( size_t i=0UL; i<M; ++i ) {
2239  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2240  for( size_t k=0UL; k<K; ++k ) {
2241  const IntrinsicType a1( set( A(i,k) ) ); // Broadcast of the single element A(i,k)
2242  xmm1 = xmm1 + a1 * B.get(k,j );
2243  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2244  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2245  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2246  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2247  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2248  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2249  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2250  }
2251  store( &(~C)(i,j ), xmm1 * factor );
2252  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2253  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2254  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2255  store( &(~C)(i,j+IT::size*4UL), xmm5 * factor );
2256  store( &(~C)(i,j+IT::size*5UL), xmm6 * factor );
2257  store( &(~C)(i,j+IT::size*6UL), xmm7 * factor );
2258  store( &(~C)(i,j+IT::size*7UL), xmm8 * factor );
2259  }
2260  }
// Panel of 4 intrinsic vectors, two rows at a time (xmm1-4: row i, xmm5-8: row i+1)
2261  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2262  size_t i( 0UL );
2263  for( ; (i+2UL) <= M; i+=2UL ) {
2264  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2265  for( size_t k=0UL; k<K; ++k ) {
2266  const IntrinsicType a1( set( A(i ,k) ) );
2267  const IntrinsicType a2( set( A(i+1UL,k) ) );
2268  const IntrinsicType b1( B.get(k,j ) );
2269  const IntrinsicType b2( B.get(k,j+IT::size ) );
2270  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2271  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2272  xmm1 = xmm1 + a1 * b1;
2273  xmm2 = xmm2 + a1 * b2;
2274  xmm3 = xmm3 + a1 * b3;
2275  xmm4 = xmm4 + a1 * b4;
2276  xmm5 = xmm5 + a2 * b1;
2277  xmm6 = xmm6 + a2 * b2;
2278  xmm7 = xmm7 + a2 * b3;
2279  xmm8 = xmm8 + a2 * b4;
2280  }
2281  store( &(~C)(i ,j ), xmm1 * factor );
2282  store( &(~C)(i ,j+IT::size ), xmm2 * factor );
2283  store( &(~C)(i ,j+IT::size*2UL), xmm3 * factor );
2284  store( &(~C)(i ,j+IT::size*3UL), xmm4 * factor );
2285  store( &(~C)(i+1UL,j ), xmm5 * factor );
2286  store( &(~C)(i+1UL,j+IT::size ), xmm6 * factor );
2287  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 * factor );
2288  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 * factor );
2289  }
// Remaining last row in case M is odd
2290  if( i < M ) {
2291  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2292  for( size_t k=0UL; k<K; ++k ) {
2293  const IntrinsicType a1( set( A(i,k) ) );
2294  xmm1 = xmm1 + a1 * B.get(k,j );
2295  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2296  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2297  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2298  }
2299  store( &(~C)(i,j ), xmm1 * factor );
2300  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2301  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2302  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2303  }
2304  }
// Panel of 2 intrinsic vectors, two rows at a time (xmm1-2: row i, xmm3-4: row i+1)
2305  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2306  size_t i( 0UL );
2307  for( ; (i+2UL) <= M; i+=2UL ) {
2308  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2309  for( size_t k=0UL; k<K; ++k ) {
2310  const IntrinsicType a1( set( A(i ,k) ) );
2311  const IntrinsicType a2( set( A(i+1UL,k) ) );
2312  const IntrinsicType b1( B.get(k,j ) );
2313  const IntrinsicType b2( B.get(k,j+IT::size) );
2314  xmm1 = xmm1 + a1 * b1;
2315  xmm2 = xmm2 + a1 * b2;
2316  xmm3 = xmm3 + a2 * b1;
2317  xmm4 = xmm4 + a2 * b2;
2318  }
2319  store( &(~C)(i ,j ), xmm1 * factor );
2320  store( &(~C)(i ,j+IT::size), xmm2 * factor );
2321  store( &(~C)(i+1UL,j ), xmm3 * factor );
2322  store( &(~C)(i+1UL,j+IT::size), xmm4 * factor );
2323  }
// Remaining last row in case M is odd
2324  if( i < M ) {
2325  IntrinsicType xmm1, xmm2;
2326  for( size_t k=0UL; k<K; ++k ) {
2327  const IntrinsicType a1( set( A(i,k) ) );
2328  xmm1 = xmm1 + a1 * B.get(k,j );
2329  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2330  }
2331  store( &(~C)(i,j ), xmm1 * factor );
2332  store( &(~C)(i,j+IT::size), xmm2 * factor );
2333  }
2334  }
// Final single intrinsic vector of columns, two rows at a time
2335  if( j < N ) {
2336  size_t i( 0UL );
2337  for( ; (i+2UL) <= M; i+=2UL ) {
2338  IntrinsicType xmm1, xmm2;
2339  for( size_t k=0UL; k<K; ++k ) {
2340  const IntrinsicType b1( B.get(k,j) );
2341  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2342  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2343  }
2344  store( &(~C)(i ,j), xmm1 * factor );
2345  store( &(~C)(i+1UL,j), xmm2 * factor );
2346  }
// Remaining last row in case M is odd
2347  if( i < M ) {
2348  IntrinsicType xmm1;
2349  for( size_t k=0UL; k<K; ++k ) {
2350  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2351  }
2352  store( &(~C)(i,j), xmm1 * factor );
2353  }
2354  }
2355  }
2356  //**********************************************************************************************
2357 
2358  //**Vectorized default assignment to column-major dense matrices********************************
2372  template< typename MT3 // Type of the left-hand side target matrix
2373  , typename MT4 // Type of the left-hand side matrix operand
2374  , typename MT5 // Type of the right-hand side matrix operand
2375  , typename ST2 > // Type of the scalar value
2376  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2377  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2378  {
2379  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2380  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2381 
2382  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2383  const typename MT4::OppositeType tmp( A );
2384  assign( ~C, tmp * B * scalar );
2385  }
2386  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2387  const typename MT5::OppositeType tmp( B );
2388  assign( ~C, A * tmp * scalar );
2389  }
2390  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2391  const typename MT4::OppositeType tmp( A );
2392  assign( ~C, tmp * B * scalar );
2393  }
2394  else {
2395  const typename MT5::OppositeType tmp( B );
2396  assign( ~C, A * tmp * scalar );
2397  }
2398  }
2399  //**********************************************************************************************
2400 
2401  //**BLAS-based assignment to dense matrices (default)*******************************************
2415  template< typename MT3 // Type of the left-hand side target matrix
2416  , typename MT4 // Type of the left-hand side matrix operand
2417  , typename MT5 // Type of the right-hand side matrix operand
2418  , typename ST2 > // Type of the scalar value
2419  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2420  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2421  {
2422  selectDefaultAssignKernel( C, A, B, scalar );
2423  }
2424  //**********************************************************************************************
2425 
2426  //**BLAS-based assignment to dense matrices (single precision)**********************************
2427 #if BLAZE_BLAS_MODE
2428 
2441  template< typename MT3 // Type of the left-hand side target matrix
2442  , typename MT4 // Type of the left-hand side matrix operand
2443  , typename MT5 // Type of the right-hand side matrix operand
2444  , typename ST2 > // Type of the scalar value
2445  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2446  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2447  {
2448  using boost::numeric_cast;
2449 
2450  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2451  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2452  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2453 
2454  const int M ( numeric_cast<int>( A.rows() ) );
2455  const int N ( numeric_cast<int>( B.columns() ) );
2456  const int K ( numeric_cast<int>( A.columns() ) );
2457  const int lda( numeric_cast<int>( A.spacing() ) );
2458  const int ldb( numeric_cast<int>( B.spacing() ) );
2459  const int ldc( numeric_cast<int>( C.spacing() ) );
2460 
2461  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2462  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2463  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2464  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2465  }
2466 #endif
2467  //**********************************************************************************************
2468 
2469  //**BLAS-based assignment to dense matrices (double precision)**********************************
2470 #if BLAZE_BLAS_MODE
2471 
2484  template< typename MT3 // Type of the left-hand side target matrix
2485  , typename MT4 // Type of the left-hand side matrix operand
2486  , typename MT5 // Type of the right-hand side matrix operand
2487  , typename ST2 > // Type of the scalar value
2488  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2489  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2490  {
2491  using boost::numeric_cast;
2492 
2493  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2494  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2495  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2496 
2497  const int M ( numeric_cast<int>( A.rows() ) );
2498  const int N ( numeric_cast<int>( B.columns() ) );
2499  const int K ( numeric_cast<int>( A.columns() ) );
2500  const int lda( numeric_cast<int>( A.spacing() ) );
2501  const int ldb( numeric_cast<int>( B.spacing() ) );
2502  const int ldc( numeric_cast<int>( C.spacing() ) );
2503 
2504  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2505  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2506  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2507  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2508  }
2509 #endif
2510  //**********************************************************************************************
2511 
2512  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2513 #if BLAZE_BLAS_MODE
2514 
2527  template< typename MT3 // Type of the left-hand side target matrix
2528  , typename MT4 // Type of the left-hand side matrix operand
2529  , typename MT5 // Type of the right-hand side matrix operand
2530  , typename ST2 > // Type of the scalar value
2531  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2532  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2533  {
2534  using boost::numeric_cast;
2535 
2536  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2537  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2538  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2540  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2541  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2542  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2543 
2544  const int M ( numeric_cast<int>( A.rows() ) );
2545  const int N ( numeric_cast<int>( B.columns() ) );
2546  const int K ( numeric_cast<int>( A.columns() ) );
2547  const int lda( numeric_cast<int>( A.spacing() ) );
2548  const int ldb( numeric_cast<int>( B.spacing() ) );
2549  const int ldc( numeric_cast<int>( C.spacing() ) );
2550  const complex<float> alpha( scalar );
2551  const complex<float> beta ( 0.0F, 0.0F );
2552 
2553  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2554  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2555  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2556  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2557  }
2558 #endif
2559  //**********************************************************************************************
2560 
2561  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2562 #if BLAZE_BLAS_MODE
2563 
2576  template< typename MT3 // Type of the left-hand side target matrix
2577  , typename MT4 // Type of the left-hand side matrix operand
2578  , typename MT5 // Type of the right-hand side matrix operand
2579  , typename ST2 > // Type of the scalar
2580  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2581  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2582  {
2583  using boost::numeric_cast;
2584 
2585  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2586  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2587  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2590  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2591  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2592 
2593  const int M ( numeric_cast<int>( A.rows() ) );
2594  const int N ( numeric_cast<int>( B.columns() ) );
2595  const int K ( numeric_cast<int>( A.columns() ) );
2596  const int lda( numeric_cast<int>( A.spacing() ) );
2597  const int ldb( numeric_cast<int>( B.spacing() ) );
2598  const int ldc( numeric_cast<int>( C.spacing() ) );
2599  const complex<double> alpha( scalar );
2600  const complex<double> beta ( 0.0, 0.0 );
2601 
2602  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2603  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2604  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2605  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2606  }
2607 #endif
2608  //**********************************************************************************************
2609 
2610  //**Assignment to sparse matrices***************************************************************
2621  template< typename MT // Type of the target sparse matrix
2622  , bool SO > // Storage order of the target sparse matrix
2623  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2624  {
2625  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2626 
2632  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2633 
2634  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2635  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2636 
2637  const TmpType tmp( rhs );
2638  assign( ~lhs, tmp );
2639  }
2640  //**********************************************************************************************
2641 
2642  //**Addition assignment to dense matrices*******************************************************
2654  template< typename MT3 // Type of the target dense matrix
2655  , bool SO > // Storage order of the target dense matrix
2656  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2657  {
2658  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2659  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2660 
2661  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2662  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2663 
2664  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2665  return;
2666  }
2667 
2668  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2669  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2670 
2671  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2672  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2673  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2674  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2675  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2676  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2677 
2678  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2679  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2680  else
2681  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2682  }
2683  //**********************************************************************************************
2684 
2685  //**Default addition assignment to dense matrices***********************************************
2699  template< typename MT3 // Type of the left-hand side target matrix
2700  , typename MT4 // Type of the left-hand side matrix operand
2701  , typename MT5 // Type of the right-hand side matrix operand
2702  , typename ST2 > // Type of the scalar value
2703  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2704  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2705  {
2706  const ResultType tmp( A * B * scalar );
2707  addAssign( C, tmp );
2708  }
2709  //**********************************************************************************************
2710 
2711  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default kernel for the scaled addition assignment to a row-major dense matrix:
// every entry of C is incremented by 'scalar' times the corresponding entry of the product A*B.
//
// C      : target matrix, updated in place through load()/store() on &(~C)(i,j)
// A      : left-hand side operand, read one element at a time as A(i,k)
// B      : right-hand side operand, read one intrinsic vector at a time as B.get(k,j)
// scalar : factor multiplied onto each accumulated sum of products before the update
//
// This overload participates only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> enables it.
2725  template< typename MT3 // Type of the left-hand side target matrix
2726  , typename MT4 // Type of the left-hand side matrix operand
2727  , typename MT5 // Type of the right-hand side matrix operand
2728  , typename ST2 > // Type of the scalar value
2729  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2730  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2731  {
2732  typedef IntrinsicTrait<ElementType> IT;
2733 
2734  const size_t M( A.rows() );
// The column bound is B.spacing() rather than B.columns().
// NOTE(review): presumably this lets the loops work on whole intrinsic vectors including any
// row padding of B and C -- confirm that both matrices are padded accordingly.
2735  const size_t N( B.spacing() );
2736  const size_t K( A.columns() );
2737 
// The scalar is converted once into an intrinsic vector and reused for every store below.
2738  const IntrinsicType factor( set( scalar ) );
2739 
// 'j' is shared by all column loops below: each loop continues where the previous one stopped.
2740  size_t j( 0UL );
2741 
// Stage 1: column panels of 8 intrinsic vectors (IT::size*8 columns), one row of C per pass.
2742  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2743  for( size_t i=0UL; i<M; ++i ) {
// NOTE(review): the accumulators are default-constructed and immediately summed into; this
// relies on IntrinsicType's default constructor producing zero -- confirm.
2744  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2745  for( size_t k=0UL; k<K; ++k ) {
2746  const IntrinsicType a1( set( A(i,k) ) );
2747  xmm1 = xmm1 + a1 * B.get(k,j );
2748  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2749  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2750  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2751  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2752  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2753  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2754  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2755  }
// C(i,j..) += accumulated products * scalar
2756  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2757  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2758  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2759  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2760  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) + xmm5 * factor );
2761  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) + xmm6 * factor );
2762  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) + xmm7 * factor );
2763  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) + xmm8 * factor );
2764  }
2765  }
// Stage 2: column panels of 4 intrinsic vectors, two rows of C per pass. Each vector of B is
// fetched once (b1..b4) and used for both rows.
2766  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2767  size_t i( 0UL );
2768  for( ; (i+2UL) <= M; i+=2UL ) {
2769  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2770  for( size_t k=0UL; k<K; ++k ) {
2771  const IntrinsicType a1( set( A(i ,k) ) );
2772  const IntrinsicType a2( set( A(i+1UL,k) ) );
2773  const IntrinsicType b1( B.get(k,j ) );
2774  const IntrinsicType b2( B.get(k,j+IT::size ) );
2775  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2776  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2777  xmm1 = xmm1 + a1 * b1;
2778  xmm2 = xmm2 + a1 * b2;
2779  xmm3 = xmm3 + a1 * b3;
2780  xmm4 = xmm4 + a1 * b4;
2781  xmm5 = xmm5 + a2 * b1;
2782  xmm6 = xmm6 + a2 * b2;
2783  xmm7 = xmm7 + a2 * b3;
2784  xmm8 = xmm8 + a2 * b4;
2785  }
2786  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2787  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) + xmm2 * factor );
2788  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) + xmm3 * factor );
2789  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) + xmm4 * factor );
2790  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm5 * factor );
2791  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) + xmm6 * factor );
2792  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) + xmm7 * factor );
2793  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) + xmm8 * factor );
2794  }
// Remaining single row when M is odd.
2795  if( i < M ) {
2796  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2797  for( size_t k=0UL; k<K; ++k ) {
2798  const IntrinsicType a1( set( A(i,k) ) );
2799  xmm1 = xmm1 + a1 * B.get(k,j );
2800  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2801  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2802  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2803  }
2804  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2805  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2806  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2807  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2808  }
2809  }
// Stage 3: column panels of 2 intrinsic vectors, two rows of C per pass.
2810  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2811  size_t i( 0UL );
2812  for( ; (i+2UL) <= M; i+=2UL ) {
2813  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2814  for( size_t k=0UL; k<K; ++k ) {
2815  const IntrinsicType a1( set( A(i ,k) ) );
2816  const IntrinsicType a2( set( A(i+1UL,k) ) );
2817  const IntrinsicType b1( B.get(k,j ) );
2818  const IntrinsicType b2( B.get(k,j+IT::size) );
2819  xmm1 = xmm1 + a1 * b1;
2820  xmm2 = xmm2 + a1 * b2;
2821  xmm3 = xmm3 + a2 * b1;
2822  xmm4 = xmm4 + a2 * b2;
2823  }
2824  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2825  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) + xmm2 * factor );
2826  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm3 * factor );
2827  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) + xmm4 * factor );
2828  }
// Remaining single row when M is odd.
2829  if( i < M ) {
2830  IntrinsicType xmm1, xmm2;
2831  for( size_t k=0UL; k<K; ++k ) {
2832  const IntrinsicType a1( set( A(i,k) ) );
2833  xmm1 = xmm1 + a1 * B.get(k,j );
2834  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2835  }
2836  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2837  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) + xmm2 * factor );
2838  }
2839  }
// Stage 4: whatever is left after the 2-vector panels is handled as one intrinsic vector of
// columns starting at j, again two rows per pass plus an odd-row remainder.
2840  if( j < N ) {
2841  size_t i( 0UL );
2842  for( ; (i+2UL) <= M; i+=2UL ) {
2843  IntrinsicType xmm1, xmm2;
2844  for( size_t k=0UL; k<K; ++k ) {
2845  const IntrinsicType b1( B.get(k,j) );
2846  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2847  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2848  }
2849  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2850  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) + xmm2 * factor );
2851  }
2852  if( i < M ) {
2853  IntrinsicType xmm1;
2854  for( size_t k=0UL; k<K; ++k ) {
2855  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2856  }
2857  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2858  }
2859  }
2860  }
2861  //**********************************************************************************************
2862 
2863  //**Vectorized default addition assignment to column-major dense matrices***********************
2877  template< typename MT3 // Type of the left-hand side target matrix
2878  , typename MT4 // Type of the left-hand side matrix operand
2879  , typename MT5 // Type of the right-hand side matrix operand
2880  , typename ST2 > // Type of the scalar value
2881  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2882  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2883  {
2884  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2885  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2886 
2887  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2888  const typename MT4::OppositeType tmp( A );
2889  addAssign( ~C, tmp * B * scalar );
2890  }
2891  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2892  const typename MT5::OppositeType tmp( B );
2893  addAssign( ~C, A * tmp * scalar );
2894  }
2895  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2896  const typename MT4::OppositeType tmp( A );
2897  addAssign( ~C, tmp * B * scalar );
2898  }
2899  else {
2900  const typename MT5::OppositeType tmp( B );
2901  addAssign( ~C, A * tmp * scalar );
2902  }
2903  }
2904  //**********************************************************************************************
2905 
2906  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback overload of the BLAS-based addition assignment kernel selection. It is enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and performs no BLAS call itself: the arguments are
// forwarded unchanged to selectDefaultAddAssignKernel().
//
// C      : target matrix of the addition assignment
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand
// scalar : scaling factor
2920  template< typename MT3 // Type of the left-hand side target matrix
2921  , typename MT4 // Type of the left-hand side matrix operand
2922  , typename MT5 // Type of the right-hand side matrix operand
2923  , typename ST2 > // Type of the scalar value
2924  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2925  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2926  {
2927  selectDefaultAddAssignKernel( C, A, B, scalar );
2928  }
2929  //**********************************************************************************************
2930 
2931  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2932 #if BLAZE_BLAS_MODE
2933 
2946  template< typename MT3 // Type of the left-hand side target matrix
2947  , typename MT4 // Type of the left-hand side matrix operand
2948  , typename MT5 // Type of the right-hand side matrix operand
2949  , typename ST2 > // Type of the scalar value
2950  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2951  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2952  {
2953  using boost::numeric_cast;
2954 
2955  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2956  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2957  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2958 
2959  const int M ( numeric_cast<int>( A.rows() ) );
2960  const int N ( numeric_cast<int>( B.columns() ) );
2961  const int K ( numeric_cast<int>( A.columns() ) );
2962  const int lda( numeric_cast<int>( A.spacing() ) );
2963  const int ldb( numeric_cast<int>( B.spacing() ) );
2964  const int ldc( numeric_cast<int>( C.spacing() ) );
2965 
2966  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2967  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2968  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2969  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
2970  }
2971 #endif
2972  //**********************************************************************************************
2973 
2974  //**BLAS-based addition assignment to dense matrices (double precision)*************************
2975 #if BLAZE_BLAS_MODE
2976 
2989  template< typename MT3 // Type of the left-hand side target matrix
2990  , typename MT4 // Type of the left-hand side matrix operand
2991  , typename MT5 // Type of the right-hand side matrix operand
2992  , typename ST2 > // Type of the scalar value
2993  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2994  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2995  {
2996  using boost::numeric_cast;
2997 
2998  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2999  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3000  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3001 
3002  const int M ( numeric_cast<int>( A.rows() ) );
3003  const int N ( numeric_cast<int>( B.columns() ) );
3004  const int K ( numeric_cast<int>( A.columns() ) );
3005  const int lda( numeric_cast<int>( A.spacing() ) );
3006  const int ldb( numeric_cast<int>( B.spacing() ) );
3007  const int ldc( numeric_cast<int>( C.spacing() ) );
3008 
3009  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3010  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3011  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3012  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3013  }
3014 #endif
3015  //**********************************************************************************************
3016 
3017  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3018 #if BLAZE_BLAS_MODE
3019 
3032  template< typename MT3 // Type of the left-hand side target matrix
3033  , typename MT4 // Type of the left-hand side matrix operand
3034  , typename MT5 // Type of the right-hand side matrix operand
3035  , typename ST2 > // Type of the scalar value
3036  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3037  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3038  {
3039  using boost::numeric_cast;
3040 
3041  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3042  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3043  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3045  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3046  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3047  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3048 
3049  const int M ( numeric_cast<int>( A.rows() ) );
3050  const int N ( numeric_cast<int>( B.columns() ) );
3051  const int K ( numeric_cast<int>( A.columns() ) );
3052  const int lda( numeric_cast<int>( A.spacing() ) );
3053  const int ldb( numeric_cast<int>( B.spacing() ) );
3054  const int ldc( numeric_cast<int>( C.spacing() ) );
3055  const complex<float> alpha( scalar );
3056  const complex<float> beta ( 1.0F, 0.0F );
3057 
3058  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3059  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3060  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3061  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3062  }
3063 #endif
3064  //**********************************************************************************************
3065 
3066  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3067 #if BLAZE_BLAS_MODE
3068 
3081  template< typename MT3 // Type of the left-hand side target matrix
3082  , typename MT4 // Type of the left-hand side matrix operand
3083  , typename MT5 // Type of the right-hand side matrix operand
3084  , typename ST2 > // Type of the scalar value
3085  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3086  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3087  {
3088  using boost::numeric_cast;
3089 
3090  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3091  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3092  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3094  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3095  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3096  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3097 
3098  const int M ( numeric_cast<int>( A.rows() ) );
3099  const int N ( numeric_cast<int>( B.columns() ) );
3100  const int K ( numeric_cast<int>( A.columns() ) );
3101  const int lda( numeric_cast<int>( A.spacing() ) );
3102  const int ldb( numeric_cast<int>( B.spacing() ) );
3103  const int ldc( numeric_cast<int>( C.spacing() ) );
3104  const complex<double> alpha( scalar );
3105  const complex<double> beta ( 1.0, 0.0 );
3106 
3107  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3108  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3109  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3110  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3111  }
3112 #endif
3113  //**********************************************************************************************
3114 
3115  //**Addition assignment to sparse matrices******************************************************
3116  // No special implementation for the addition assignment to sparse matrices.
3117  //**********************************************************************************************
3118 
3119  //**Subtraction assignment to dense matrices****************************************************
3131  template< typename MT3 // Type of the target dense matrix
3132  , bool SO > // Storage order of the target dense matrix
3133  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3134  {
3135  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3136  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3137 
3138  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3139  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3140 
3141  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3142  return;
3143  }
3144 
3145  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3146  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3147 
3148  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3149  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3150  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3151  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3152  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3153  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3154 
3155  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
3156  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3157  else
3158  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3159  }
3160  //**********************************************************************************************
3161 
3162  //**Default subtraction assignment to dense matrices********************************************
// Default subtraction assignment kernel for the case that the vectorized default kernel is not
// applicable (the overload is removed via DisableIf when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds).
//
// The expression A * B * scalar is first evaluated into a temporary of type ResultType, which
// is then subtracted from C by means of subAssign().
//
// C      : target matrix of the subtraction assignment
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand
// scalar : scaling factor
3176  template< typename MT3 // Type of the left-hand side target matrix
3177  , typename MT4 // Type of the left-hand side matrix operand
3178  , typename MT5 // Type of the right-hand side matrix operand
3179  , typename ST2 > // Type of the scalar value
3180  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3181  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3182  {
3183  const ResultType tmp( A * B * scalar );
3184  subAssign( C, tmp );
3185  }
3186  //**********************************************************************************************
3187 
3188  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default kernel for the scaled subtraction assignment to a row-major dense matrix:
// every entry of C is decremented by 'scalar' times the corresponding entry of the product A*B.
//
// C      : target matrix, updated in place through load()/store() on &(~C)(i,j)
// A      : left-hand side operand, read one element at a time as A(i,k)
// B      : right-hand side operand, read one intrinsic vector at a time as B.get(k,j)
// scalar : factor multiplied onto each accumulated sum of products before the update
//
// This overload participates only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> enables it.
3202  template< typename MT3 // Type of the left-hand side target matrix
3203  , typename MT4 // Type of the left-hand side matrix operand
3204  , typename MT5 // Type of the right-hand side matrix operand
3205  , typename ST2 > // Type of the scalar value
3206  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3207  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3208  {
3209  typedef IntrinsicTrait<ElementType> IT;
3210 
3211  const size_t M( A.rows() );
// The column bound is B.spacing() rather than B.columns().
// NOTE(review): presumably this lets the loops work on whole intrinsic vectors including any
// row padding of B and C -- confirm that both matrices are padded accordingly.
3212  const size_t N( B.spacing() );
3213  const size_t K( A.columns() );
3214 
// The scalar is converted once into an intrinsic vector and reused for every store below.
3215  const IntrinsicType factor( set( scalar ) );
3216 
// 'j' is shared by all column loops below: each loop continues where the previous one stopped.
3217  size_t j( 0UL );
3218 
// Stage 1: column panels of 8 intrinsic vectors (IT::size*8 columns), one row of C per pass.
3219  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
3220  for( size_t i=0UL; i<M; ++i ) {
// NOTE(review): the accumulators are default-constructed and immediately summed into; this
// relies on IntrinsicType's default constructor producing zero -- confirm.
3221  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3222  for( size_t k=0UL; k<K; ++k ) {
3223  const IntrinsicType a1( set( A(i,k) ) );
3224  xmm1 = xmm1 + a1 * B.get(k,j );
3225  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3226  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3227  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3228  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
3229  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
3230  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
3231  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
3232  }
// C(i,j..) -= accumulated products * scalar
3233  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3234  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3235  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3236  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3237  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) - xmm5 * factor );
3238  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) - xmm6 * factor );
3239  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) - xmm7 * factor );
3240  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) - xmm8 * factor );
3241  }
3242  }
// Stage 2: column panels of 4 intrinsic vectors, two rows of C per pass. Each vector of B is
// fetched once (b1..b4) and used for both rows.
3243  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
3244  size_t i( 0UL );
3245  for( ; (i+2UL) <= M; i+=2UL ) {
3246  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3247  for( size_t k=0UL; k<K; ++k ) {
3248  const IntrinsicType a1( set( A(i ,k) ) );
3249  const IntrinsicType a2( set( A(i+1UL,k) ) );
3250  const IntrinsicType b1( B.get(k,j ) );
3251  const IntrinsicType b2( B.get(k,j+IT::size ) );
3252  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
3253  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
3254  xmm1 = xmm1 + a1 * b1;
3255  xmm2 = xmm2 + a1 * b2;
3256  xmm3 = xmm3 + a1 * b3;
3257  xmm4 = xmm4 + a1 * b4;
3258  xmm5 = xmm5 + a2 * b1;
3259  xmm6 = xmm6 + a2 * b2;
3260  xmm7 = xmm7 + a2 * b3;
3261  xmm8 = xmm8 + a2 * b4;
3262  }
3263  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3264  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) - xmm2 * factor );
3265  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) - xmm3 * factor );
3266  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) - xmm4 * factor );
3267  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm5 * factor );
3268  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) - xmm6 * factor );
3269  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) - xmm7 * factor );
3270  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) - xmm8 * factor );
3271  }
// Remaining single row when M is odd.
3272  if( i < M ) {
3273  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3274  for( size_t k=0UL; k<K; ++k ) {
3275  const IntrinsicType a1( set( A(i,k) ) );
3276  xmm1 = xmm1 + a1 * B.get(k,j );
3277  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3278  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3279  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3280  }
3281  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3282  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3283  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3284  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3285  }
3286  }
// Stage 3: column panels of 2 intrinsic vectors, two rows of C per pass.
3287  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
3288  size_t i( 0UL );
3289  for( ; (i+2UL) <= M; i+=2UL ) {
3290  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3291  for( size_t k=0UL; k<K; ++k ) {
3292  const IntrinsicType a1( set( A(i ,k) ) );
3293  const IntrinsicType a2( set( A(i+1UL,k) ) );
3294  const IntrinsicType b1( B.get(k,j ) );
3295  const IntrinsicType b2( B.get(k,j+IT::size) );
3296  xmm1 = xmm1 + a1 * b1;
3297  xmm2 = xmm2 + a1 * b2;
3298  xmm3 = xmm3 + a2 * b1;
3299  xmm4 = xmm4 + a2 * b2;
3300  }
3301  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3302  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) - xmm2 * factor );
3303  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm3 * factor );
3304  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) - xmm4 * factor );
3305  }
// Remaining single row when M is odd.
3306  if( i < M ) {
3307  IntrinsicType xmm1, xmm2;
3308  for( size_t k=0UL; k<K; ++k ) {
3309  const IntrinsicType a1( set( A(i,k) ) );
3310  xmm1 = xmm1 + a1 * B.get(k,j );
3311  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
3312  }
3313  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3314  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) - xmm2 * factor );
3315  }
3316  }
// Stage 4: whatever is left after the 2-vector panels is handled as one intrinsic vector of
// columns starting at j, again two rows per pass plus an odd-row remainder.
3317  if( j < N ) {
3318  size_t i( 0UL );
3319  for( ; (i+2UL) <= M; i+=2UL ) {
3320  IntrinsicType xmm1, xmm2;
3321  for( size_t k=0UL; k<K; ++k ) {
3322  const IntrinsicType b1( B.get(k,j) );
3323  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3324  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3325  }
3326  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3327  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) - xmm2 * factor );
3328  }
3329  if( i < M ) {
3330  IntrinsicType xmm1;
3331  for( size_t k=0UL; k<K; ++k ) {
3332  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
3333  }
3334  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3335  }
3336  }
3337  }
3338  //**********************************************************************************************
3339 
3340  //**Vectorized default subtraction assignment to column-major dense matrices********************
3354  template< typename MT3 // Type of the left-hand side target matrix
3355  , typename MT4 // Type of the left-hand side matrix operand
3356  , typename MT5 // Type of the right-hand side matrix operand
3357  , typename ST2 > // Type of the scalar value
3358  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3359  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3360  {
3361  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3362  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3363 
3364  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3365  const typename MT4::OppositeType tmp( A );
3366  subAssign( ~C, tmp * B * scalar );
3367  }
3368  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3369  const typename MT5::OppositeType tmp( B );
3370  subAssign( ~C, A * tmp * scalar );
3371  }
3372  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3373  const typename MT4::OppositeType tmp( A );
3374  subAssign( ~C, tmp * B * scalar );
3375  }
3376  else {
3377  const typename MT5::OppositeType tmp( B );
3378  subAssign( ~C, A * tmp * scalar );
3379  }
3380  }
3381  //**********************************************************************************************
3382 
3383  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS-based subtraction assignment kernel selection. It is enabled
// when UseDefaultKernel<MT3,MT4,MT5,ST2> holds and performs no BLAS call itself: the arguments
// are forwarded unchanged to selectDefaultSubAssignKernel().
//
// C      : target matrix of the subtraction assignment
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand
// scalar : scaling factor
3397  template< typename MT3 // Type of the left-hand side target matrix
3398  , typename MT4 // Type of the left-hand side matrix operand
3399  , typename MT5 // Type of the right-hand side matrix operand
3400  , typename ST2 > // Type of the scalar value
3401  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3402  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3403  {
3404  selectDefaultSubAssignKernel( C, A, B, scalar );
3405  }
3406  //**********************************************************************************************
3407 
3408  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3409 #if BLAZE_BLAS_MODE
3410 
3423  template< typename MT3 // Type of the left-hand side target matrix
3424  , typename MT4 // Type of the left-hand side matrix operand
3425  , typename MT5 // Type of the right-hand side matrix operand
3426  , typename ST2 > // Type of the scalar value
3427  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3428  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3429  {
3430  using boost::numeric_cast;
3431 
3432  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
3433  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
3434  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
3435 
3436  const int M ( numeric_cast<int>( A.rows() ) );
3437  const int N ( numeric_cast<int>( B.columns() ) );
3438  const int K ( numeric_cast<int>( A.columns() ) );
3439  const int lda( numeric_cast<int>( A.spacing() ) );
3440  const int ldb( numeric_cast<int>( B.spacing() ) );
3441  const int ldc( numeric_cast<int>( C.spacing() ) );
3442 
3443  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3444  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3445  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3446  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3447  }
3448 #endif
3449  //**********************************************************************************************
3450 
3451  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3452 #if BLAZE_BLAS_MODE
3453 
3466  template< typename MT3 // Type of the left-hand side target matrix
3467  , typename MT4 // Type of the left-hand side matrix operand
3468  , typename MT5 // Type of the right-hand side matrix operand
3469  , typename ST2 > // Type of the scalar value
3470  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3471  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3472  {
3473  using boost::numeric_cast;
3474 
3475  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3476  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3477  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3478 
3479  const int M ( numeric_cast<int>( A.rows() ) );
3480  const int N ( numeric_cast<int>( B.columns() ) );
3481  const int K ( numeric_cast<int>( A.columns() ) );
3482  const int lda( numeric_cast<int>( A.spacing() ) );
3483  const int ldb( numeric_cast<int>( B.spacing() ) );
3484  const int ldc( numeric_cast<int>( C.spacing() ) );
3485 
3486  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3487  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3488  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3489  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3490  }
3491 #endif
3492  //**********************************************************************************************
3493 
3494  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3495 #if BLAZE_BLAS_MODE
3496 
3509  template< typename MT3 // Type of the left-hand side target matrix
3510  , typename MT4 // Type of the left-hand side matrix operand
3511  , typename MT5 // Type of the right-hand side matrix operand
3512  , typename ST2 > // Type of the scalar value
3513  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3514  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3515  {
3516  using boost::numeric_cast;
3517 
3518  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3519  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3520  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3522  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3523  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3524  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3525 
3526  const int M ( numeric_cast<int>( A.rows() ) );
3527  const int N ( numeric_cast<int>( B.columns() ) );
3528  const int K ( numeric_cast<int>( A.columns() ) );
3529  const int lda( numeric_cast<int>( A.spacing() ) );
3530  const int ldb( numeric_cast<int>( B.spacing() ) );
3531  const int ldc( numeric_cast<int>( C.spacing() ) );
3532  const complex<float> alpha( -scalar );
3533  const complex<float> beta ( 1.0F, 0.0F );
3534 
3535  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3536  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3537  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3538  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3539  }
3540 #endif
3541  //**********************************************************************************************
3542 
3543  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3544 #if BLAZE_BLAS_MODE
3545 
// BLAS-based subtraction assignment kernel for double precision complex operands.
//
// Forwards the scaled product to cblas_zgemm with alpha = -scalar and beta = 1, so that the
// call updates the target as C := C - scalar * A * B (standard GEMM semantics
// C := alpha*op(A)*op(B) + beta*C).
//
// The overload only participates in overload resolution when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> enables it (see the EnableIf return type).
//
// \param C      The target matrix that is updated in place.
// \param A      The left-hand side matrix operand.
// \param B      The right-hand side matrix operand.
// \param scalar The scaling factor applied to the product A * B.
3558  template< typename MT3 // Type of the left-hand side target matrix
3559  , typename MT4 // Type of the left-hand side matrix operand
3560  , typename MT5 // Type of the right-hand side matrix operand
3561  , typename ST2 > // Type of the scalar value
3562  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3563  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3564  {
3565  using boost::numeric_cast;
3566 
// Compile time checks: all three element types must be complex types whose value_type
// is double, since the data pointers are handed to the double precision complex routine.
3567  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3568  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3569  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3571  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3572  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3573  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3574 
// The CBLAS interface takes int dimensions; numeric_cast is Boost's checked conversion, so
// sizes that do not fit into an int are not silently truncated.
// M x K times K x N: M and K come from A, N from B.
3575  const int M ( numeric_cast<int>( A.rows() ) );
3576  const int N ( numeric_cast<int>( B.columns() ) );
3577  const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from spacing() of each matrix.
3578  const int lda( numeric_cast<int>( A.spacing() ) );
3579  const int ldb( numeric_cast<int>( B.spacing() ) );
3580  const int ldc( numeric_cast<int>( C.spacing() ) );
// Negating the scalar turns the accumulating GEMM into a subtraction; beta = 1 keeps the
// current content of C.
3581  const complex<double> alpha( -scalar );
3582  const complex<double> beta ( 1.0, 0.0 );
3583 
// The storage order and both transposition flags are derived solely from the storage order
// of the target type MT3: row-major -> (CblasRowMajor, NoTrans, NoTrans), otherwise
// (CblasColMajor, Trans, Trans).
// NOTE(review): this appears to rely on A and B always being row-major in this expression
// class - confirm against the operand constraints of the enclosing class.
3584  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3585  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3586  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3587  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3588  }
3589 #endif
3590  //**********************************************************************************************
3591 
3592  //**Subtraction assignment to sparse matrices***************************************************
3593  // No special implementation for the subtraction assignment to sparse matrices.
3594  //**********************************************************************************************
3595 
3596  //**Multiplication assignment to dense matrices*************************************************
3597  // No special implementation for the multiplication assignment to dense matrices.
3598  //**********************************************************************************************
3599 
3600  //**Multiplication assignment to sparse matrices************************************************
3601  // No special implementation for the multiplication assignment to sparse matrices.
3602  //**********************************************************************************************
3603 
3604  //**Compile time checks*************************************************************************
3612  //**********************************************************************************************
3613 };
3615 //*************************************************************************************************
3616 
3617 
3618 
3619 
3620 //=================================================================================================
3621 //
3622 // GLOBAL BINARY ARITHMETIC OPERATORS
3623 //
3624 //=================================================================================================
3625 
3626 //*************************************************************************************************
// Multiplication operator for the multiplication of two dense matrices (A = B * C).
//
// Returns an expression object of type DMatDMatMultExpr<T1,T2> that refers to both operands;
// the operator itself only validates the operand sizes and constructs that expression.
//
// \param lhs The left-hand side matrix for the multiplication.
// \param rhs The right-hand side matrix for the multiplication.
// \return The expression object representing the product of the two matrices.
// \exception std::invalid_argument Matrix sizes do not match.
//
// NOTE(review): the declarator line (listing line 3655) was missing from this listing, which
// left the definition without a name and without the lhs/rhs parameters used in the body. It
// has been restored to match the body, the DMatDMatMultExpr<T1,T2> return type and the
// DenseMatrix<...,false> base of the expression class - confirm against the original header.
3652 template< typename T1 // Type of the left-hand side dense matrix
3653  , typename T2 > // Type of the right-hand side dense matrix
3654 inline const DMatDMatMultExpr<T1,T2>
3655  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,false>& rhs )
3656 {
// The inner dimensions must agree: columns of the left operand vs. rows of the right operand.
3657  if( (~lhs).columns() != (~rhs).rows() )
3658  throw std::invalid_argument( "Matrix sizes do not match" );
3659 
// operator~ is applied to both operands before they are handed to the expression.
3660  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3661 }
3662 //*************************************************************************************************
3663 
3664 
3665 
3666 
3667 //=================================================================================================
3668 //
3669 // EXPRESSION TRAIT SPECIALIZATIONS
3670 //
3671 //=================================================================================================
3672 
3673 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for a left-hand side operand that is itself a
// DMatDMatMultExpr<MT1,MT2>, i.e. for expressions of the form (A * B) * v.
//
// The nested Type is built by re-associating the product as A * (B * v):
// DMatDVecMultExprTrait<MT2,VT>::Type is evaluated first and then used as the vector operand
// of DMatDVecMultExprTrait<MT1,...>. This type is chosen only if MT1 and MT2 are both
// row-major dense matrices and VT is a non-transpose dense vector; in all other cases Type is
// INVALID_TYPE.
3675 template< typename MT1, typename MT2, typename VT >
3676 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3677 {
3678  public:
3679  //**********************************************************************************************
// Condition (first three lines), result type if the condition holds, fallback type otherwise.
3680  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3681  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3682  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3683  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
3684  , INVALID_TYPE >::Type Type;
3685  //**********************************************************************************************
3686 };
3688 //*************************************************************************************************
3689 
3690 
3691 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for a left-hand side operand that is itself a
// DMatDMatMultExpr<MT1,MT2>, i.e. for expressions of the form (A * B) * v with a sparse
// vector v.
//
// The nested Type is built by re-associating the product as A * (B * v):
// DMatSVecMultExprTrait<MT2,VT>::Type is evaluated first and its result is then used as the
// vector operand of the dense trait DMatDVecMultExprTrait<MT1,...>. This type is chosen only if
// MT1 and MT2 are both row-major dense matrices and VT is a non-transpose sparse vector; in
// all other cases Type is INVALID_TYPE.
3693 template< typename MT1, typename MT2, typename VT >
3694 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3695 {
3696  public:
3697  //**********************************************************************************************
// Note the switch from the sparse trait (inner) to the dense trait (outer).
// NOTE(review): presumably because matrix * sparse vector yields a dense vector - confirm.
3698  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3699  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3700  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3701  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
3702  , INVALID_TYPE >::Type Type;
3703  //**********************************************************************************************
3704 };
3706 //*************************************************************************************************
3707 
3708 
3709 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for a right-hand side operand that is itself a
// DMatDMatMultExpr<MT1,MT2>, i.e. for expressions of the form v * (A * B) with a transpose
// dense vector v.
//
// The nested Type is built by re-associating the product as (v * A) * B:
// TDVecDMatMultExprTrait<VT,MT1>::Type is evaluated first and then used as the vector operand
// of TDVecDMatMultExprTrait<...,MT2>. This type is chosen only if VT is a transpose dense
// vector and MT1 and MT2 are both row-major dense matrices; in all other cases Type is
// INVALID_TYPE.
3711 template< typename VT, typename MT1, typename MT2 >
3712 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3713 {
3714  public:
3715  //**********************************************************************************************
// Condition (first three lines), result type if the condition holds, fallback type otherwise.
3716  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3717  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3718  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3719  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3720  , INVALID_TYPE >::Type Type;
3721  //**********************************************************************************************
3722 };
3724 //*************************************************************************************************
3725 
3726 
3727 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for a right-hand side operand that is itself a
// DMatDMatMultExpr<MT1,MT2>, i.e. for expressions of the form v * (A * B) with a transpose
// sparse vector v.
//
// The nested Type is built by re-associating the product as (v * A) * B:
// TSVecDMatMultExprTrait<VT,MT1>::Type is evaluated first and its result is then used as the
// vector operand of the dense trait TDVecDMatMultExprTrait<...,MT2>. This type is chosen only
// if VT is a transpose sparse vector and MT1 and MT2 are both row-major dense matrices; in all
// other cases Type is INVALID_TYPE.
3729 template< typename VT, typename MT1, typename MT2 >
3730 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3731 {
3732  public:
3733  //**********************************************************************************************
// Note the switch from the sparse trait (inner) to the dense trait (outer).
// NOTE(review): presumably because sparse vector * dense matrix yields a dense vector - confirm.
3734  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3735  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3736  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3737  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3738  , INVALID_TYPE >::Type Type;
3739  //**********************************************************************************************
3740 };
3742 //*************************************************************************************************
3743 
3744 } // namespace blaze
3745 
3746 #endif