All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
38 #include <blaze/math/Intrinsics.h>
39 #include <blaze/math/shims/Reset.h>
55 #include <blaze/system/BLAS.h>
57 #include <blaze/util/Assert.h>
58 #include <blaze/util/Complex.h>
63 #include <blaze/util/EnableIf.h>
64 #include <blaze/util/InvalidType.h>
65 #include <blaze/util/SelectType.h>
66 #include <blaze/util/Types.h>
72 
73 
74 namespace blaze {
75 
76 //=================================================================================================
77 //
78 // CLASS TDMATTDMATMULTEXPR
79 //
80 //=================================================================================================
81 
82 //*************************************************************************************************
89 template< typename MT1 // Type of the left-hand side dense matrix
90  , typename MT2 > // Type of the right-hand side dense matrix
91 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
92  , private Expression
93  , private Computation
94 {
95  private:
96  //**Type definitions****************************************************************************
// Shorthand aliases for the nested types published by the two operand matrix types.
97  typedef typename MT1::ResultType RT1;  // ResultType of the left-hand side operand
98  typedef typename MT2::ResultType RT2;  // ResultType of the right-hand side operand
99  typedef typename MT1::ElementType ET1;  // ElementType of the left-hand side operand
100  typedef typename MT2::ElementType ET2;  // ElementType of the right-hand side operand
101  typedef typename MT1::CompositeType CT1;  // CompositeType of the left-hand side operand
102  typedef typename MT2::CompositeType CT2;  // CompositeType of the right-hand side operand
103  //**********************************************************************************************
104 
105  //**********************************************************************************************
107 
108 
110  template< typename T1, typename T2, typename T3 >
111  struct UseSinglePrecisionKernel {
115  };
117  //**********************************************************************************************
118 
119  //**********************************************************************************************
121 
122 
124  template< typename T1, typename T2, typename T3 >
125  struct UseDoublePrecisionKernel {
129  };
131  //**********************************************************************************************
132 
133  //**********************************************************************************************
135 
136 
139  template< typename T1, typename T2, typename T3 >
140  struct UseSinglePrecisionComplexKernel {
141  typedef complex<float> Type;
142  enum { value = IsSame<typename T1::ElementType,Type>::value &&
143  IsSame<typename T2::ElementType,Type>::value &&
144  IsSame<typename T3::ElementType,Type>::value };
145  };
147  //**********************************************************************************************
148 
149  //**********************************************************************************************
151 
152 
155  template< typename T1, typename T2, typename T3 >
156  struct UseDoublePrecisionComplexKernel {
157  typedef complex<double> Type;
158  enum { value = IsSame<typename T1::ElementType,Type>::value &&
159  IsSame<typename T2::ElementType,Type>::value &&
160  IsSame<typename T3::ElementType,Type>::value };
161  };
163  //**********************************************************************************************
164 
165  //**********************************************************************************************
167 
168 
170  template< typename T1, typename T2, typename T3 >
171  struct UseDefaultKernel {
172  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
173  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
174  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
175  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
183 
185  template< typename T1, typename T2, typename T3 >
186  struct UseVectorizedDefaultKernel {
187  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
188  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
189  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
190  IntrinsicTrait<typename T1::ElementType>::addition &&
191  IntrinsicTrait<typename T1::ElementType>::multiplication };
192  };
194  //**********************************************************************************************
195 
196  public:
197  //**Type definitions****************************************************************************
// Public type definitions of the multiplication expression.
// NOTE(review): ResultType is used below but its declaration is not part of the visible
// listing; confirm it is declared ahead of these typedefs.
200  typedef typename ResultType::OppositeType OppositeType;  // OppositeType published by ResultType
201  typedef typename ResultType::TransposeType TransposeType;  // TransposeType published by ResultType
202  typedef typename ResultType::ElementType ElementType;  // Element type of the result
204  typedef const ElementType ReturnType;  // Return type of operator(): an element by value
205  typedef const ResultType CompositeType;  // Composite type: the expression is represented by its ResultType
206 

// Storage type of the left operand: held by value if MT1 is an expression, by const reference otherwise.
208  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
209 

// Storage type of the right operand: held by value if MT2 is an expression, by const reference otherwise.
211  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
212 

// Type used for the left operand inside the kernels: const RT1 if MT1 is a computation, CT1 otherwise.
214  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
215 

// Type used for the right operand inside the kernels: const RT2 if MT2 is a computation, CT2 otherwise.
217  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
218  //**********************************************************************************************
219 
220  //**Compilation flags***************************************************************************
// The expression itself is flagged as not vectorizable.
222  enum { vectorizable = 0 };
223 

// Aliasing is flagged as possible whenever at least one operand is not a computation,
// i.e. whenever LT or RT resolves to the operand's CompositeType instead of a const result type.
225  enum { canAlias = !IsComputation<MT1>::value || !IsComputation<MT2>::value };
226  //**********************************************************************************************
227 
228  //**Constructor*********************************************************************************
234  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
235  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
236  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
237  {
238  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
239  }
240  //**********************************************************************************************
241 
242  //**Access operator*****************************************************************************
249  inline ReturnType operator()( size_t i, size_t j ) const {
250  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
251  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
252 
253  ElementType tmp;
254 
255  if( lhs_.columns() != 0UL ) {
256  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
257  tmp = lhs_(i,0UL) * rhs_(0UL,j);
258  for( size_t k=1UL; k<end; k+=2UL ) {
259  tmp += lhs_(i,k ) * rhs_(k ,j);
260  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
261  }
262  if( end < lhs_.columns() ) {
263  tmp += lhs_(i,end) * rhs_(end,j);
264  }
265  }
266  else {
267  reset( tmp );
268  }
269 
270  return tmp;
271  }
272  //**********************************************************************************************
273 
274  //**Rows function*******************************************************************************
279  inline size_t rows() const {
280  return lhs_.rows();
281  }
282  //**********************************************************************************************
283 
284  //**Columns function****************************************************************************
289  inline size_t columns() const {
290  return rhs_.columns();
291  }
292  //**********************************************************************************************
293 
294  //**Left operand access*************************************************************************
299  inline LeftOperand leftOperand() const {
300  return lhs_;
301  }
302  //**********************************************************************************************
303 
304  //**Right operand access************************************************************************
309  inline RightOperand rightOperand() const {
310  return rhs_;
311  }
312  //**********************************************************************************************
313 
314  //**********************************************************************************************
320  template< typename T >
321  inline bool isAliased( const T* alias ) const {
322  return ( IsComputation<MT1>::value && lhs_.isAliased( alias ) ) ||
323  ( IsComputation<MT2>::value && rhs_.isAliased( alias ) );
324  }
325  //**********************************************************************************************
326 
327  private:
328  //**Member variables****************************************************************************
331  //**********************************************************************************************
332 
333  //**Assignment to dense matrices****************************************************************
343  template< typename MT // Type of the target dense matrix
344  , bool SO > // Storage order of the target dense matrix
345  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
346  {
347  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
348  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
349 
350  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
351  return;
352  }
353  else if( rhs.lhs_.columns() == 0UL ) {
354  reset( ~lhs );
355  return;
356  }
357 
358  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
359  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
360 
361  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
362  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
363  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
364  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
365  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
366  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
367 
368  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
369  TDMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
370  else
371  TDMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
372  }
374  //**********************************************************************************************
375 
376  //**Default assignment to dense matrices********************************************************
390  template< typename MT3 // Type of the left-hand side target matrix
391  , typename MT4 // Type of the left-hand side matrix operand
392  , typename MT5 > // Type of the right-hand side matrix operand
393  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
394  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
395  {
396  const size_t M( A.rows() );
397  const size_t N( B.columns() );
398  const size_t K( A.columns() );
399 
400  for( size_t i=0UL; i<M; ++i ) {
401  for( size_t j=0UL; j<N; ++j ) {
402  C(i,j) = A(i,0UL) * B(0UL,j);
403  }
404  for( size_t k=1UL; k<K; ++k ) {
405  for( size_t j=0UL; j<N; ++j ) {
406  C(i,j) += A(i,k) * B(k,j);
407  }
408  }
409  }
410  }
412  //**********************************************************************************************
413 
414  //**Vectorized default assignment to row-major dense matrices***********************************
428  template< typename MT3 // Type of the left-hand side target matrix
429  , typename MT4 // Type of the left-hand side matrix operand
430  , typename MT5 > // Type of the right-hand side matrix operand
431  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
432  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
433  {
434  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
435  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
436 
437  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
438  const typename MT5::OppositeType tmp( B );
439  assign( ~C, A * tmp );
440  }
441  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
442  const typename MT4::OppositeType tmp( A );
443  assign( ~C, tmp * B );
444  }
445  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
446  const typename MT5::OppositeType tmp( B );
447  assign( ~C, A * tmp );
448  }
449  else {
450  const typename MT4::OppositeType tmp( A );
451  assign( ~C, tmp * B );
452  }
453  }
455  //**********************************************************************************************
456 
457  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for the assignment C = A * B to a column-major target
// (DenseMatrix<MT3,true>); selected via EnableIf when UseVectorizedDefaultKernel applies.
//
// The row index i advances in whole intrinsic vectors of IT::size elements. Rows are processed
// in blocks of 8, then 4, then 2 vectors, and finally a single vector; columns of C are handled
// one at a time (8-vector block) or in pairs with a single-column remainder (other blocks).
// For every block the accumulators are summed over the complete inner dimension K and then
// written to C with store().
//
// NOTE(review): IntrinsicType is not declared in the visible listing; confirm its declaration.
// NOTE(review): the xmm accumulators are default-constructed and immediately added to; this
//               presumes that a default-constructed IntrinsicType is zero. Confirm.
// NOTE(review): M is taken from A.spacing(), not A.rows(); presumably spacing() is a multiple
//               of IT::size so that the final single-vector step covers all rows. Confirm.
471  template< typename MT3 // Type of the left-hand side target matrix
472  , typename MT4 // Type of the left-hand side matrix operand
473  , typename MT5 > // Type of the right-hand side matrix operand
474  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
475  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
476  {
477  typedef IntrinsicTrait<ElementType> IT;
478 
479  const size_t M( A.spacing() );
480  const size_t N( B.columns() );
481  const size_t K( A.columns() );
482 
483  size_t i( 0UL );
484 
// Blocks of 8 intrinsic vectors of rows, one column of C at a time
485  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
486  for( size_t j=0UL; j<N; ++j ) {
487  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
488  for( size_t k=0UL; k<K; ++k ) {
// B(k,j) is expanded via set() and multiplied with 8 consecutive vectors of column k of A
489  const IntrinsicType b1( set( B(k,j) ) );
490  xmm1 = xmm1 + A.get(i ,k) * b1;
491  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
492  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
493  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
494  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
495  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
496  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
497  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
498  }
499  store( &(~C)(i ,j), xmm1 );
500  store( &(~C)(i+IT::size ,j), xmm2 );
501  store( &(~C)(i+IT::size*2UL,j), xmm3 );
502  store( &(~C)(i+IT::size*3UL,j), xmm4 );
503  store( &(~C)(i+IT::size*4UL,j), xmm5 );
504  store( &(~C)(i+IT::size*5UL,j), xmm6 );
505  store( &(~C)(i+IT::size*6UL,j), xmm7 );
506  store( &(~C)(i+IT::size*7UL,j), xmm8 );
507  }
508  }
// Blocks of 4 intrinsic vectors of rows, two columns of C at a time
509  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
510  size_t j( 0UL );
511  for( ; (j+2UL) <= N; j+=2UL ) {
// xmm1..xmm4 accumulate column j, xmm5..xmm8 accumulate column j+1
512  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
513  for( size_t k=0UL; k<K; ++k ) {
514  const IntrinsicType a1( A.get(i ,k) );
515  const IntrinsicType a2( A.get(i+IT::size ,k) );
516  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
517  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
518  const IntrinsicType b1( set( B(k,j ) ) );
519  const IntrinsicType b2( set( B(k,j+1UL) ) );
520  xmm1 = xmm1 + a1 * b1;
521  xmm2 = xmm2 + a2 * b1;
522  xmm3 = xmm3 + a3 * b1;
523  xmm4 = xmm4 + a4 * b1;
524  xmm5 = xmm5 + a1 * b2;
525  xmm6 = xmm6 + a2 * b2;
526  xmm7 = xmm7 + a3 * b2;
527  xmm8 = xmm8 + a4 * b2;
528  }
529  store( &(~C)(i ,j ), xmm1 );
530  store( &(~C)(i+IT::size ,j ), xmm2 );
531  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
532  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
533  store( &(~C)(i ,j+1UL), xmm5 );
534  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
535  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
536  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
537  }
// Single remaining column when N is odd
538  if( j < N ) {
539  IntrinsicType xmm1, xmm2, xmm3, xmm4;
540  for( size_t k=0UL; k<K; ++k ) {
541  const IntrinsicType b1( set( B(k,j) ) );
542  xmm1 = xmm1 + A.get(i ,k) * b1;
543  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
544  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
545  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
546  }
547  store( &(~C)(i ,j), xmm1 );
548  store( &(~C)(i+IT::size ,j), xmm2 );
549  store( &(~C)(i+IT::size*2UL,j), xmm3 );
550  store( &(~C)(i+IT::size*3UL,j), xmm4 );
551  }
552  }
// Blocks of 2 intrinsic vectors of rows, two columns of C at a time
553  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
554  size_t j( 0UL );
555  for( ; (j+2UL) <= N; j+=2UL ) {
// xmm1/xmm2 accumulate column j, xmm3/xmm4 accumulate column j+1
556  IntrinsicType xmm1, xmm2, xmm3, xmm4;
557  for( size_t k=0UL; k<K; ++k ) {
558  const IntrinsicType a1( A.get(i ,k) );
559  const IntrinsicType a2( A.get(i+IT::size,k) );
560  const IntrinsicType b1( set( B(k,j ) ) );
561  const IntrinsicType b2( set( B(k,j+1UL) ) );
562  xmm1 = xmm1 + a1 * b1;
563  xmm2 = xmm2 + a2 * b1;
564  xmm3 = xmm3 + a1 * b2;
565  xmm4 = xmm4 + a2 * b2;
566  }
567  store( &(~C)(i ,j ), xmm1 );
568  store( &(~C)(i+IT::size,j ), xmm2 );
569  store( &(~C)(i ,j+1UL), xmm3 );
570  store( &(~C)(i+IT::size,j+1UL), xmm4 );
571  }
// Single remaining column when N is odd
572  if( j < N ) {
573  IntrinsicType xmm1, xmm2;
574  for( size_t k=0UL; k<K; ++k ) {
575  const IntrinsicType b1( set( B(k,j) ) );
576  xmm1 = xmm1 + A.get(i ,k) * b1;
577  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
578  }
579  store( &(~C)(i ,j), xmm1 );
580  store( &(~C)(i+IT::size,j), xmm2 );
581  }
582  }
// Remaining rows: exactly one intrinsic vector starting at row i is processed
583  if( i < M ) {
584  size_t j( 0UL );
585  for( ; (j+2UL) <= N; j+=2UL ) {
586  IntrinsicType xmm1, xmm2;
587  for( size_t k=0UL; k<K; ++k ) {
588  const IntrinsicType a1( A.get(i,k) );
589  xmm1 = xmm1 + a1 * set( B(k,j ) );
590  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
591  }
592  store( &(~C)(i,j ), xmm1 );
593  store( &(~C)(i,j+1UL), xmm2 );
594  }
// Single remaining column when N is odd
595  if( j < N ) {
596  IntrinsicType xmm1;
597  for( size_t k=0UL; k<K; ++k ) {
598  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
599  }
600  store( &(~C)(i,j), xmm1 );
601  }
602  }
603  }
605  //**********************************************************************************************
606 
607  //**BLAS-based assignment to dense matrices (default)*******************************************
621  template< typename MT3 // Type of the left-hand side target matrix
622  , typename MT4 // Type of the left-hand side matrix operand
623  , typename MT5 > // Type of the right-hand side matrix operand
624  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
625  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
626  {
627  selectDefaultAssignKernel( C, A, B );
628  }
630  //**********************************************************************************************
631 
632  //**BLAS-based assignment to dense matrices (single precision)**********************************
633 #if BLAZE_BLAS_MODE
634 
647  template< typename MT3 // Type of the left-hand side target matrix
648  , typename MT4 // Type of the left-hand side matrix operand
649  , typename MT5 > // Type of the right-hand side matrix operand
650  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
651  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
652  {
653  using boost::numeric_cast;
654 
655  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
656  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
657  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
658 
659  const int M ( numeric_cast<int>( A.rows() ) );
660  const int N ( numeric_cast<int>( B.columns() ) );
661  const int K ( numeric_cast<int>( A.columns() ) );
662  const int lda( numeric_cast<int>( A.spacing() ) );
663  const int ldb( numeric_cast<int>( B.spacing() ) );
664  const int ldc( numeric_cast<int>( C.spacing() ) );
665 
666  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
667  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
668  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
669  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
670  }
672 #endif
673  //**********************************************************************************************
674 
675  //**BLAS-based assignment to dense matrices (double precision)**********************************
676 #if BLAZE_BLAS_MODE
677 
690  template< typename MT3 // Type of the left-hand side target matrix
691  , typename MT4 // Type of the left-hand side matrix operand
692  , typename MT5 > // Type of the right-hand side matrix operand
693  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
694  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
695  {
696  using boost::numeric_cast;
697 
698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
700  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
701 
702  const int M ( numeric_cast<int>( A.rows() ) );
703  const int N ( numeric_cast<int>( B.columns() ) );
704  const int K ( numeric_cast<int>( A.columns() ) );
705  const int lda( numeric_cast<int>( A.spacing() ) );
706  const int ldb( numeric_cast<int>( B.spacing() ) );
707  const int ldc( numeric_cast<int>( C.spacing() ) );
708 
709  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
710  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
711  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
712  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
713  }
715 #endif
716  //**********************************************************************************************
717 
718  //**BLAS-based assignment to dense matrices (single precision complex)**************************
719 #if BLAZE_BLAS_MODE
720 
733  template< typename MT3 // Type of the left-hand side target matrix
734  , typename MT4 // Type of the left-hand side matrix operand
735  , typename MT5 > // Type of the right-hand side matrix operand
736  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
737  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
738  {
739  using boost::numeric_cast;
740 
741  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
742  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
743  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
744  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
745  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
746  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
747 
748  const int M ( numeric_cast<int>( A.rows() ) );
749  const int N ( numeric_cast<int>( B.columns() ) );
750  const int K ( numeric_cast<int>( A.columns() ) );
751  const int lda( numeric_cast<int>( A.spacing() ) );
752  const int ldb( numeric_cast<int>( B.spacing() ) );
753  const int ldc( numeric_cast<int>( C.spacing() ) );
754  complex<float> alpha( 1.0F, 0.0F );
755  complex<float> beta ( 0.0F, 0.0F );
756 
757  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
758  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
759  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
760  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
761  }
763 #endif
764  //**********************************************************************************************
765 
766  //**BLAS-based assignment to dense matrices (double precision complex)**************************
767 #if BLAZE_BLAS_MODE
768 
781  template< typename MT3 // Type of the left-hand side target matrix
782  , typename MT4 // Type of the left-hand side matrix operand
783  , typename MT5 > // Type of the right-hand side matrix operand
784  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
785  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
786  {
787  using boost::numeric_cast;
788 
789  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
790  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
791  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
792  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
793  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
794  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
795 
796  const int M ( numeric_cast<int>( A.rows() ) );
797  const int N ( numeric_cast<int>( B.columns() ) );
798  const int K ( numeric_cast<int>( A.columns() ) );
799  const int lda( numeric_cast<int>( A.spacing() ) );
800  const int ldb( numeric_cast<int>( B.spacing() ) );
801  const int ldc( numeric_cast<int>( C.spacing() ) );
802  complex<double> alpha( 1.0, 0.0 );
803  complex<double> beta ( 0.0, 0.0 );
804 
805  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
806  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
807  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
808  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
809  }
811 #endif
812  //**********************************************************************************************
813 
814  //**Assignment to sparse matrices***************************************************************
827  template< typename MT // Type of the target sparse matrix
828  , bool SO > // Storage order of the target sparse matrix
829  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
830  {
831  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
832 
838  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
839 
840  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
841  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
842 
843  const TmpType tmp( rhs );
844  assign( ~lhs, tmp );
845  }
847  //**********************************************************************************************
848 
849  //**Addition assignment to dense matrices*******************************************************
862  template< typename MT // Type of the target dense matrix
863  , bool SO > // Storage order of the target dense matrix
864  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
865  {
866  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
867  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
868 
869  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
870  return;
871  }
872 
873  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
874  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
875 
876  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
877  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
878  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
879  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
880  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
881  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
882 
883  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
884  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
885  else
886  TDMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
887  }
889  //**********************************************************************************************
890 
891  //**Default addition assignment to dense matrices***********************************************
905  template< typename MT3 // Type of the left-hand side target matrix
906  , typename MT4 // Type of the left-hand side matrix operand
907  , typename MT5 > // Type of the right-hand side matrix operand
908  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
909  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
910  {
911  const size_t M( A.rows() );
912  const size_t N( B.columns() );
913  const size_t K( A.columns() );
914 
915  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
916  const size_t end( N & size_t(-2) );
917 
918  for( size_t i=0UL; i<M; ++i ) {
919  for( size_t k=0UL; k<K; ++k ) {
920  for( size_t j=0UL; j<end; j+=2UL ) {
921  C(i,j ) += A(i,k) * B(k,j );
922  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
923  }
924  if( end < N ) {
925  C(i,end) += A(i,k) * B(k,end);
926  }
927  }
928  }
929  }
931  //**********************************************************************************************
932 
933  //**Vectorized default addition assignment to row-major dense matrices**************************
947  template< typename MT3 // Type of the left-hand side target matrix
948  , typename MT4 // Type of the left-hand side matrix operand
949  , typename MT5 > // Type of the right-hand side matrix operand
950  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
951  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
952  {
953  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
954  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
955 
956  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
957  const typename MT5::OppositeType tmp( B );
958  addAssign( ~C, A * tmp );
959  }
960  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
961  const typename MT4::OppositeType tmp( A );
962  addAssign( ~C, tmp * B );
963  }
964  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
965  const typename MT5::OppositeType tmp( B );
966  addAssign( ~C, A * tmp );
967  }
968  else {
969  const typename MT4::OppositeType tmp( A );
970  addAssign( ~C, tmp * B );
971  }
972  }
974  //**********************************************************************************************
975 
976  //**Vectorized default addition assignment to column-major dense matrices***********************
/*!\brief Vectorized default kernel for the addition assignment of the product A*B to C.
//
// \param C The target left-hand side dense matrix (storage order flag \a true).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is selected via EnableIf only if UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// For every processed position the current value of C is loaded, the products
// A.get(i,k) * set( B(k,j) ) are accumulated over all k, and the result is stored back to C.
// The row index i advances in steps of 8, 4, 2 and finally 1 times IT::size; in the 4x, 2x and
// 1x sections two columns (j and j+1) are processed per iteration, followed by a single
// remaining column if N is odd.
*/
990  template< typename MT3 // Type of the left-hand side target matrix
991  , typename MT4 // Type of the left-hand side matrix operand
992  , typename MT5 > // Type of the right-hand side matrix operand
993  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
994  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
995  {
996  typedef IntrinsicTrait<ElementType> IT;
997 
      // NOTE(review): M is taken from A.spacing() rather than A.rows() - presumably so that
      // padded rows are covered by whole intrinsic vectors; confirm against the matrix types.
998  const size_t M( A.spacing() );
999  const size_t N( B.columns() );
1000  const size_t K( A.columns() );
1001 
1002  size_t i( 0UL );
1003 
      // Row blocks of 8*IT::size, one column j at a time
1004  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1005  for( size_t j=0UL; j<N; ++j ) {
1006  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1007  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1008  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1009  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1010  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1011  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1012  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1013  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1014  for( size_t k=0UL; k<K; ++k ) {
1015  const IntrinsicType b1( set( B(k,j) ) );
1016  xmm1 = xmm1 + A.get(i ,k) * b1;
1017  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1018  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1019  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1020  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
1021  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
1022  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
1023  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
1024  }
1025  store( &(~C)(i ,j), xmm1 );
1026  store( &(~C)(i+IT::size ,j), xmm2 );
1027  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1028  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1029  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1030  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1031  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1032  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1033  }
1034  }
      // Row blocks of 4*IT::size, two columns (j, j+1) per iteration
1035  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1036  size_t j( 0UL );
1037  for( ; (j+2UL) <= N; j+=2UL ) {
1038  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1039  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1040  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1041  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1042  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1043  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1044  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1045  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1046  for( size_t k=0UL; k<K; ++k ) {
1047  const IntrinsicType a1( A.get(i ,k) );
1048  const IntrinsicType a2( A.get(i+IT::size ,k) );
1049  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1050  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1051  const IntrinsicType b1( set( B(k,j ) ) );
1052  const IntrinsicType b2( set( B(k,j+1UL) ) );
1053  xmm1 = xmm1 + a1 * b1;
1054  xmm2 = xmm2 + a2 * b1;
1055  xmm3 = xmm3 + a3 * b1;
1056  xmm4 = xmm4 + a4 * b1;
1057  xmm5 = xmm5 + a1 * b2;
1058  xmm6 = xmm6 + a2 * b2;
1059  xmm7 = xmm7 + a3 * b2;
1060  xmm8 = xmm8 + a4 * b2;
1061  }
1062  store( &(~C)(i ,j ), xmm1 );
1063  store( &(~C)(i+IT::size ,j ), xmm2 );
1064  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1065  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1066  store( &(~C)(i ,j+1UL), xmm5 );
1067  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1068  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1069  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1070  }
      // Single remaining column (N is odd)
1071  if( j < N ) {
1072  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1073  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1074  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1075  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1076  for( size_t k=0UL; k<K; ++k ) {
1077  const IntrinsicType b1( set( B(k,j) ) );
1078  xmm1 = xmm1 + A.get(i ,k) * b1;
1079  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1080  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1081  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1082  }
1083  store( &(~C)(i ,j), xmm1 );
1084  store( &(~C)(i+IT::size ,j), xmm2 );
1085  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1086  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1087  }
1088  }
      // Row blocks of 2*IT::size, two columns (j, j+1) per iteration
1089  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1090  size_t j( 0UL );
1091  for( ; (j+2UL) <= N; j+=2UL ) {
1092  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1093  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1094  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1095  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1096  for( size_t k=0UL; k<K; ++k ) {
1097  const IntrinsicType a1( A.get(i ,k) );
1098  const IntrinsicType a2( A.get(i+IT::size,k) );
1099  const IntrinsicType b1( set( B(k,j ) ) );
1100  const IntrinsicType b2( set( B(k,j+1UL) ) );
1101  xmm1 = xmm1 + a1 * b1;
1102  xmm2 = xmm2 + a2 * b1;
1103  xmm3 = xmm3 + a1 * b2;
1104  xmm4 = xmm4 + a2 * b2;
1105  }
1106  store( &(~C)(i ,j ), xmm1 );
1107  store( &(~C)(i+IT::size,j ), xmm2 );
1108  store( &(~C)(i ,j+1UL), xmm3 );
1109  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1110  }
      // Single remaining column (N is odd)
1111  if( j < N ) {
1112  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1113  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1114  for( size_t k=0UL; k<K; ++k ) {
1115  const IntrinsicType b1( set( B(k,j) ) );
1116  xmm1 = xmm1 + A.get(i ,k) * b1;
1117  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
1118  }
1119  store( &(~C)(i ,j), xmm1 );
1120  store( &(~C)(i+IT::size,j), xmm2 );
1121  }
1122  }
      // Remainder: fewer than 2*IT::size rows are left after the loop above; exactly one
      // intrinsic vector per column is processed here.
      // NOTE(review): this assumes the remainder is a full vector (M a multiple of IT::size) - confirm.
1123  if( i < M ) {
1124  size_t j( 0UL );
1125  for( ; (j+2UL) <= N; j+=2UL ) {
1126  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1127  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1128  for( size_t k=0UL; k<K; ++k ) {
1129  const IntrinsicType a1( A.get(i,k) );
1130  xmm1 = xmm1 + a1 * set( B(k,j ) );
1131  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1132  }
1133  store( &(~C)(i,j ), xmm1 );
1134  store( &(~C)(i,j+1UL), xmm2 );
1135  }
      // Single remaining column (N is odd)
1136  if( j < N ) {
1137  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1138  for( size_t k=0UL; k<K; ++k ) {
1139  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
1140  }
1141  store( &(~C)(i,j), xmm1 );
1142  }
1143  }
1144  }
1146  //**********************************************************************************************
1147 
1148  //**BLAS-based addition assignment to dense matrices (default)**********************************
/*!\brief Fallback of the BLAS-based addition assignment kernel selection.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDefaultKernel<MT3,MT4,MT5> holds. No BLAS routine is called
// in this case; the operation is forwarded to selectDefaultAddAssignKernel().
*/
1162  template< typename MT3 // Target matrix type
1163  , typename MT4 // Left operand type
1164  , typename MT5 > // Right operand type
1165  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1166  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1167  {
1168  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1169  }
1171  //**********************************************************************************************
1172 
1173  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1174 #if BLAZE_BLAS_MODE
1175 
/*!\brief BLAS-based addition assignment for single precision operands (cblas_sgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5> holds. The kernel calls
// cblas_sgemm with alpha = 1 and beta = 1, i.e. the product is added to the current content of
// C. For a row-major target both operands are passed as transposed, otherwise as non-transposed.
// All sizes and spacings are converted to int via boost::numeric_cast.
*/
1188  template< typename MT3 // Target matrix type
1189  , typename MT4 // Left operand type
1190  , typename MT5 > // Right operand type
1191  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1192  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1193  {
1194  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1195 
1196  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1197  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1198  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1199 
1200  const int rows = boost::numeric_cast<int>( A.rows() );
1201  const int cols = boost::numeric_cast<int>( B.columns() );
1202  const int inner = boost::numeric_cast<int>( A.columns() );
1203  const int strideA = boost::numeric_cast<int>( A.spacing() );
1204  const int strideB = boost::numeric_cast<int>( B.spacing() );
1205  const int strideC = boost::numeric_cast<int>( C.spacing() );
1206 
1207  cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1208  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1209  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1210  rows, cols, inner, 1.0F, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
1211  }
1213 #endif
1214  //**********************************************************************************************
1215 
1216  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1217 #if BLAZE_BLAS_MODE
1218 
/*!\brief BLAS-based addition assignment for double precision operands (cblas_dgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDoublePrecisionKernel<MT3,MT4,MT5> holds. The kernel calls
// cblas_dgemm with alpha = 1 and beta = 1, i.e. the product is added to the current content of
// C. For a row-major target both operands are passed as transposed, otherwise as non-transposed.
// All sizes and spacings are converted to int via boost::numeric_cast.
*/
1231  template< typename MT3 // Target matrix type
1232  , typename MT4 // Left operand type
1233  , typename MT5 > // Right operand type
1234  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1235  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1236  {
1237  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1238 
1239  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1240  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1241  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1242 
1243  const int rows = boost::numeric_cast<int>( A.rows() );
1244  const int cols = boost::numeric_cast<int>( B.columns() );
1245  const int inner = boost::numeric_cast<int>( A.columns() );
1246  const int strideA = boost::numeric_cast<int>( A.spacing() );
1247  const int strideB = boost::numeric_cast<int>( B.spacing() );
1248  const int strideC = boost::numeric_cast<int>( C.spacing() );
1249 
1250  cblas_dgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1251  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1252  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1253  rows, cols, inner, 1.0, A.data(), strideA, B.data(), strideB, 1.0, C.data(), strideC );
1254  }
1256 #endif
1257  //**********************************************************************************************
1258 
1259  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1260 #if BLAZE_BLAS_MODE
1261 
/*!\brief BLAS-based addition assignment for single precision complex operands (cblas_cgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. The kernel
// calls cblas_cgemm with alpha = (1,0) and beta = (1,0), i.e. the product is added to the
// current content of C. For a row-major target both operands are passed as transposed,
// otherwise as non-transposed. All sizes and spacings are converted to int via
// boost::numeric_cast.
*/
1274  template< typename MT3 // Target matrix type
1275  , typename MT4 // Left operand type
1276  , typename MT5 > // Right operand type
1277  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1278  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1279  {
1280  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1281 
1282  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1283  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1284  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1285  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1286  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1287  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1288 
1289  const int rows = boost::numeric_cast<int>( A.rows() );
1290  const int cols = boost::numeric_cast<int>( B.columns() );
1291  const int inner = boost::numeric_cast<int>( A.columns() );
1292  const int strideA = boost::numeric_cast<int>( A.spacing() );
1293  const int strideB = boost::numeric_cast<int>( B.spacing() );
1294  const int strideC = boost::numeric_cast<int>( C.spacing() );
1295  const complex<float> alpha = complex<float>( 1.0F, 0.0F ); // scaling of the product A*B
1296  const complex<float> beta = complex<float>( 1.0F, 0.0F ); // scaling of the existing C
1297 
1298  cblas_cgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1299  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1300  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1301  rows, cols, inner, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
1302  }
1304 #endif
1305  //**********************************************************************************************
1306 
1307  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1308 #if BLAZE_BLAS_MODE
1309 
/*!\brief BLAS-based addition assignment for double precision complex operands (cblas_zgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. The kernel
// calls cblas_zgemm with alpha = (1,0) and beta = (1,0), i.e. the product is added to the
// current content of C. For a row-major target both operands are passed as transposed,
// otherwise as non-transposed. All sizes and spacings are converted to int via
// boost::numeric_cast.
*/
1322  template< typename MT3 // Target matrix type
1323  , typename MT4 // Left operand type
1324  , typename MT5 > // Right operand type
1325  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1326  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1327  {
1328  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1329 
1330  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1331  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1332  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1333  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1334  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1335  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1336 
1337  const int rows = boost::numeric_cast<int>( A.rows() );
1338  const int cols = boost::numeric_cast<int>( B.columns() );
1339  const int inner = boost::numeric_cast<int>( A.columns() );
1340  const int strideA = boost::numeric_cast<int>( A.spacing() );
1341  const int strideB = boost::numeric_cast<int>( B.spacing() );
1342  const int strideC = boost::numeric_cast<int>( C.spacing() );
1343  const complex<double> alpha = complex<double>( 1.0, 0.0 ); // scaling of the product A*B
1344  const complex<double> beta = complex<double>( 1.0, 0.0 ); // scaling of the existing C
1345 
1346  cblas_zgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1347  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1348  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1349  rows, cols, inner, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
1350  }
1352 #endif
1353  //**********************************************************************************************
1354 
1355  //**Addition assignment to sparse matrices******************************************************
1356  // No special implementation for the addition assignment to sparse matrices.
1357  //**********************************************************************************************
1358 
1359  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a TDMatTDMatMultExpr to a dense matrix (lhs -= A*B).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// Nothing is done if the target has no rows or no columns, or if the left-hand side operand of
// the multiplication has no columns. Otherwise both operands are evaluated (types LT and RT)
// and the work is dispatched by the number of target elements: targets below
// TDMATTDMATMULT_THRESHOLD use selectDefaultSubAssignKernel(), all others use
// selectBlasSubAssignKernel().
*/
1372  template< typename MT // Type of the target dense matrix
1373  , bool SO > // Storage order of the target dense matrix
1374  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1375  {
1376  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1377  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1378 
1379  MT& target( ~lhs );
1380  if( target.rows() == 0UL || target.columns() == 0UL || rhs.lhs_.columns() == 0UL ) return;
1382 
1383  LT A( rhs.lhs_ ); // Evaluated left-hand side operand of the product
1384  RT B( rhs.rhs_ ); // Evaluated right-hand side operand of the product
1385 
1386  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1387  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1388  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1389  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1390  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1391  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1392 
1393  if( target.rows() * target.columns() >= TDMATTDMATMULT_THRESHOLD ) {
1394  TDMatTDMatMultExpr::selectBlasSubAssignKernel( target, A, B );
1395  }
1396  else TDMatTDMatMultExpr::selectDefaultSubAssignKernel( target, A, B );
1397  }
1399  //**********************************************************************************************
1400 
1401  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Non-vectorized default kernel for the subtraction assignment C -= A*B.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via DisableIf, i.e. whenever UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
// For every pair (i,k) the products A(i,k)*B(k,j) are subtracted from row i of C; the column
// index j is unrolled by two, and a trailing column is handled separately if N is odd.
*/
1415  template< typename MT3 // Target matrix type
1416  , typename MT4 // Left operand type
1417  , typename MT5 > // Right operand type
1418  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1419  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1420  {
1421  const size_t rowsA = A.rows();
1422  const size_t N = B.columns();
1423  const size_t inner = A.columns();
1424 
1425  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1426  const size_t evenN = N & size_t(-2); // N rounded down to an even number
1427 
1428  for( size_t row=0UL; row!=rowsA; ++row ) {
1429  for( size_t k=0UL; k!=inner; ++k ) {
1430  for( size_t col=0UL; col<evenN; col+=2UL ) {
1431  C(row,col ) -= A(row,k) * B(k,col );
1432  C(row,col+1UL) -= A(row,k) * B(k,col+1UL);
1433  }
1434  if( evenN != N ) { // odd column count: one trailing column remains
1435  C(row,evenN) -= A(row,k) * B(k,evenN);
1436  }
1437  }
1438  }
1439  }
1441  //**********************************************************************************************
1442 
1443  //**Vectorized default subtraction assignment to row-major dense matrices***********************
/*!\brief Vectorized default subtraction assignment to a dense matrix with storage order flag
//        \a false (row-major according to the section heading).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds. One of the two
// operands is copied into a temporary of its OppositeType (constrained to be row-major) and
// subAssign() is invoked again with the resulting mixed product. If exactly one operand type
// is resizable, the other (non-resizable) operand is converted; otherwise the operand with
// fewer elements is converted, with B taking precedence on equal sizes.
*/
1457  template< typename MT3 // Target matrix type
1458  , typename MT4 // Left operand type
1459  , typename MT5 > // Right operand type
1460  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1461  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1462  {
1463  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1464  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1465 
1466  const bool resizableA( IsResizable<MT4>::value );
1467  const bool resizableB( IsResizable<MT5>::value );
1468 
      // Resizability decides when it differs between the operands, otherwise the element count
1470  const bool convertB( ( resizableA != resizableB )
1471  ? resizableA
1472  : ( B.rows() * B.columns() <= A.rows() * A.columns() ) );
1473 
1474  if( convertB ) {
1475  const typename MT5::OppositeType tmp( B );
1476  subAssign( ~C, A * tmp );
1477  }
1478  else {
1479  const typename MT4::OppositeType tmp( A );
1480  subAssign( ~C, tmp * B );
1481  }
1482  }
1484  //**********************************************************************************************
1485 
1486  //**Vectorized default subtraction assignment to column-major dense matrices********************
/*!\brief Vectorized default kernel for the subtraction assignment of the product A*B from C.
//
// \param C The target left-hand side dense matrix (storage order flag \a true).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is selected via EnableIf only if UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// For every processed position the current value of C is loaded, the products
// A.get(i,k) * set( B(k,j) ) are subtracted for all k, and the result is stored back to C.
// The row index i advances in steps of 8, 4, 2 and finally 1 times IT::size; in the 4x, 2x and
// 1x sections two columns (j and j+1) are processed per iteration, followed by a single
// remaining column if N is odd.
*/
1500  template< typename MT3 // Type of the left-hand side target matrix
1501  , typename MT4 // Type of the left-hand side matrix operand
1502  , typename MT5 > // Type of the right-hand side matrix operand
1503  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1504  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1505  {
1506  typedef IntrinsicTrait<ElementType> IT;
1507 
      // NOTE(review): M is taken from A.spacing() rather than A.rows() - presumably so that
      // padded rows are covered by whole intrinsic vectors; confirm against the matrix types.
1508  const size_t M( A.spacing() );
1509  const size_t N( B.columns() );
1510  const size_t K( A.columns() );
1511 
1512  size_t i( 0UL );
1513 
      // Row blocks of 8*IT::size, one column j at a time
1514  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1515  for( size_t j=0UL; j<N; ++j ) {
1516  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1517  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1518  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1519  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1520  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1521  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1522  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1523  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1524  for( size_t k=0UL; k<K; ++k ) {
1525  const IntrinsicType b1( set( B(k,j) ) );
1526  xmm1 = xmm1 - A.get(i ,k) * b1;
1527  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1528  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1529  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1530  xmm5 = xmm5 - A.get(i+IT::size*4UL,k) * b1;
1531  xmm6 = xmm6 - A.get(i+IT::size*5UL,k) * b1;
1532  xmm7 = xmm7 - A.get(i+IT::size*6UL,k) * b1;
1533  xmm8 = xmm8 - A.get(i+IT::size*7UL,k) * b1;
1534  }
1535  store( &(~C)(i ,j), xmm1 );
1536  store( &(~C)(i+IT::size ,j), xmm2 );
1537  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1538  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1539  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1540  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1541  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1542  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1543  }
1544  }
      // Row blocks of 4*IT::size, two columns (j, j+1) per iteration
1545  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1546  size_t j( 0UL );
1547  for( ; (j+2UL) <= N; j+=2UL ) {
1548  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1549  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1550  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1551  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1552  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1553  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1554  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1555  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1556  for( size_t k=0UL; k<K; ++k ) {
1557  const IntrinsicType a1( A.get(i ,k) );
1558  const IntrinsicType a2( A.get(i+IT::size ,k) );
1559  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1560  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1561  const IntrinsicType b1( set( B(k,j ) ) );
1562  const IntrinsicType b2( set( B(k,j+1UL) ) );
1563  xmm1 = xmm1 - a1 * b1;
1564  xmm2 = xmm2 - a2 * b1;
1565  xmm3 = xmm3 - a3 * b1;
1566  xmm4 = xmm4 - a4 * b1;
1567  xmm5 = xmm5 - a1 * b2;
1568  xmm6 = xmm6 - a2 * b2;
1569  xmm7 = xmm7 - a3 * b2;
1570  xmm8 = xmm8 - a4 * b2;
1571  }
1572  store( &(~C)(i ,j ), xmm1 );
1573  store( &(~C)(i+IT::size ,j ), xmm2 );
1574  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1575  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1576  store( &(~C)(i ,j+1UL), xmm5 );
1577  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1578  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1579  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1580  }
      // Single remaining column (N is odd)
1581  if( j < N ) {
1582  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1583  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1584  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1585  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1586  for( size_t k=0UL; k<K; ++k ) {
1587  const IntrinsicType b1( set( B(k,j) ) );
1588  xmm1 = xmm1 - A.get(i ,k) * b1;
1589  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1590  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1591  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1592  }
1593  store( &(~C)(i ,j), xmm1 );
1594  store( &(~C)(i+IT::size ,j), xmm2 );
1595  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1596  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1597  }
1598  }
      // Row blocks of 2*IT::size, two columns (j, j+1) per iteration
1599  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1600  size_t j( 0UL );
1601  for( ; (j+2UL) <= N; j+=2UL ) {
1602  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1603  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1604  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1605  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1606  for( size_t k=0UL; k<K; ++k ) {
1607  const IntrinsicType a1( A.get(i ,k) );
1608  const IntrinsicType a2( A.get(i+IT::size,k) );
1609  const IntrinsicType b1( set( B(k,j ) ) );
1610  const IntrinsicType b2( set( B(k,j+1UL) ) );
1611  xmm1 = xmm1 - a1 * b1;
1612  xmm2 = xmm2 - a2 * b1;
1613  xmm3 = xmm3 - a1 * b2;
1614  xmm4 = xmm4 - a2 * b2;
1615  }
1616  store( &(~C)(i ,j ), xmm1 );
1617  store( &(~C)(i+IT::size,j ), xmm2 );
1618  store( &(~C)(i ,j+1UL), xmm3 );
1619  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1620  }
      // Single remaining column (N is odd)
1621  if( j < N ) {
1622  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1623  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1624  for( size_t k=0UL; k<K; ++k ) {
1625  const IntrinsicType b1( set( B(k,j) ) );
1626  xmm1 = xmm1 - A.get(i ,k) * b1;
1627  xmm2 = xmm2 - A.get(i+IT::size,k) * b1;
1628  }
1629  store( &(~C)(i ,j), xmm1 );
1630  store( &(~C)(i+IT::size,j), xmm2 );
1631  }
1632  }
      // Remainder: fewer than 2*IT::size rows are left after the loop above; exactly one
      // intrinsic vector per column is processed here.
      // NOTE(review): this assumes the remainder is a full vector (M a multiple of IT::size) - confirm.
1633  if( i < M ) {
1634  size_t j( 0UL );
1635  for( ; (j+2UL) <= N; j+=2UL ) {
1636  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1637  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1638  for( size_t k=0UL; k<K; ++k ) {
1639  const IntrinsicType a1( A.get(i,k) );
1640  xmm1 = xmm1 - a1 * set( B(k,j ) );
1641  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1642  }
1643  store( &(~C)(i,j ), xmm1 );
1644  store( &(~C)(i,j+1UL), xmm2 );
1645  }
      // Single remaining column (N is odd)
1646  if( j < N ) {
1647  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1648  for( size_t k=0UL; k<K; ++k ) {
1649  xmm1 = xmm1 - A.get(i,k) * set( B(k,j) );
1650  }
1651  store( &(~C)(i,j), xmm1 );
1652  }
1653  }
1654  }
1656  //**********************************************************************************************
1657 
1658  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
/*!\brief Fallback of the BLAS-based subtraction assignment kernel selection.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDefaultKernel<MT3,MT4,MT5> holds. No BLAS routine is called
// in this case; the operation is forwarded to selectDefaultSubAssignKernel().
*/
1672  template< typename MT3 // Target matrix type
1673  , typename MT4 // Left operand type
1674  , typename MT5 > // Right operand type
1675  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1676  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1677  {
1678  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1679  }
1681  //**********************************************************************************************
1682 
1683  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1684 #if BLAZE_BLAS_MODE
1685 
/*!\brief BLAS-based subtraction assignment for single precision operands (cblas_sgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5> holds. The kernel calls
// cblas_sgemm with alpha = -1 and beta = 1, i.e. the product is subtracted from the current
// content of C. For a row-major target both operands are passed as transposed, otherwise as
// non-transposed. All sizes and spacings are converted to int via boost::numeric_cast.
*/
1698  template< typename MT3 // Target matrix type
1699  , typename MT4 // Left operand type
1700  , typename MT5 > // Right operand type
1701  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1702  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1703  {
1704  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1705 
1706  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1707  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1708  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1709 
1710  const int rows = boost::numeric_cast<int>( A.rows() );
1711  const int cols = boost::numeric_cast<int>( B.columns() );
1712  const int inner = boost::numeric_cast<int>( A.columns() );
1713  const int strideA = boost::numeric_cast<int>( A.spacing() );
1714  const int strideB = boost::numeric_cast<int>( B.spacing() );
1715  const int strideC = boost::numeric_cast<int>( C.spacing() );
1716 
1717  cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1718  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1719  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1720  rows, cols, inner, -1.0F, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
1721  }
1723 #endif
1724  //**********************************************************************************************
1725 
1726  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1727 #if BLAZE_BLAS_MODE
1728 
/*!\brief BLAS-based subtraction assignment for double precision operands (cblas_dgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDoublePrecisionKernel<MT3,MT4,MT5> holds. The kernel calls
// cblas_dgemm with alpha = -1 and beta = 1, i.e. the product is subtracted from the current
// content of C. For a row-major target both operands are passed as transposed, otherwise as
// non-transposed. All sizes and spacings are converted to int via boost::numeric_cast.
*/
1741  template< typename MT3 // Target matrix type
1742  , typename MT4 // Left operand type
1743  , typename MT5 > // Right operand type
1744  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1745  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1746  {
1747  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1748 
1749  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1750  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1751  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1752 
1753  const int rows = boost::numeric_cast<int>( A.rows() );
1754  const int cols = boost::numeric_cast<int>( B.columns() );
1755  const int inner = boost::numeric_cast<int>( A.columns() );
1756  const int strideA = boost::numeric_cast<int>( A.spacing() );
1757  const int strideB = boost::numeric_cast<int>( B.spacing() );
1758  const int strideC = boost::numeric_cast<int>( C.spacing() );
1759 
1760  cblas_dgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1761  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1762  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1763  rows, cols, inner, -1.0, A.data(), strideA, B.data(), strideB, 1.0, C.data(), strideC );
1764  }
1766 #endif
1767  //**********************************************************************************************
1768 
1769  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1770 #if BLAZE_BLAS_MODE
1771 
/*!\brief BLAS-based subtraction assignment for single precision complex operands (cblas_cgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. The kernel
// calls cblas_cgemm with alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from
// the current content of C. For a row-major target both operands are passed as transposed,
// otherwise as non-transposed. All sizes and spacings are converted to int via
// boost::numeric_cast.
*/
1784  template< typename MT3 // Target matrix type
1785  , typename MT4 // Left operand type
1786  , typename MT5 > // Right operand type
1787  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1788  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1789  {
1790  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1791 
1792  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1793  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1794  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1795  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1796  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1797  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1798 
1799  const int rows = boost::numeric_cast<int>( A.rows() );
1800  const int cols = boost::numeric_cast<int>( B.columns() );
1801  const int inner = boost::numeric_cast<int>( A.columns() );
1802  const int strideA = boost::numeric_cast<int>( A.spacing() );
1803  const int strideB = boost::numeric_cast<int>( B.spacing() );
1804  const int strideC = boost::numeric_cast<int>( C.spacing() );
1805  const complex<float> alpha = complex<float>( -1.0F, 0.0F ); // scaling of the product A*B
1806  const complex<float> beta = complex<float>( 1.0F, 0.0F ); // scaling of the existing C
1807 
1808  cblas_cgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1809  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1810  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1811  rows, cols, inner, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
1812  }
1814 #endif
1815  //**********************************************************************************************
1816 
1817  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1818 #if BLAZE_BLAS_MODE
1819 
/*!\brief BLAS-based subtraction assignment for double precision complex operands (cblas_zgemm).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Selected via EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. The kernel
// calls cblas_zgemm with alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from
// the current content of C. For a row-major target both operands are passed as transposed,
// otherwise as non-transposed. All sizes and spacings are converted to int via
// boost::numeric_cast.
*/
1832  template< typename MT3 // Target matrix type
1833  , typename MT4 // Left operand type
1834  , typename MT5 > // Right operand type
1835  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1836  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1837  {
1838  const bool rowMajorTarget = IsRowMajorMatrix<MT3>::value;
1839 
1840  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1841  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1842  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1843  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1844  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1845  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1846 
1847  const int rows = boost::numeric_cast<int>( A.rows() );
1848  const int cols = boost::numeric_cast<int>( B.columns() );
1849  const int inner = boost::numeric_cast<int>( A.columns() );
1850  const int strideA = boost::numeric_cast<int>( A.spacing() );
1851  const int strideB = boost::numeric_cast<int>( B.spacing() );
1852  const int strideC = boost::numeric_cast<int>( C.spacing() );
1853  const complex<double> alpha = complex<double>( -1.0, 0.0 ); // scaling of the product A*B
1854  const complex<double> beta = complex<double>( 1.0, 0.0 ); // scaling of the existing C
1855 
1856  cblas_zgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
1857  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for A
1858  rowMajorTarget ? CblasTrans : CblasNoTrans, // transposition flag for B
1859  rows, cols, inner, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
1860  }
1862 #endif
1863  //**********************************************************************************************
1864 
1865  //**Subtraction assignment to sparse matrices***************************************************
1866  // No special implementation for the subtraction assignment to sparse matrices.
1867  //**********************************************************************************************
1868 
1869  //**Multiplication assignment to dense matrices*************************************************
1870  // No special implementation for the multiplication assignment to dense matrices.
1871  //**********************************************************************************************
1872 
1873  //**Multiplication assignment to sparse matrices************************************************
1874  // No special implementation for the multiplication assignment to sparse matrices.
1875  //**********************************************************************************************
1876 
1877  //**Compile time checks*************************************************************************
1884  //**********************************************************************************************
1885 };
1886 //*************************************************************************************************
1887 
1888 
1889 
1890 
1891 //=================================================================================================
1892 //
1893 // DMATSCALARMULTEXPR SPECIALIZATION
1894 //
1895 //=================================================================================================
1896 
1897 //*************************************************************************************************
1905 template< typename MT1 // Type of the left-hand side dense matrix
1906  , typename MT2 // Type of the right-hand side dense matrix
1907  , typename ST > // Type of the right-hand side scalar value
1908 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
1909  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
1910  , private Expression
1911  , private Computation
1912 {
1913  private:
1914  //**Type definitions****************************************************************************
// Private shorthand aliases used throughout this specialization.
// MMM: the matrix/matrix multiplication expression that is scaled by this expression.
1915  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
// RES: result type of the wrapped (unscaled) multiplication expression.
1916  typedef typename MMM::ResultType RES;
// RT1/RT2: result types of the left-hand and right-hand side matrix operands.
1917  typedef typename MT1::ResultType RT1;
1918  typedef typename MT2::ResultType RT2;
// CT1/CT2: composite types of the left-hand and right-hand side matrix operands.
1919  typedef typename MT1::CompositeType CT1;
1920  typedef typename MT2::CompositeType CT2;
1921  //**********************************************************************************************
1922 
1923  //**********************************************************************************************
1925 
// Compile-time predicate guarding the cblas_sgemm based kernels: 'value' is nonzero only if
// the element types of T1, T2 and T3 all satisfy IsFloat and the scalar type T4 is not a
// complex type (IsComplex<T4> is false).
1928  template< typename T1, typename T2, typename T3, typename T4 >
1929  struct UseSinglePrecisionKernel {
1930  enum { value = IsFloat<typename T1::ElementType>::value &&
1931  IsFloat<typename T2::ElementType>::value &&
1932  IsFloat<typename T3::ElementType>::value &&
1933  !IsComplex<T4>::value };
1934  };
1935  //**********************************************************************************************
1936 
1937  //**********************************************************************************************
1939 
// Compile-time predicate guarding the cblas_dgemm based kernels: 'value' is nonzero only if
// the element types of T1, T2 and T3 all satisfy IsDouble and the scalar type T4 is not a
// complex type (IsComplex<T4> is false).
1942  template< typename T1, typename T2, typename T3, typename T4 >
1943  struct UseDoublePrecisionKernel {
1944  enum { value = IsDouble<typename T1::ElementType>::value &&
1945  IsDouble<typename T2::ElementType>::value &&
1946  IsDouble<typename T3::ElementType>::value &&
1947  !IsComplex<T4>::value };
1948  };
1949  //**********************************************************************************************
1950 
1951  //**********************************************************************************************
1953 
// Compile-time predicate guarding the cblas_cgemm based kernels: 'value' is nonzero only if
// the element types of T1, T2 and T3 are all exactly complex<float>. The scalar type is not
// part of this check.
1956  template< typename T1, typename T2, typename T3 >
1957  struct UseSinglePrecisionComplexKernel {
1958  typedef complex<float> Type;
1959  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1960  IsSame<typename T2::ElementType,Type>::value &&
1961  IsSame<typename T3::ElementType,Type>::value };
1962  };
1963  //**********************************************************************************************
1964 
1965  //**********************************************************************************************
1967 
// Compile-time predicate guarding the cblas_zgemm based kernels: 'value' is nonzero only if
// the element types of T1, T2 and T3 are all exactly complex<double>. The scalar type is not
// part of this check.
1970  template< typename T1, typename T2, typename T3 >
1971  struct UseDoublePrecisionComplexKernel {
1972  typedef complex<double> Type;
1973  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1974  IsSame<typename T2::ElementType,Type>::value &&
1975  IsSame<typename T3::ElementType,Type>::value };
1976  };
1977  //**********************************************************************************************
1978 
1979  //**********************************************************************************************
1981 
// Compile-time predicate guarding the non-BLAS fallback of the select*Blas*Kernel overloads:
// 'value' is nonzero if BLAZE_BLAS_MODE is zero, or if none of the four BLAS predicates
// (single/double precision, real/complex) holds for the given type combination.
1983  template< typename T1, typename T2, typename T3, typename T4 >
1984  struct UseDefaultKernel {
1985  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
1986  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
1987  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
1988  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
1989  };
1990  //**********************************************************************************************
1991 
1992  //**********************************************************************************************
1994 
// Compile-time predicate choosing between the scalar and the vectorized default kernels:
// 'value' is nonzero only if T1, T2 and T3 are all flagged vectorizable, the element types of
// T1, T2, T3 and the scalar type T4 are all the same type, and IntrinsicTrait reports both
// addition and multiplication for that element type.
1996  template< typename T1, typename T2, typename T3, typename T4 >
1997  struct UseVectorizedDefaultKernel {
1998  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
1999  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2000  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2001  IsSame<typename T1::ElementType,T4>::value &&
2002  IntrinsicTrait<typename T1::ElementType>::addition &&
2003  IntrinsicTrait<typename T1::ElementType>::multiplication };
2004  };
2005  //**********************************************************************************************
2006 
2007  public:
2008  //**Type definitions****************************************************************************
// Public type aliases of the scaled multiplication expression. ResultType is derived from
// the result type of the wrapped multiplication and the scalar type via MultTrait; the
// remaining aliases are taken from ResultType.
2009  typedef DMatScalarMultExpr<MMM,ST,true> This;
2010  typedef typename MultTrait<RES,ST>::Type ResultType;
2011  typedef typename ResultType::OppositeType OppositeType;
2012  typedef typename ResultType::TransposeType TransposeType;
2013  typedef typename ResultType::ElementType ElementType;
2014  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2015  typedef const ElementType ReturnType;
2016  typedef const ResultType CompositeType;
2017 
2020 
// NOTE(review): 'LeftOperand' is used by leftOperand() and by the matrix_ member further
// down, but no typedef for it is visible in this listing -- confirm against the full header.
// Type used to store the scalar: selected between ElementType and ST depending on whether
// ElementType is numeric (assuming SelectType picks its first type when the condition holds).
2022  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
2023 
// Type used to evaluate the left-hand side matrix operand inside the assignment kernels:
// selected between 'const RT1' and 'CT1' depending on whether MT1 is a computation.
2025  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2026 
// Type used to evaluate the right-hand side matrix operand inside the assignment kernels:
// selected between 'const RT2' and 'CT2' depending on whether MT2 is a computation.
2028  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2029  //**********************************************************************************************
2030 
2031  //**Compilation flags***************************************************************************
// Vectorization flag of the expression itself; fixed to 0 for this expression type.
2033  enum { vectorizable = 0 };
2034 
// Aliasing flag; taken over unchanged from the wrapped multiplication expression.
2036  enum { canAlias = CanAlias<MMM>::value };
2037  //**********************************************************************************************
2038 
2039  //**Constructor*********************************************************************************
// Constructs the scaled expression.
// matrix: the matrix/matrix multiplication expression to be scaled.
// scalar: the scaling factor.
// Both arguments are only stored in the members; nothing is evaluated here.
2045  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2046  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2047  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2048  {}
2049  //**********************************************************************************************
2050 
2051  //**Access operator*****************************************************************************
// Element access: returns element (i,j) of the wrapped multiplication expression multiplied
// by the scalar.
// i: row index, j: column index. The indices are only checked via BLAZE_INTERNAL_ASSERT
// against the dimensions of the wrapped expression.
2058  inline ReturnType operator()( size_t i, size_t j ) const {
2059  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2060  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2061  return matrix_(i,j) * scalar_;
2062  }
2063  //**********************************************************************************************
2064 
2065  //**Rows function*******************************************************************************
// Returns the number of rows, which is the number of rows of the wrapped multiplication
// expression (scaling does not change the dimensions).
2070  inline size_t rows() const {
2071  return matrix_.rows();
2072  }
2073  //**********************************************************************************************
2074 
2075  //**Columns function****************************************************************************
// Returns the number of columns, which is the number of columns of the wrapped
// multiplication expression (scaling does not change the dimensions).
2080  inline size_t columns() const {
2081  return matrix_.columns();
2082  }
2083  //**********************************************************************************************
2084 
2085  //**Left operand access*************************************************************************
// Returns the left-hand side operand of the scaling, i.e. the stored matrix/matrix
// multiplication expression.
2090  inline LeftOperand leftOperand() const {
2091  return matrix_;
2092  }
2093  //**********************************************************************************************
2094 
2095  //**Right operand access************************************************************************
// Returns the right-hand side operand of the scaling, i.e. the stored scalar value.
2100  inline RightOperand rightOperand() const {
2101  return scalar_;
2102  }
2103  //**********************************************************************************************
2104 
2105  //**********************************************************************************************
// Aliasing query: returns true only if the wrapped multiplication expression type can alias
// at all (CanAlias<MMM>) and the stored expression reports being aliased with 'alias'.
// alias: address to be checked; it is forwarded unchanged to matrix_.isAliased().
2111  template< typename T >
2112  inline bool isAliased( const T* alias ) const {
2113  return CanAlias<MMM>::value && matrix_.isAliased( alias );
2114  }
2115  //**********************************************************************************************
2116 
2117  private:
2118  //**Member variables****************************************************************************
// matrix_: the wrapped matrix/matrix multiplication expression (left operand of the scaling).
2119  LeftOperand matrix_;
// scalar_: the scaling factor (right operand of the scaling).
2120  RightOperand scalar_;
2121  //**********************************************************************************************
2122 
2123  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled multiplication expression to a dense matrix: lhs = ( A * B ) * s.
// lhs: target dense matrix; its dimensions must already match rhs (checked by assertion only).
// rhs: the scaled multiplication expression to be evaluated.
// The two matrix operands are evaluated first; the actual work is then delegated to either
// the default kernel or the BLAS kernel depending on the size of the target.
2132  template< typename MT3 // Type of the target dense matrix
2133  , bool SO > // Storage order of the target dense matrix
2134  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2135  {
2136  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2137  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2138 
2139  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2140  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2141 
// Empty target: nothing to do. Empty inner dimension: the product has no terms, so the
// target is reset instead of running a kernel (the default kernel reads column 0 of A).
2142  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2143  return;
2144  }
2145  else if( left.columns() == 0UL ) {
2146  reset( ~lhs );
2147  return;
2148  }
2149 
2150  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2151  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2152 
2153  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2154  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2155  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2156  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2157  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2158  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2159 
// Targets with fewer elements than TDMATTDMATMULT_THRESHOLD use the default kernels, all
// others go through the BLAS kernel selection.
2160  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2161  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2162  else
2163  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2164  }
2165  //**********************************************************************************************
2166 
2167  //**Default assignment to dense matrices********************************************************
// Scalar (non-vectorized) default kernel computing C = ( A * B ) * scalar. This overload is
// selected when UseVectorizedDefaultKernel does not hold for the given types.
// C: target matrix; A: left-hand side operand; B: right-hand side operand; scalar: factor.
// The kernel reads A(i,0) and B(0,k) unconditionally, so A must have at least one column;
// the calling assign() handles the empty case before dispatching here.
2181  template< typename MT3 // Type of the left-hand side target matrix
2182  , typename MT4 // Type of the left-hand side matrix operand
2183  , typename MT5 // Type of the right-hand side matrix operand
2184  , typename ST2 > // Type of the scalar value
2185  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2186  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2187  {
2188  for( size_t i=0UL; i<A.rows(); ++i ) {
// The first term initializes row i of C, so C does not need to be cleared beforehand.
2189  for( size_t k=0UL; k<B.columns(); ++k ) {
2190  C(i,k) = A(i,0UL) * B(0UL,k);
2191  }
// The remaining terms of the inner product are accumulated.
2192  for( size_t j=1UL; j<A.columns(); ++j ) {
2193  for( size_t k=0UL; k<B.columns(); ++k ) {
2194  C(i,k) += A(i,j) * B(j,k);
2195  }
2196  }
// The finished row is scaled once at the end.
2197  for( size_t k=0UL; k<B.columns(); ++k ) {
2198  C(i,k) *= scalar;
2199  }
2200  }
2201  }
2202  //**********************************************************************************************
2203 
2204  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for a row-major target. No multiplication is carried out here:
// one of the two operands is copied into a temporary of its opposite storage order and the
// resulting scaled product expression is passed to assign() again.
// C: row-major target matrix; A, B: matrix operands; scalar: scaling factor.
2218  template< typename MT3 // Type of the left-hand side target matrix
2219  , typename MT4 // Type of the left-hand side matrix operand
2220  , typename MT5 // Type of the right-hand side matrix operand
2221  , typename ST2 > // Type of the scalar value
2222  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2223  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2224  {
// The opposite types of both operands are required to be row-major matrix types.
2225  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2226  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2227 
// Choice of the operand to convert: if exactly one operand type is resizable, the
// non-resizable one is converted; otherwise the operand with fewer elements is converted
// (B on a tie).
2228  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2229  const typename MT5::OppositeType tmp( B );
2230  assign( ~C, A * tmp * scalar );
2231  }
2232  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2233  const typename MT4::OppositeType tmp( A );
2234  assign( ~C, tmp * B * scalar );
2235  }
2236  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2237  const typename MT5::OppositeType tmp( B );
2238  assign( ~C, A * tmp * scalar );
2239  }
2240  else {
2241  const typename MT4::OppositeType tmp( A );
2242  assign( ~C, tmp * B * scalar );
2243  }
2244  }
2245  //**********************************************************************************************
2246 
2247  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for a column-major target computing C = ( A * B ) * scalar.
// Intrinsic vectors run along the row index: A.get(i,k) is combined with the single element
// B(k,j) and the accumulated vector is stored at &C(i,j). The row range is processed in
// blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements each.
// C: column-major target matrix; A, B: matrix operands; scalar: scaling factor.
2261  template< typename MT3 // Type of the left-hand side target matrix
2262  , typename MT4 // Type of the left-hand side matrix operand
2263  , typename MT5 // Type of the right-hand side matrix operand
2264  , typename ST2 > // Type of the scalar value
2265  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2266  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2267  {
2268  typedef IntrinsicTrait<ElementType> IT;
2269 
// NOTE(review): M is taken from A.spacing() rather than A.rows() -- presumably the spacing
// is padded so that the row range divides into whole intrinsic vectors; confirm.
2270  const size_t M( A.spacing() );
2271  const size_t N( B.columns() );
2272  const size_t K( A.columns() );
2273 
// The scalar in intrinsic form; every accumulated vector is multiplied by it before the store.
2274  const IntrinsicType factor( set( scalar ) );
2275 
2276  size_t i( 0UL );
2277 
// Row blocks of 8 intrinsic vectors, one column of C per iteration.
// NOTE(review): the xmm accumulators here and below are only default-constructed before
// being summed into -- this assumes a default-constructed IntrinsicType is zero; confirm.
2278  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2279  for( size_t j=0UL; j<N; ++j ) {
2280  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2281  for( size_t k=0UL; k<K; ++k ) {
2282  const IntrinsicType b1( set( B(k,j) ) );
2283  xmm1 = xmm1 + A.get(i ,k) * b1;
2284  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2285  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2286  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2287  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2288  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2289  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2290  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2291  }
2292  store( &(~C)(i ,j), xmm1 * factor );
2293  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2294  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2295  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2296  store( &(~C)(i+IT::size*4UL,j), xmm5 * factor );
2297  store( &(~C)(i+IT::size*5UL,j), xmm6 * factor );
2298  store( &(~C)(i+IT::size*6UL,j), xmm7 * factor );
2299  store( &(~C)(i+IT::size*7UL,j), xmm8 * factor );
2300  }
2301  }
// Row blocks of 4 intrinsic vectors, two columns of C per iteration (xmm1-4: column j,
// xmm5-8: column j+1), followed by a single remaining column if N is odd.
2302  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2303  size_t j( 0UL );
2304  for( ; (j+2UL) <= N; j+=2UL ) {
2305  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2306  for( size_t k=0UL; k<K; ++k ) {
2307  const IntrinsicType a1( A.get(i ,k) );
2308  const IntrinsicType a2( A.get(i+IT::size ,k) );
2309  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2310  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2311  const IntrinsicType b1( set( B(k,j ) ) );
2312  const IntrinsicType b2( set( B(k,j+1UL) ) );
2313  xmm1 = xmm1 + a1 * b1;
2314  xmm2 = xmm2 + a2 * b1;
2315  xmm3 = xmm3 + a3 * b1;
2316  xmm4 = xmm4 + a4 * b1;
2317  xmm5 = xmm5 + a1 * b2;
2318  xmm6 = xmm6 + a2 * b2;
2319  xmm7 = xmm7 + a3 * b2;
2320  xmm8 = xmm8 + a4 * b2;
2321  }
2322  store( &(~C)(i ,j ), xmm1 * factor );
2323  store( &(~C)(i+IT::size ,j ), xmm2 * factor );
2324  store( &(~C)(i+IT::size*2UL,j ), xmm3 * factor );
2325  store( &(~C)(i+IT::size*3UL,j ), xmm4 * factor );
2326  store( &(~C)(i ,j+1UL), xmm5 * factor );
2327  store( &(~C)(i+IT::size ,j+1UL), xmm6 * factor );
2328  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 * factor );
2329  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 * factor );
2330  }
2331  if( j < N ) {
2332  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2333  for( size_t k=0UL; k<K; ++k ) {
2334  const IntrinsicType b1( set( B(k,j) ) );
2335  xmm1 = xmm1 + A.get(i ,k) * b1;
2336  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2337  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2338  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2339  }
2340  store( &(~C)(i ,j), xmm1 * factor );
2341  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2342  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2343  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2344  }
2345  }
// Row blocks of 2 intrinsic vectors, two columns of C per iteration (xmm1-2: column j,
// xmm3-4: column j+1), followed by a single remaining column if N is odd.
2346  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2347  size_t j( 0UL );
2348  for( ; (j+2UL) <= N; j+=2UL ) {
2349  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2350  for( size_t k=0UL; k<K; ++k ) {
2351  const IntrinsicType a1( A.get(i ,k) );
2352  const IntrinsicType a2( A.get(i+IT::size,k) );
2353  const IntrinsicType b1( set( B(k,j ) ) );
2354  const IntrinsicType b2( set( B(k,j+1UL) ) );
2355  xmm1 = xmm1 + a1 * b1;
2356  xmm2 = xmm2 + a2 * b1;
2357  xmm3 = xmm3 + a1 * b2;
2358  xmm4 = xmm4 + a2 * b2;
2359  }
2360  store( &(~C)(i ,j ), xmm1 * factor );
2361  store( &(~C)(i+IT::size,j ), xmm2 * factor );
2362  store( &(~C)(i ,j+1UL), xmm3 * factor );
2363  store( &(~C)(i+IT::size,j+1UL), xmm4 * factor );
2364  }
2365  if( j < N ) {
2366  IntrinsicType xmm1, xmm2;
2367  for( size_t k=0UL; k<K; ++k ) {
2368  const IntrinsicType b1( set( B(k,j) ) );
2369  xmm1 = xmm1 + A.get(i ,k) * b1;
2370  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2371  }
2372  store( &(~C)(i ,j), xmm1 * factor );
2373  store( &(~C)(i+IT::size,j), xmm2 * factor );
2374  }
2375  }
// Final row block of a single intrinsic vector (executed at most once), again two columns
// of C at a time plus a single remaining column if N is odd.
2376  if( i < M ) {
2377  size_t j( 0UL );
2378  for( ; (j+2UL) <= N; j+=2UL ) {
2379  IntrinsicType xmm1, xmm2;
2380  for( size_t k=0UL; k<K; ++k ) {
2381  const IntrinsicType a1( A.get(i,k) );
2382  xmm1 = xmm1 + a1 * set( B(k,j ) );
2383  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2384  }
2385  store( &(~C)(i,j ), xmm1 * factor );
2386  store( &(~C)(i,j+1UL), xmm2 * factor );
2387  }
2388  if( j < N ) {
2389  IntrinsicType xmm1;
2390  for( size_t k=0UL; k<K; ++k ) {
2391  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2392  }
2393  store( &(~C)(i,j), xmm1 * factor );
2394  }
2395  }
2396  }
2397  //**********************************************************************************************
2398 
2399  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback of the BLAS kernel selection: enabled when UseDefaultKernel holds (BLAS mode is
// off or no BLAS kernel matches the element types) and simply forwards all arguments to
// selectDefaultAssignKernel().
// C: target matrix; A, B: matrix operands; scalar: scaling factor.
2413  template< typename MT3 // Type of the left-hand side target matrix
2414  , typename MT4 // Type of the left-hand side matrix operand
2415  , typename MT5 // Type of the right-hand side matrix operand
2416  , typename ST2 > // Type of the scalar value
2417  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2418  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2419  {
2420  selectDefaultAssignKernel( C, A, B, scalar );
2421  }
2422  //**********************************************************************************************
2423 
2424  //**BLAS-based assignment to dense matrices (single precision)**********************************
2425 #if BLAZE_BLAS_MODE
2426 
// Single precision BLAS kernel: computes C = scalar * A * B with one call to cblas_sgemm.
// Enabled when UseSinglePrecisionKernel holds; only compiled if BLAZE_BLAS_MODE is set.
// C: target matrix; A, B: matrix operands; scalar: scaling factor, passed as gemm's alpha.
2439  template< typename MT3 // Type of the left-hand side target matrix
2440  , typename MT4 // Type of the left-hand side matrix operand
2441  , typename MT5 // Type of the right-hand side matrix operand
2442  , typename ST2 > // Type of the scalar value
2443  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2444  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2445  {
2446  using boost::numeric_cast;
2447 
2448  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2449  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2450  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2451 
// BLAS expects int dimensions; the size_t values are converted via boost::numeric_cast.
// The leading dimensions are the spacings of the matrices, not their logical sizes.
2452  const int M ( numeric_cast<int>( A.rows() ) );
2453  const int N ( numeric_cast<int>( B.columns() ) );
2454  const int K ( numeric_cast<int>( A.columns() ) );
2455  const int lda( numeric_cast<int>( A.spacing() ) );
2456  const int ldb( numeric_cast<int>( B.spacing() ) );
2457  const int ldc( numeric_cast<int>( C.spacing() ) );
2458 
// The storage order flag follows the target type MT3. For a row-major target both operands
// are passed as transposed -- presumably because A and B are stored column-major; confirm.
// beta is 0, so the previous content of C does not enter the result.
2459  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2460  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2461  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2462  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2463  }
2464 #endif
2465  //**********************************************************************************************
2466 
2467  //**BLAS-based assignment to dense matrices (double precision)**********************************
2468 #if BLAZE_BLAS_MODE
2469 
// Double precision BLAS kernel: computes C = scalar * A * B with one call to cblas_dgemm.
// Enabled when UseDoublePrecisionKernel holds; only compiled if BLAZE_BLAS_MODE is set.
// C: target matrix; A, B: matrix operands; scalar: scaling factor, passed as gemm's alpha.
2482  template< typename MT3 // Type of the left-hand side target matrix
2483  , typename MT4 // Type of the left-hand side matrix operand
2484  , typename MT5 // Type of the right-hand side matrix operand
2485  , typename ST2 > // Type of the scalar value
2486  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2487  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2488  {
2489  using boost::numeric_cast;
2490 
2491  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2492  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2493  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2494 
// BLAS expects int dimensions; the size_t values are converted via boost::numeric_cast.
// The leading dimensions are the spacings of the matrices, not their logical sizes.
2495  const int M ( numeric_cast<int>( A.rows() ) );
2496  const int N ( numeric_cast<int>( B.columns() ) );
2497  const int K ( numeric_cast<int>( A.columns() ) );
2498  const int lda( numeric_cast<int>( A.spacing() ) );
2499  const int ldb( numeric_cast<int>( B.spacing() ) );
2500  const int ldc( numeric_cast<int>( C.spacing() ) );
2501 
// The storage order flag follows the target type MT3. For a row-major target both operands
// are passed as transposed -- presumably because A and B are stored column-major; confirm.
// beta is 0, so the previous content of C does not enter the result.
2502  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2503  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2504  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2505  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2506  }
2507 #endif
2508  //**********************************************************************************************
2509 
2510  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2511 #if BLAZE_BLAS_MODE
2512 
// Single precision complex BLAS kernel: computes C = scalar * A * B with one call to
// cblas_cgemm. Enabled when UseSinglePrecisionComplexKernel holds; only compiled if
// BLAZE_BLAS_MODE is set.
// C: target matrix; A, B: matrix operands; scalar: scaling factor, converted to
// complex<float> and passed as gemm's alpha.
2525  template< typename MT3 // Type of the left-hand side target matrix
2526  , typename MT4 // Type of the left-hand side matrix operand
2527  , typename MT5 // Type of the right-hand side matrix operand
2528  , typename ST2 > // Type of the scalar value
2529  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2530  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2531  {
2532  using boost::numeric_cast;
2533 
2534  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2535  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2536  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2538  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2539  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2540  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2541 
// BLAS expects int dimensions; the size_t values are converted via boost::numeric_cast.
// The leading dimensions are the spacings of the matrices, not their logical sizes.
2542  const int M ( numeric_cast<int>( A.rows() ) );
2543  const int N ( numeric_cast<int>( B.columns() ) );
2544  const int K ( numeric_cast<int>( A.columns() ) );
2545  const int lda( numeric_cast<int>( A.spacing() ) );
2546  const int ldb( numeric_cast<int>( B.spacing() ) );
2547  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the named locals. beta is zero,
// so the previous content of C does not enter the result.
2548  const complex<float> alpha( scalar );
2549  const complex<float> beta ( 0.0F, 0.0F );
2550 
// The storage order flag follows the target type MT3. For a row-major target both operands
// are passed as transposed -- presumably because A and B are stored column-major; confirm.
2551  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2552  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2553  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2554  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2555  }
2556 #endif
2557  //**********************************************************************************************
2558 
2559  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2560 #if BLAZE_BLAS_MODE
2561 
// Double precision complex BLAS kernel: computes C = scalar * A * B with one call to
// cblas_zgemm. Enabled when UseDoublePrecisionComplexKernel holds; only compiled if
// BLAZE_BLAS_MODE is set.
// C: target matrix; A, B: matrix operands; scalar: scaling factor, converted to
// complex<double> and passed as gemm's alpha.
2574  template< typename MT3 // Type of the left-hand side target matrix
2575  , typename MT4 // Type of the left-hand side matrix operand
2576  , typename MT5 // Type of the right-hand side matrix operand
2577  , typename ST2 > // Type of the scalar value
2578  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2579  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2580  {
2581  using boost::numeric_cast;
2582 
2583  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2584  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2585  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2587  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2588  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2589  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2590 
// BLAS expects int dimensions; the size_t values are converted via boost::numeric_cast.
// The leading dimensions are the spacings of the matrices, not their logical sizes.
2591  const int M ( numeric_cast<int>( A.rows() ) );
2592  const int N ( numeric_cast<int>( B.columns() ) );
2593  const int K ( numeric_cast<int>( A.columns() ) );
2594  const int lda( numeric_cast<int>( A.spacing() ) );
2595  const int ldb( numeric_cast<int>( B.spacing() ) );
2596  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the named locals. beta is zero,
// so the previous content of C does not enter the result.
2597  const complex<double> alpha( scalar );
2598  const complex<double> beta ( 0.0, 0.0 );
2599 
// The storage order flag follows the target type MT3. For a row-major target both operands
// are passed as transposed -- presumably because A and B are stored column-major; confirm.
2600  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2601  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2602  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2603  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2604  }
2605 #endif
2606  //**********************************************************************************************
2607 
2608  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled multiplication expression to a sparse matrix. There is no direct
// kernel for this case: the expression is first evaluated into a temporary, which is then
// assigned to the sparse target.
// lhs: target sparse matrix; its dimensions must already match rhs (checked by assertion only).
// rhs: the scaled multiplication expression to be evaluated.
2620  template< typename MT // Type of the target sparse matrix
2621  , bool SO > // Storage order of the target sparse matrix
2622  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2623  {
// The temporary is either ResultType or OppositeType, selected by the storage order flag SO
// of the target -- presumably so that the temporary matches the target's storage order;
// confirm.
2624  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2625 
2631  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2632 
2633  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2634  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2635 
2636  const TmpType tmp( rhs );
2637  assign( ~lhs, tmp );
2638  }
2639  //**********************************************************************************************
2640 
2641  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled multiplication expression to a dense matrix:
// lhs += ( A * B ) * s.
// lhs: target dense matrix; its dimensions must already match rhs (checked by assertion only).
// rhs: the scaled multiplication expression to be evaluated.
// The two matrix operands are evaluated first; the actual work is then delegated to either
// the default kernel or the BLAS kernel depending on the size of the target.
2653  template< typename MT3 // Type of the target dense matrix
2654  , bool SO > // Storage order of the target dense matrix
2655  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2656  {
2657  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2658  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2659 
2660  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2661  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2662 
// An empty target or an empty inner dimension contributes nothing to the sum, so the target
// is left untouched (unlike assign(), no reset is needed here).
2663  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2664  return;
2665  }
2666 
2667  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2668  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2669 
2670  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2671  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2672  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2673  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2674  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2675  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2676 
// Targets with fewer elements than TDMATTDMATMULT_THRESHOLD use the default kernels, all
// others go through the BLAS kernel selection.
2677  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2678  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2679  else
2680  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2681  }
2682  //**********************************************************************************************
2683 
2684  //**Default addition assignment to dense matrices***********************************************
// Non-vectorized default kernel for the addition assignment C += ( A * B ) * scalar. This
// overload is selected when UseVectorizedDefaultKernel does not hold for the given types.
// The scaled product is first evaluated into a temporary of type ResultType, which is then
// added to C via addAssign().
// C: target matrix; A, B: matrix operands; scalar: scaling factor.
2698  template< typename MT3 // Type of the left-hand side target matrix
2699  , typename MT4 // Type of the left-hand side matrix operand
2700  , typename MT5 // Type of the right-hand side matrix operand
2701  , typename ST2 > // Type of the scalar value
2702  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2703  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2704  {
2705  const ResultType tmp( A * B * scalar );
2706  addAssign( C, tmp );
2707  }
2708  //**********************************************************************************************
2709 
2710  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default kernel for the addition assignment to a row-major target. No
// multiplication is carried out here: one of the two operands is copied into a temporary of
// its opposite storage order and the resulting scaled product expression is passed to
// addAssign() again.
// C: row-major target matrix; A, B: matrix operands; scalar: scaling factor.
2724  template< typename MT3 // Type of the left-hand side target matrix
2725  , typename MT4 // Type of the left-hand side matrix operand
2726  , typename MT5 // Type of the right-hand side matrix operand
2727  , typename ST2 > // Type of the scalar value
2728  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2729  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2730  {
// The opposite types of both operands are required to be row-major matrix types.
2731  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2732  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2733 
// Choice of the operand to convert: if exactly one operand type is resizable, the
// non-resizable one is converted; otherwise the operand with fewer elements is converted
// (B on a tie).
2734  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2735  const typename MT5::OppositeType tmp( B );
2736  addAssign( ~C, A * tmp * scalar );
2737  }
2738  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2739  const typename MT4::OppositeType tmp( A );
2740  addAssign( ~C, tmp * B * scalar );
2741  }
2742  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2743  const typename MT5::OppositeType tmp( B );
2744  addAssign( ~C, A * tmp * scalar );
2745  }
2746  else {
2747  const typename MT4::OppositeType tmp( A );
2748  addAssign( ~C, tmp * B * scalar );
2749  }
2750  }
2751  //**********************************************************************************************
2752 
2753  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel for the scaled addition assignment C += (A*B)*scalar when the
// target C is a column-major dense matrix (DenseMatrix<MT3,true>). It is only selected if
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// The rows of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For every block the products A(.,k)*B(k,j) are accumulated over k in
// intrinsic registers, scaled by the broadcast scalar and added onto the loaded values of C.
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product (read via A.get(i,k))
//   B      - right-hand side operand of the product (read element-wise via B(k,j))
//   scalar - scaling factor applied to the product
2767  template< typename MT3 // Type of the left-hand side target matrix
2768  , typename MT4 // Type of the left-hand side matrix operand
2769  , typename MT5 // Type of the right-hand side matrix operand
2770  , typename ST2 > // Type of the scalar value
2771  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2772  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2773  {
2774  typedef IntrinsicTrait<ElementType> IT;
2775 
      // The row bound is A.spacing(), not A.rows().
      // NOTE(review): presumably this includes padding rows and relies on C providing at
      // least the same spacing - confirm.
2776  const size_t M( A.spacing() );
2777  const size_t N( B.columns() );
2778  const size_t K( A.columns() );
2779 
      // Scalar broadcast into an intrinsic vector; applied once per result vector after the k-loop.
2780  const IntrinsicType factor( set( scalar ) );
2781 
2782  size_t i( 0UL );
2783 
      // Tier 1: blocks of 8 intrinsic vectors (8*IT::size rows), one column of C at a time.
2784  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2785  for( size_t j=0UL; j<N; ++j ) {
      // Accumulators. NOTE(review): used without explicit initialization, so this relies on
      // IntrinsicType's default constructor zero-initializing them - confirm.
2786  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2787  for( size_t k=0UL; k<K; ++k ) {
2788  const IntrinsicType b1( set( B(k,j) ) );
2789  xmm1 = xmm1 + A.get(i ,k) * b1;
2790  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2791  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2792  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2793  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2794  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2795  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2796  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2797  }
      // C(i..,j) += accumulated product * scalar
2798  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2799  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2800  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2801  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2802  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) + xmm5 * factor );
2803  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) + xmm6 * factor );
2804  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) + xmm7 * factor );
2805  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) + xmm8 * factor );
2806  }
2807  }
      // Tier 2: blocks of 4 intrinsic vectors, two columns of C at a time (xmm1-4: column j,
      // xmm5-8: column j+1), so each loaded vector of A is reused for both columns.
2808  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2809  size_t j( 0UL );
2810  for( ; (j+2UL) <= N; j+=2UL ) {
2811  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2812  for( size_t k=0UL; k<K; ++k ) {
2813  const IntrinsicType a1( A.get(i ,k) );
2814  const IntrinsicType a2( A.get(i+IT::size ,k) );
2815  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2816  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2817  const IntrinsicType b1( set( B(k,j ) ) );
2818  const IntrinsicType b2( set( B(k,j+1UL) ) );
2819  xmm1 = xmm1 + a1 * b1;
2820  xmm2 = xmm2 + a2 * b1;
2821  xmm3 = xmm3 + a3 * b1;
2822  xmm4 = xmm4 + a4 * b1;
2823  xmm5 = xmm5 + a1 * b2;
2824  xmm6 = xmm6 + a2 * b2;
2825  xmm7 = xmm7 + a3 * b2;
2826  xmm8 = xmm8 + a4 * b2;
2827  }
2828  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2829  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) + xmm2 * factor );
2830  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) + xmm3 * factor );
2831  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) + xmm4 * factor );
2832  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm5 * factor );
2833  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) + xmm6 * factor );
2834  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) + xmm7 * factor );
2835  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) + xmm8 * factor );
2836  }
      // Remaining single column when N is odd.
2837  if( j < N ) {
2838  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2839  for( size_t k=0UL; k<K; ++k ) {
2840  const IntrinsicType b1( set( B(k,j) ) );
2841  xmm1 = xmm1 + A.get(i ,k) * b1;
2842  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2843  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2844  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2845  }
2846  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2847  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2848  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2849  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2850  }
2851  }
      // Tier 3: blocks of 2 intrinsic vectors, two columns of C at a time.
2852  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2853  size_t j( 0UL );
2854  for( ; (j+2UL) <= N; j+=2UL ) {
2855  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2856  for( size_t k=0UL; k<K; ++k ) {
2857  const IntrinsicType a1( A.get(i ,k) );
2858  const IntrinsicType a2( A.get(i+IT::size,k) );
2859  const IntrinsicType b1( set( B(k,j ) ) );
2860  const IntrinsicType b2( set( B(k,j+1UL) ) );
2861  xmm1 = xmm1 + a1 * b1;
2862  xmm2 = xmm2 + a2 * b1;
2863  xmm3 = xmm3 + a1 * b2;
2864  xmm4 = xmm4 + a2 * b2;
2865  }
2866  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2867  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) + xmm2 * factor );
2868  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm3 * factor );
2869  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) + xmm4 * factor );
2870  }
      // Remaining single column when N is odd.
2871  if( j < N ) {
2872  IntrinsicType xmm1, xmm2;
2873  for( size_t k=0UL; k<K; ++k ) {
2874  const IntrinsicType b1( set( B(k,j) ) );
2875  xmm1 = xmm1 + A.get(i ,k) * b1;
2876  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2877  }
2878  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2879  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) + xmm2 * factor );
2880  }
2881  }
      // Tier 4: one last intrinsic vector of rows.
      // NOTE(review): only a single IT::size-wide block is handled here, so this presumably
      // relies on A.spacing() being a multiple of IT::size - confirm.
2882  if( i < M ) {
2883  size_t j( 0UL );
2884  for( ; (j+2UL) <= N; j+=2UL ) {
2885  IntrinsicType xmm1, xmm2;
2886  for( size_t k=0UL; k<K; ++k ) {
2887  const IntrinsicType a1( A.get(i,k) );
2888  xmm1 = xmm1 + a1 * set( B(k,j ) );
2889  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2890  }
2891  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2892  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) + xmm2 * factor );
2893  }
      // Remaining single column when N is odd.
2894  if( j < N ) {
2895  IntrinsicType xmm1;
2896  for( size_t k=0UL; k<K; ++k ) {
2897  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2898  }
2899  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2900  }
2901  }
2902  }
2903  //**********************************************************************************************
2904 
2905  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback overload of the BLAS-based addition assignment kernel. It is selected if
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to
// selectDefaultAddAssignKernel(), i.e. no BLAS routine is called.
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor applied to the product
2919  template< typename MT3 // Type of the left-hand side target matrix
2920  , typename MT4 // Type of the left-hand side matrix operand
2921  , typename MT5 // Type of the right-hand side matrix operand
2922  , typename ST2 > // Type of the scalar value
2923  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2924  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2925  {
2926  selectDefaultAddAssignKernel( C, A, B, scalar );
2927  }
2928  //**********************************************************************************************
2929 
2930  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2931 #if BLAZE_BLAS_MODE
2932 
// BLAS-based kernel for the scaled addition assignment C += scalar*(A*B) on single precision
// (float) matrices. It is selected if UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// The work is delegated to cblas_sgemm with alpha = scalar and beta = 1.0F, so the product
// is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor, passed as the alpha argument
2945  template< typename MT3 // Type of the left-hand side target matrix
2946  , typename MT4 // Type of the left-hand side matrix operand
2947  , typename MT5 // Type of the right-hand side matrix operand
2948  , typename ST2 > // Type of the scalar value
2949  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2950  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2951  {
2952  using boost::numeric_cast;
2953 
      // Compile-time check: all three element types must be float.
2954  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2955  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2956  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2957 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
2958  const int M ( numeric_cast<int>( A.rows() ) );
2959  const int N ( numeric_cast<int>( B.columns() ) );
2960  const int K ( numeric_cast<int>( A.columns() ) );
2961  const int lda( numeric_cast<int>( A.spacing() ) );
2962  const int ldb( numeric_cast<int>( B.spacing() ) );
2963  const int ldc( numeric_cast<int>( C.spacing() ) );
2964 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
2965  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2966  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2967  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2968  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
2969  }
2970 #endif
2971  //**********************************************************************************************
2972 
2973  //**BLAS-based addition assignment to dense matrices (double precision)*************************
2974 #if BLAZE_BLAS_MODE
2975 
// BLAS-based kernel for the scaled addition assignment C += scalar*(A*B) on double precision
// matrices. It is selected if UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// The work is delegated to cblas_dgemm with alpha = scalar and beta = 1.0, so the product
// is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor, passed as the alpha argument
2988  template< typename MT3 // Type of the left-hand side target matrix
2989  , typename MT4 // Type of the left-hand side matrix operand
2990  , typename MT5 // Type of the right-hand side matrix operand
2991  , typename ST2 > // Type of the scalar value
2992  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2993  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2994  {
2995  using boost::numeric_cast;
2996 
      // Compile-time check: all three element types must be double.
2997  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2998  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2999  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3000 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3001  const int M ( numeric_cast<int>( A.rows() ) );
3002  const int N ( numeric_cast<int>( B.columns() ) );
3003  const int K ( numeric_cast<int>( A.columns() ) );
3004  const int lda( numeric_cast<int>( A.spacing() ) );
3005  const int ldb( numeric_cast<int>( B.spacing() ) );
3006  const int ldc( numeric_cast<int>( C.spacing() ) );
3007 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3008  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3009  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3010  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3011  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3012  }
3013 #endif
3014  //**********************************************************************************************
3015 
3016  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3017 #if BLAZE_BLAS_MODE
3018 
// BLAS-based kernel for the scaled addition assignment C += scalar*(A*B) on single precision
// complex matrices. It is selected if UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
// The work is delegated to cblas_cgemm with alpha = scalar and beta = (1,0), so the product
// is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor, converted to complex<float> for the alpha argument
3031  template< typename MT3 // Type of the left-hand side target matrix
3032  , typename MT4 // Type of the left-hand side matrix operand
3033  , typename MT5 // Type of the right-hand side matrix operand
3034  , typename ST2 > // Type of the scalar value
3035  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3036  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3037  {
3038  using boost::numeric_cast;
3039 
      // Compile-time check: all element types must be complex with a float value_type.
3040  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3041  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3042  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3044  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3045  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3046  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3047 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3048  const int M ( numeric_cast<int>( A.rows() ) );
3049  const int N ( numeric_cast<int>( B.columns() ) );
3050  const int K ( numeric_cast<int>( A.columns() ) );
3051  const int lda( numeric_cast<int>( A.spacing() ) );
3052  const int ldb( numeric_cast<int>( B.spacing() ) );
3053  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm takes alpha and beta by pointer, hence the named locals.
3054  const complex<float> alpha( scalar );
3055  const complex<float> beta ( 1.0F, 0.0F );
3056 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3057  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3058  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3059  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3060  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3061  }
3062 #endif
3063  //**********************************************************************************************
3064 
3065  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3066 #if BLAZE_BLAS_MODE
3067 
// BLAS-based kernel for the scaled addition assignment C += scalar*(A*B) on double precision
// complex matrices. It is selected if UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
// The work is delegated to cblas_zgemm with alpha = scalar and beta = (1,0), so the product
// is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor, converted to complex<double> for the alpha argument
3080  template< typename MT3 // Type of the left-hand side target matrix
3081  , typename MT4 // Type of the left-hand side matrix operand
3082  , typename MT5 // Type of the right-hand side matrix operand
3083  , typename ST2 > // Type of the scalar value
3084  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3085  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3086  {
3087  using boost::numeric_cast;
3088 
      // Compile-time check: all element types must be complex with a double value_type.
3089  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3090  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3091  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3093  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3094  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3095  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3096 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3097  const int M ( numeric_cast<int>( A.rows() ) );
3098  const int N ( numeric_cast<int>( B.columns() ) );
3099  const int K ( numeric_cast<int>( A.columns() ) );
3100  const int lda( numeric_cast<int>( A.spacing() ) );
3101  const int ldb( numeric_cast<int>( B.spacing() ) );
3102  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm takes alpha and beta by pointer, hence the named locals.
3103  const complex<double> alpha( scalar );
3104  const complex<double> beta ( 1.0, 0.0 );
3105 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3106  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3107  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3108  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3109  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3110  }
3111 #endif
3112  //**********************************************************************************************
3113 
3114  //**Addition assignment to sparse matrices******************************************************
3115  // No special implementation for the addition assignment to sparse matrices.
3116  //**********************************************************************************************
3117 
3118  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled matrix product to a dense matrix: the product of the
// two operands of rhs.matrix_, scaled by rhs.scalar_, is subtracted from lhs.
// Both operands are first evaluated into LT/RT objects. Depending on the number of elements
// of the target the default kernel (below TDMATTDMATMULT_THRESHOLD) or the BLAS kernel is
// used.
//   lhs - target dense matrix, updated in place
//   rhs - scaled matrix multiplication expression to be subtracted
3130  template< typename MT3 // Type of the target dense matrix
3131  , bool SO > // Storage order of the target dense matrix
3132  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3133  {
3134  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3135  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3136 
3137  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3138  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3139 
      // Nothing to do for an empty target or an empty inner dimension.
3140  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3141  return;
3142  }
3143 
3144  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3145  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3146 
      // Sanity checks: the evaluated operands must match the expression and the target.
3147  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3148  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3149  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3150  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3151  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3152  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3153 
      // Kernel selection by the element count of the target matrix.
3154  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
3155  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3156  else
3157  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3158  }
3159  //**********************************************************************************************
3160 
3161  //**Default subtraction assignment to dense matrices********************************************
// Non-vectorized default kernel for the scaled subtraction assignment C -= (A*B)*scalar.
// It is selected whenever UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does NOT hold
// (DisableIf). The scaled product is first evaluated into a temporary of type ResultType,
// which is then passed to subAssign().
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor applied to the product
3175  template< typename MT3 // Type of the left-hand side target matrix
3176  , typename MT4 // Type of the left-hand side matrix operand
3177  , typename MT5 // Type of the right-hand side matrix operand
3178  , typename ST2 > // Type of the scalar value
3179  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3180  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3181  {
3182  const ResultType tmp( A * B * scalar );
3183  subAssign( C, tmp );
3184  }
3185  //**********************************************************************************************
3186 
3187  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default kernel for the scaled subtraction assignment C -= (A*B)*scalar when the
// target C is a row-major dense matrix (DenseMatrix<MT3,false>). It is only selected if
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// The product is not computed here directly: one of the two operands is copied into its
// opposite storage order and the resulting expression is handed to subAssign().
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor applied to the product
3201  template< typename MT3 // Type of the left-hand side target matrix
3202  , typename MT4 // Type of the left-hand side matrix operand
3203  , typename MT5 // Type of the right-hand side matrix operand
3204  , typename ST2 > // Type of the scalar value
3205  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3206  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3207  {
      // Compile-time check: the opposite types of both operands must be row-major.
3208  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3209  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3210 
      // Only A is resizable: convert B.
      // NOTE(review): presumably the non-resizable operand is the cheaper one to copy - confirm.
3211  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3212  const typename MT5::OppositeType tmp( B );
3213  subAssign( ~C, A * tmp * scalar );
3214  }
      // Only B is resizable: convert A.
3215  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3216  const typename MT4::OppositeType tmp( A );
3217  subAssign( ~C, tmp * B * scalar );
3218  }
      // Same resizability: convert the operand with fewer elements (B on a tie).
3219  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3220  const typename MT5::OppositeType tmp( B );
3221  subAssign( ~C, A * tmp * scalar );
3222  }
3223  else {
3224  const typename MT4::OppositeType tmp( A );
3225  subAssign( ~C, tmp * B * scalar );
3226  }
3227  }
3228  //**********************************************************************************************
3229 
3230  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for the scaled subtraction assignment C -= (A*B)*scalar when the
// target C is a column-major dense matrix (DenseMatrix<MT3,true>). It is only selected if
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// The rows of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For every block the products A(.,k)*B(k,j) are accumulated over k in
// intrinsic registers, scaled by the broadcast scalar and subtracted from the loaded values
// of C.
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product (read via A.get(i,k))
//   B      - right-hand side operand of the product (read element-wise via B(k,j))
//   scalar - scaling factor applied to the product
3244  template< typename MT3 // Type of the left-hand side target matrix
3245  , typename MT4 // Type of the left-hand side matrix operand
3246  , typename MT5 // Type of the right-hand side matrix operand
3247  , typename ST2 > // Type of the scalar value
3248  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3249  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3250  {
3251  typedef IntrinsicTrait<ElementType> IT;
3252 
      // The row bound is A.spacing(), not A.rows().
      // NOTE(review): presumably this includes padding rows and relies on C providing at
      // least the same spacing - confirm.
3253  const size_t M( A.spacing() );
3254  const size_t N( B.columns() );
3255  const size_t K( A.columns() );
3256 
      // Scalar broadcast into an intrinsic vector; applied once per result vector after the k-loop.
3257  const IntrinsicType factor( set( scalar ) );
3258 
3259  size_t i( 0UL );
3260 
      // Tier 1: blocks of 8 intrinsic vectors (8*IT::size rows), one column of C at a time.
3261  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
3262  for( size_t j=0UL; j<N; ++j ) {
      // Accumulators. NOTE(review): used without explicit initialization, so this relies on
      // IntrinsicType's default constructor zero-initializing them - confirm.
3263  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3264  for( size_t k=0UL; k<K; ++k ) {
3265  const IntrinsicType b1( set( B(k,j) ) );
3266  xmm1 = xmm1 + A.get(i ,k) * b1;
3267  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3268  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3269  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3270  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
3271  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
3272  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
3273  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
3274  }
      // C(i..,j) -= accumulated product * scalar
3275  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3276  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3277  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3278  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3279  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) - xmm5 * factor );
3280  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) - xmm6 * factor );
3281  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) - xmm7 * factor );
3282  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) - xmm8 * factor );
3283  }
3284  }
      // Tier 2: blocks of 4 intrinsic vectors, two columns of C at a time (xmm1-4: column j,
      // xmm5-8: column j+1), so each loaded vector of A is reused for both columns.
3285  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
3286  size_t j( 0UL );
3287  for( ; (j+2UL) <= N; j+=2UL ) {
3288  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3289  for( size_t k=0UL; k<K; ++k ) {
3290  const IntrinsicType a1( A.get(i ,k) );
3291  const IntrinsicType a2( A.get(i+IT::size ,k) );
3292  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
3293  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
3294  const IntrinsicType b1( set( B(k,j ) ) );
3295  const IntrinsicType b2( set( B(k,j+1UL) ) );
3296  xmm1 = xmm1 + a1 * b1;
3297  xmm2 = xmm2 + a2 * b1;
3298  xmm3 = xmm3 + a3 * b1;
3299  xmm4 = xmm4 + a4 * b1;
3300  xmm5 = xmm5 + a1 * b2;
3301  xmm6 = xmm6 + a2 * b2;
3302  xmm7 = xmm7 + a3 * b2;
3303  xmm8 = xmm8 + a4 * b2;
3304  }
3305  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3306  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) - xmm2 * factor );
3307  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) - xmm3 * factor );
3308  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) - xmm4 * factor );
3309  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm5 * factor );
3310  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) - xmm6 * factor );
3311  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) - xmm7 * factor );
3312  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) - xmm8 * factor );
3313  }
      // Remaining single column when N is odd.
3314  if( j < N ) {
3315  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3316  for( size_t k=0UL; k<K; ++k ) {
3317  const IntrinsicType b1( set( B(k,j) ) );
3318  xmm1 = xmm1 + A.get(i ,k) * b1;
3319  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3320  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3321  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3322  }
3323  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3324  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3325  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3326  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3327  }
3328  }
      // Tier 3: blocks of 2 intrinsic vectors, two columns of C at a time.
3329  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
3330  size_t j( 0UL );
3331  for( ; (j+2UL) <= N; j+=2UL ) {
3332  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3333  for( size_t k=0UL; k<K; ++k ) {
3334  const IntrinsicType a1( A.get(i ,k) );
3335  const IntrinsicType a2( A.get(i+IT::size,k) );
3336  const IntrinsicType b1( set( B(k,j ) ) );
3337  const IntrinsicType b2( set( B(k,j+1UL) ) );
3338  xmm1 = xmm1 + a1 * b1;
3339  xmm2 = xmm2 + a2 * b1;
3340  xmm3 = xmm3 + a1 * b2;
3341  xmm4 = xmm4 + a2 * b2;
3342  }
3343  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3344  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) - xmm2 * factor );
3345  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm3 * factor );
3346  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) - xmm4 * factor );
3347  }
      // Remaining single column when N is odd.
3348  if( j < N ) {
3349  IntrinsicType xmm1, xmm2;
3350  for( size_t k=0UL; k<K; ++k ) {
3351  const IntrinsicType b1( set( B(k,j) ) );
3352  xmm1 = xmm1 + A.get(i ,k) * b1;
3353  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
3354  }
3355  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3356  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) - xmm2 * factor );
3357  }
3358  }
      // Tier 4: one last intrinsic vector of rows.
      // NOTE(review): only a single IT::size-wide block is handled here, so this presumably
      // relies on A.spacing() being a multiple of IT::size - confirm.
3359  if( i < M ) {
3360  size_t j( 0UL );
3361  for( ; (j+2UL) <= N; j+=2UL ) {
3362  IntrinsicType xmm1, xmm2;
3363  for( size_t k=0UL; k<K; ++k ) {
3364  const IntrinsicType a1( A.get(i,k) );
3365  xmm1 = xmm1 + a1 * set( B(k,j ) );
3366  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3367  }
3368  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3369  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) - xmm2 * factor );
3370  }
      // Remaining single column when N is odd.
3371  if( j < N ) {
3372  IntrinsicType xmm1;
3373  for( size_t k=0UL; k<K; ++k ) {
3374  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
3375  }
3376  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3377  }
3378  }
3379  }
3380  //**********************************************************************************************
3381 
3382  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS-based subtraction assignment kernel. It is selected if
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to
// selectDefaultSubAssignKernel(), i.e. no BLAS routine is called.
//   C      - target matrix, updated in place
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor applied to the product
3396  template< typename MT3 // Type of the left-hand side target matrix
3397  , typename MT4 // Type of the left-hand side matrix operand
3398  , typename MT5 // Type of the right-hand side matrix operand
3399  , typename ST2 > // Type of the scalar value
3400  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3401  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3402  {
3403  selectDefaultSubAssignKernel( C, A, B, scalar );
3404  }
3405  //**********************************************************************************************
3406 
3407  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3408 #if BLAZE_BLAS_MODE
3409 
// BLAS-based kernel for the scaled subtraction assignment C -= scalar*(A*B) on single
// precision (float) matrices. It is selected if UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds.
// The work is delegated to cblas_sgemm with alpha = -scalar and beta = 1.0F, so the negated
// product is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor; its negation is passed as the alpha argument
3422  template< typename MT3 // Type of the left-hand side target matrix
3423  , typename MT4 // Type of the left-hand side matrix operand
3424  , typename MT5 // Type of the right-hand side matrix operand
3425  , typename ST2 > // Type of the scalar value
3426  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3427  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3428  {
3429  using boost::numeric_cast;
3430 
      // Compile-time check: all three element types must be float.
3431  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
3432  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
3433  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
3434 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3435  const int M ( numeric_cast<int>( A.rows() ) );
3436  const int N ( numeric_cast<int>( B.columns() ) );
3437  const int K ( numeric_cast<int>( A.columns() ) );
3438  const int lda( numeric_cast<int>( A.spacing() ) );
3439  const int ldb( numeric_cast<int>( B.spacing() ) );
3440  const int ldc( numeric_cast<int>( C.spacing() ) );
3441 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3442  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3443  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3444  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3445  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3446  }
3447 #endif
3448  //**********************************************************************************************
3449 
3450  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3451 #if BLAZE_BLAS_MODE
3452 
// BLAS-based kernel for the scaled subtraction assignment C -= scalar*(A*B) on double
// precision matrices. It is selected if UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// The work is delegated to cblas_dgemm with alpha = -scalar and beta = 1.0, so the negated
// product is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor; its negation is passed as the alpha argument
3465  template< typename MT3 // Type of the left-hand side target matrix
3466  , typename MT4 // Type of the left-hand side matrix operand
3467  , typename MT5 // Type of the right-hand side matrix operand
3468  , typename ST2 > // Type of the scalar value
3469  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3470  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3471  {
3472  using boost::numeric_cast;
3473 
      // Compile-time check: all three element types must be double.
3474  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3475  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3476  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3477 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3478  const int M ( numeric_cast<int>( A.rows() ) );
3479  const int N ( numeric_cast<int>( B.columns() ) );
3480  const int K ( numeric_cast<int>( A.columns() ) );
3481  const int lda( numeric_cast<int>( A.spacing() ) );
3482  const int ldb( numeric_cast<int>( B.spacing() ) );
3483  const int ldc( numeric_cast<int>( C.spacing() ) );
3484 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3485  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3486  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3487  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3488  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3489  }
3490 #endif
3491  //**********************************************************************************************
3492 
3493  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3494 #if BLAZE_BLAS_MODE
3495 
// BLAS-based kernel for the scaled subtraction assignment C -= scalar*(A*B) on single
// precision complex matrices. It is selected if UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
// The work is delegated to cblas_cgemm with alpha = -scalar and beta = (1,0), so the negated
// product is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor; its negation is converted to complex<float> for alpha
3508  template< typename MT3 // Type of the left-hand side target matrix
3509  , typename MT4 // Type of the left-hand side matrix operand
3510  , typename MT5 // Type of the right-hand side matrix operand
3511  , typename ST2 > // Type of the scalar value
3512  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3513  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3514  {
3515  using boost::numeric_cast;
3516 
      // Compile-time check: all element types must be complex with a float value_type.
3517  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3518  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3519  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3521  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3522  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3523  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3524 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3525  const int M ( numeric_cast<int>( A.rows() ) );
3526  const int N ( numeric_cast<int>( B.columns() ) );
3527  const int K ( numeric_cast<int>( A.columns() ) );
3528  const int lda( numeric_cast<int>( A.spacing() ) );
3529  const int ldb( numeric_cast<int>( B.spacing() ) );
3530  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm takes alpha and beta by pointer, hence the named locals.
3531  const complex<float> alpha( -scalar );
3532  const complex<float> beta ( 1.0F, 0.0F );
3533 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3534  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3535  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3536  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3537  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3538  }
3539 #endif
3540  //**********************************************************************************************
3541 
3542  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3543 #if BLAZE_BLAS_MODE
3544 
// BLAS-based kernel for the scaled subtraction assignment C -= scalar*(A*B) on double
// precision complex matrices. It is selected if UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
// The work is delegated to cblas_zgemm with alpha = -scalar and beta = (1,0), so the negated
// product is accumulated onto the existing contents of C.
//   C      - target matrix, updated in place through C.data()
//   A      - left-hand side operand of the product
//   B      - right-hand side operand of the product
//   scalar - scaling factor; its negation is converted to complex<double> for alpha
3557  template< typename MT3 // Type of the left-hand side target matrix
3558  , typename MT4 // Type of the left-hand side matrix operand
3559  , typename MT5 // Type of the right-hand side matrix operand
3560  , typename ST2 > // Type of the scalar value
3561  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3562  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3563  {
3564  using boost::numeric_cast;
3565 
      // Compile-time check: all element types must be complex with a double value_type.
3566  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3567  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3568  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3570  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3571  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3572  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3573 
      // CBLAS expects int arguments; numeric_cast does a range-checked conversion from size_t.
      // The leading dimensions are taken from spacing().
3574  const int M ( numeric_cast<int>( A.rows() ) );
3575  const int N ( numeric_cast<int>( B.columns() ) );
3576  const int K ( numeric_cast<int>( A.columns() ) );
3577  const int lda( numeric_cast<int>( A.spacing() ) );
3578  const int ldb( numeric_cast<int>( B.spacing() ) );
3579  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm takes alpha and beta by pointer, hence the named locals.
3580  const complex<double> alpha( -scalar );
3581  const complex<double> beta ( 1.0, 0.0 );
3582 
      // The layout argument follows the storage order of C. For a row-major C both operands
      // are flagged as transposed.
      // NOTE(review): presumably because A and B are stored column-major here - confirm.
3583  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3584  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3585  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3586  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3587  }
3588 #endif
3589  //**********************************************************************************************
3590 
3591  //**Subtraction assignment to sparse matrices***************************************************
3592  // No special implementation for the subtraction assignment to sparse matrices.
3593  //**********************************************************************************************
3594 
3595  //**Multiplication assignment to dense matrices*************************************************
3596  // No special implementation for the multiplication assignment to dense matrices.
3597  //**********************************************************************************************
3598 
3599  //**Multiplication assignment to sparse matrices************************************************
3600  // No special implementation for the multiplication assignment to sparse matrices.
3601  //**********************************************************************************************
3602 
3603  //**Compile time checks*************************************************************************
3611  //**********************************************************************************************
3612 };
3614 //*************************************************************************************************
3615 
3616 
3617 
3618 
3619 //=================================================================================================
3620 //
3621 // GLOBAL BINARY ARITHMETIC OPERATORS
3622 //
3623 //=================================================================================================
3624 
3625 //*************************************************************************************************
/*!\brief Multiplication operator for the multiplication of two dense matrices that both derive
//        from DenseMatrix<...,true> (\f$ A=B*C \f$).
//
// \param lhs The left-hand side matrix of the multiplication.
// \param rhs The right-hand side matrix of the multiplication.
// \return A TDMatTDMatMultExpr<T1,T2> expression object built from the two operands.
// \exception std::invalid_argument Matrix sizes do not match.
//
// The operator itself performs no arithmetic: it validates the operand sizes and returns an
// expression object wrapping \a ~lhs and \a ~rhs. The \c true storage-order flag presumably
// denotes column-major storage (the class this expression belongs to derives from
// DenseMatrix<...,true> as well) - confirm against the DenseMatrix documentation.
*/
3651 template< typename T1 // Type of the left-hand side dense matrix
3652  , typename T2 > // Type of the right-hand side dense matrix
3653 inline const TDMatTDMatMultExpr<T1,T2>
3654  operator*( const DenseMatrix<T1,true>& lhs, const DenseMatrix<T2,true>& rhs )
3655 {
 // The inner dimensions must agree: columns of the left operand versus rows of the right one.
3656  if( (~lhs).columns() != (~rhs).rows() )
3657  throw std::invalid_argument( "Matrix sizes do not match" );
3658 
3659  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3660 }
3661 //*************************************************************************************************
3662 
3663 
3664 
3665 
3666 //=================================================================================================
3667 //
3668 // EXPRESSION TRAIT SPECIALIZATIONS
3669 //
3670 //=================================================================================================
3671 
3672 //*************************************************************************************************
// Specialization of TDMatDVecMultExprTrait for a left operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression shape (A*B)*v.
//
// Type is the result type of A*(B*v): the trait is applied to MT2 and VT first and then to MT1
// and that intermediate type. This is selected only if MT1 and MT2 are dense column-major
// matrices and VT is a dense, non-transpose vector; otherwise Type is INVALID_TYPE.
//
// Note that the nested trait type is named as a template argument of SelectType, so it is
// instantiated regardless of whether the condition holds.
3674 template< typename MT1, typename MT2, typename VT >
3675 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3676 {
3677  public:
3678  //**********************************************************************************************
3679  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3680  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3681  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3682  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
3683  , INVALID_TYPE >::Type Type;
3684  //**********************************************************************************************
3685 };
3687 //*************************************************************************************************
3688 
3689 
3690 //*************************************************************************************************
// Specialization of TDMatSVecMultExprTrait for a left operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression shape (A*B)*v with a sparse vector v.
//
// Type is the result type of A*(B*v): the inner step uses TDMatSVecMultExprTrait<MT2,VT>, the
// outer step uses the dense-vector trait TDMatDVecMultExprTrait on MT1 and that intermediate
// type (presumably because a dense matrix times a sparse vector yields a dense vector - confirm).
// This is selected only if MT1 and MT2 are dense column-major matrices and VT is a sparse,
// non-transpose vector; otherwise Type is INVALID_TYPE.
//
// Note that the nested trait type is named as a template argument of SelectType, so it is
// instantiated regardless of whether the condition holds.
3692 template< typename MT1, typename MT2, typename VT >
3693 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3694 {
3695  public:
3696  //**********************************************************************************************
3697  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3698  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3699  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3700  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
3701  , INVALID_TYPE >::Type Type;
3702  //**********************************************************************************************
3703 };
3705 //*************************************************************************************************
3706 
3707 
3708 //*************************************************************************************************
// Specialization of TDVecTDMatMultExprTrait for a right operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression shape v*(A*B) with a transpose vector v.
//
// Type is the result type of (v*A)*B: the trait is applied to VT and MT1 first and then to that
// intermediate type and MT2. This is selected only if VT is a dense transpose vector and MT1 and
// MT2 are dense column-major matrices; otherwise Type is INVALID_TYPE.
//
// Note that the nested trait type is named as a template argument of SelectType, so it is
// instantiated regardless of whether the condition holds.
3710 template< typename VT, typename MT1, typename MT2 >
3711 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3712 {
3713  public:
3714  //**********************************************************************************************
3715  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3716  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3717  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3718  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3719  , INVALID_TYPE >::Type Type;
3720  //**********************************************************************************************
3721 };
3723 //*************************************************************************************************
3724 
3725 
3726 //*************************************************************************************************
// Specialization of TSVecTDMatMultExprTrait for a right operand that is itself a
// TDMatTDMatMultExpr<MT1,MT2>, i.e. for the expression shape v*(A*B) with a sparse transpose
// vector v.
//
// Type is the result type of (v*A)*B: the inner step uses TSVecTDMatMultExprTrait<VT,MT1>, the
// outer step uses the dense-vector trait TDVecTDMatMultExprTrait on that intermediate type and
// MT2 (presumably because a sparse vector times a dense matrix yields a dense vector - confirm).
// This is selected only if VT is a sparse transpose vector and MT1 and MT2 are dense column-major
// matrices; otherwise Type is INVALID_TYPE.
//
// Note that the nested trait type is named as a template argument of SelectType, so it is
// instantiated regardless of whether the condition holds.
3728 template< typename VT, typename MT1, typename MT2 >
3729 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3730 {
3731  public:
3732  //**********************************************************************************************
3733  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3734  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3735  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3736  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3737  , INVALID_TYPE >::Type Type;
3738  //**********************************************************************************************
3739 };
3741 //*************************************************************************************************
3742 
3743 } // namespace blaze
3744 
3745 #endif