All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
39 #include <blaze/math/Intrinsics.h>
40 #include <blaze/math/shims/Reset.h>
58 #include <blaze/system/BLAS.h>
60 #include <blaze/util/Assert.h>
61 #include <blaze/util/Complex.h>
67 #include <blaze/util/EnableIf.h>
68 #include <blaze/util/InvalidType.h>
70 #include <blaze/util/SelectType.h>
71 #include <blaze/util/Types.h>
77 
78 
79 namespace blaze {
80 
81 //=================================================================================================
82 //
83 // CLASS TDMATTDMATMULTEXPR
84 //
85 //=================================================================================================
86 
87 //*************************************************************************************************
94 template< typename MT1 // Type of the left-hand side dense matrix
95  , typename MT2 > // Type of the right-hand side dense matrix
96 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
97  , private MatMatMultExpr
98  , private Computation
99 {
100  private:
101  //**Type definitions****************************************************************************
// Shorthand aliases for nested types of the two operand types MT1 and MT2.
102  typedef typename MT1::ResultType RT1;  // ResultType nested in the left-hand side operand type
103  typedef typename MT2::ResultType RT2;  // ResultType nested in the right-hand side operand type
104  typedef typename MT1::ElementType ET1;  // ElementType nested in the left-hand side operand type
105  typedef typename MT2::ElementType ET2;  // ElementType nested in the right-hand side operand type
106  typedef typename MT1::CompositeType CT1;  // CompositeType nested in the left-hand side operand type
107  typedef typename MT2::CompositeType CT2;  // CompositeType nested in the right-hand side operand type
108  //**********************************************************************************************
109 
110  //**********************************************************************************************
112 
113 
// Compile-time switch consulted by UseDefaultKernel (via a nested 'value') and used as the
// EnableIf condition of the cblas_sgemm based kernel.
// NOTE(review): the struct body (listing lines 117-119) is absent from this extract, so the
// actual condition cannot be stated here -- confirm against the original header.
115  template< typename T1, typename T2, typename T3 >
116  struct UseSinglePrecisionKernel {
120  };
122  //**********************************************************************************************
123 
124  //**********************************************************************************************
126 
127 
// Compile-time switch consulted by UseDefaultKernel (via a nested 'value') and used as the
// EnableIf condition of the cblas_dgemm based kernel.
// NOTE(review): the struct body (listing lines 131-133) is absent from this extract, so the
// actual condition cannot be stated here -- confirm against the original header.
129  template< typename T1, typename T2, typename T3 >
130  struct UseDoublePrecisionKernel {
134  };
136  //**********************************************************************************************
137 
138  //**********************************************************************************************
140 
141 
144  template< typename T1, typename T2, typename T3 >
145  struct UseSinglePrecisionComplexKernel {
146  typedef complex<float> Type;
147  enum { value = IsSame<typename T1::ElementType,Type>::value &&
148  IsSame<typename T2::ElementType,Type>::value &&
149  IsSame<typename T3::ElementType,Type>::value };
150  };
152  //**********************************************************************************************
153 
154  //**********************************************************************************************
156 
157 
160  template< typename T1, typename T2, typename T3 >
161  struct UseDoublePrecisionComplexKernel {
162  typedef complex<double> Type;
163  enum { value = IsSame<typename T1::ElementType,Type>::value &&
164  IsSame<typename T2::ElementType,Type>::value &&
165  IsSame<typename T3::ElementType,Type>::value };
166  };
168  //**********************************************************************************************
169 
170  //**********************************************************************************************
172 
173 
175  template< typename T1, typename T2, typename T3 >
176  struct UseDefaultKernel {
177  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
178  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
179  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
180  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
181  };
183  //**********************************************************************************************
184 
185  //**********************************************************************************************
187 
188 
190  template< typename T1, typename T2, typename T3 >
191  struct UseVectorizedDefaultKernel {
192  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
193  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
194  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
195  IntrinsicTrait<typename T1::ElementType>::addition &&
196  IntrinsicTrait<typename T1::ElementType>::multiplication };
197  };
199  //**********************************************************************************************
200 
201  public:
202  //**Type definitions****************************************************************************
// Public nested types of the multiplication expression.
// NOTE(review): ResultType (used below) and IntrinsicType (used by the vectorized kernels) are
// not defined anywhere in this extract; listing lines 203-204 and 208 are missing -- confirm
// their definitions against the original header.
205  typedef typename ResultType::OppositeType OppositeType;  // OppositeType nested in ResultType
206  typedef typename ResultType::TransposeType TransposeType;  // TransposeType nested in ResultType
207  typedef typename ResultType::ElementType ElementType;  // ElementType nested in ResultType
209  typedef const ElementType ReturnType;  // Return type of operator(): a const element value
210  typedef const ResultType CompositeType;  // Composite type of the expression: a const ResultType
211 
// Type of the lhs_ member: chosen via SelectType on IsExpression<MT1>, either a const MT1
// (stored by value) or a const MT1& (stored by reference).
213  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
214 
// Type of the rhs_ member: chosen via SelectType on IsExpression<MT2>, either a const MT2
// (stored by value) or a const MT2& (stored by reference).
216  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
217 
// Type used to evaluate the left-hand side operand inside the assign kernels: chosen via
// SelectType on IsComputation<MT1>, either a const RT1 or CT1.
219  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
220 
// Type used to evaluate the right-hand side operand inside the assign kernels: chosen via
// SelectType on IsComputation<MT2>, either a const RT2 or CT2.
222  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
223  //**********************************************************************************************
224 
225  //**Compilation flags***************************************************************************
// Compilation flag: the multiplication expression itself always reports vectorizable == 0.
227  enum { vectorizable = 0 };
228  //**********************************************************************************************
229 
230  //**Constructor*********************************************************************************
236  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
237  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
238  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
239  {
240  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
241  }
242  //**********************************************************************************************
243 
244  //**Access operator*****************************************************************************
251  inline ReturnType operator()( size_t i, size_t j ) const {
252  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
253  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
254 
255  ElementType tmp;
256 
257  if( lhs_.columns() != 0UL ) {
258  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
259  tmp = lhs_(i,0UL) * rhs_(0UL,j);
260  for( size_t k=1UL; k<end; k+=2UL ) {
261  tmp += lhs_(i,k ) * rhs_(k ,j);
262  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
263  }
264  if( end < lhs_.columns() ) {
265  tmp += lhs_(i,end) * rhs_(end,j);
266  }
267  }
268  else {
269  reset( tmp );
270  }
271 
272  return tmp;
273  }
274  //**********************************************************************************************
275 
276  //**Rows function*******************************************************************************
281  inline size_t rows() const {
282  return lhs_.rows();
283  }
284  //**********************************************************************************************
285 
286  //**Columns function****************************************************************************
291  inline size_t columns() const {
292  return rhs_.columns();
293  }
294  //**********************************************************************************************
295 
296  //**Left operand access*************************************************************************
301  inline LeftOperand leftOperand() const {
302  return lhs_;
303  }
304  //**********************************************************************************************
305 
306  //**Right operand access************************************************************************
311  inline RightOperand rightOperand() const {
312  return rhs_;
313  }
314  //**********************************************************************************************
315 
316  //**********************************************************************************************
322  template< typename T >
323  inline bool canAlias( const T* alias ) const {
324  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
325  }
326  //**********************************************************************************************
327 
328  //**********************************************************************************************
334  template< typename T >
335  inline bool isAliased( const T* alias ) const {
336  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
337  }
338  //**********************************************************************************************
339 
340  private:
341  //**Member variables****************************************************************************
344  //**********************************************************************************************
345 
346  //**Assignment to dense matrices****************************************************************
356  template< typename MT // Type of the target dense matrix
357  , bool SO > // Storage order of the target dense matrix
358  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
359  {
361 
362  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
363  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
364 
365  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
366  return;
367  }
368  else if( rhs.lhs_.columns() == 0UL ) {
369  reset( ~lhs );
370  return;
371  }
372 
373  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
374  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
375 
376  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
377  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
378  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
379  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
380  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
381  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
382 
383  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
384  TDMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
385  else
386  TDMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
387  }
389  //**********************************************************************************************
390 
391  //**Default assignment to dense matrices********************************************************
405  template< typename MT3 // Type of the left-hand side target matrix
406  , typename MT4 // Type of the left-hand side matrix operand
407  , typename MT5 > // Type of the right-hand side matrix operand
408  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
409  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
410  {
411  const size_t M( A.rows() );
412  const size_t N( B.columns() );
413  const size_t K( A.columns() );
414 
415  for( size_t i=0UL; i<M; ++i ) {
416  for( size_t j=0UL; j<N; ++j ) {
417  C(i,j) = A(i,0UL) * B(0UL,j);
418  }
419  for( size_t k=1UL; k<K; ++k ) {
420  for( size_t j=0UL; j<N; ++j ) {
421  C(i,j) += A(i,k) * B(k,j);
422  }
423  }
424  }
425  }
427  //**********************************************************************************************
428 
429  //**Vectorized default assignment to row-major dense matrices***********************************
443  template< typename MT3 // Type of the left-hand side target matrix
444  , typename MT4 // Type of the left-hand side matrix operand
445  , typename MT5 > // Type of the right-hand side matrix operand
446  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
447  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
448  {
449  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
450  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
451 
452  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
453  const typename MT5::OppositeType tmp( B );
454  assign( ~C, A * tmp );
455  }
456  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
457  const typename MT4::OppositeType tmp( A );
458  assign( ~C, tmp * B );
459  }
460  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
461  const typename MT5::OppositeType tmp( B );
462  assign( ~C, A * tmp );
463  }
464  else {
465  const typename MT4::OppositeType tmp( A );
466  assign( ~C, tmp * B );
467  }
468  }
470  //**********************************************************************************************
471 
472  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment kernel computing C = A * B for a column-major target
// (DenseMatrix<MT3,true>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
//
// C: target matrix, written with store() one intrinsic vector at a time.
// A: left-hand side operand, read one intrinsic vector at a time via A.get(i,k).
// B: right-hand side operand, read element-wise and broadcast via set( B(k,j) ).
//
// The row index i advances in steps of 8, 4 and 2 intrinsic vectors (IT::size elements each),
// followed by one final single-vector step.
// NOTE(review): every accumulator (xmm*) is default constructed and then summed into; this
// presumably relies on IntrinsicType's default constructor yielding zero -- IntrinsicType is
// not defined in this extract, confirm.
486  template< typename MT3 // Type of the left-hand side target matrix
487  , typename MT4 // Type of the left-hand side matrix operand
488  , typename MT5 > // Type of the right-hand side matrix operand
489  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
490  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
491  {
492  typedef IntrinsicTrait<ElementType> IT;
493 
// NOTE(review): M is taken from A.spacing(), not A.rows(); presumably the padded column length,
// so that only whole intrinsic vectors are processed -- confirm.
494  const size_t M( A.spacing() );
495  const size_t N( B.columns() );
496  const size_t K( A.columns() );
497 
498  size_t i( 0UL );
499 
// Step 1: 8 intrinsic vectors of rows at a time, one column j of C per pass.
500  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
501  for( size_t j=0UL; j<N; ++j ) {
502  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
503  for( size_t k=0UL; k<K; ++k ) {
504  const IntrinsicType b1( set( B(k,j) ) );
505  xmm1 = xmm1 + A.get(i ,k) * b1;
506  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
507  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
508  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
509  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
510  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
511  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
512  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
513  }
514  store( &(~C)(i ,j), xmm1 );
515  store( &(~C)(i+IT::size ,j), xmm2 );
516  store( &(~C)(i+IT::size*2UL,j), xmm3 );
517  store( &(~C)(i+IT::size*3UL,j), xmm4 );
518  store( &(~C)(i+IT::size*4UL,j), xmm5 );
519  store( &(~C)(i+IT::size*5UL,j), xmm6 );
520  store( &(~C)(i+IT::size*6UL,j), xmm7 );
521  store( &(~C)(i+IT::size*7UL,j), xmm8 );
522  }
523  }
// Step 2: 4 intrinsic vectors of rows at a time, two columns (j, j+1) of C per pass;
// xmm1-xmm4 belong to column j, xmm5-xmm8 to column j+1.
524  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
525  size_t j( 0UL );
526  for( ; (j+2UL) <= N; j+=2UL ) {
527  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
528  for( size_t k=0UL; k<K; ++k ) {
529  const IntrinsicType a1( A.get(i ,k) );
530  const IntrinsicType a2( A.get(i+IT::size ,k) );
531  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
532  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
533  const IntrinsicType b1( set( B(k,j ) ) );
534  const IntrinsicType b2( set( B(k,j+1UL) ) );
535  xmm1 = xmm1 + a1 * b1;
536  xmm2 = xmm2 + a2 * b1;
537  xmm3 = xmm3 + a3 * b1;
538  xmm4 = xmm4 + a4 * b1;
539  xmm5 = xmm5 + a1 * b2;
540  xmm6 = xmm6 + a2 * b2;
541  xmm7 = xmm7 + a3 * b2;
542  xmm8 = xmm8 + a4 * b2;
543  }
544  store( &(~C)(i ,j ), xmm1 );
545  store( &(~C)(i+IT::size ,j ), xmm2 );
546  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
547  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
548  store( &(~C)(i ,j+1UL), xmm5 );
549  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
550  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
551  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
552  }
// Last column if N is odd.
553  if( j < N ) {
554  IntrinsicType xmm1, xmm2, xmm3, xmm4;
555  for( size_t k=0UL; k<K; ++k ) {
556  const IntrinsicType b1( set( B(k,j) ) );
557  xmm1 = xmm1 + A.get(i ,k) * b1;
558  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
559  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
560  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
561  }
562  store( &(~C)(i ,j), xmm1 );
563  store( &(~C)(i+IT::size ,j), xmm2 );
564  store( &(~C)(i+IT::size*2UL,j), xmm3 );
565  store( &(~C)(i+IT::size*3UL,j), xmm4 );
566  }
567  }
// Step 3: 2 intrinsic vectors of rows at a time, two columns (j, j+1) of C per pass.
568  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
569  size_t j( 0UL );
570  for( ; (j+2UL) <= N; j+=2UL ) {
571  IntrinsicType xmm1, xmm2, xmm3, xmm4;
572  for( size_t k=0UL; k<K; ++k ) {
573  const IntrinsicType a1( A.get(i ,k) );
574  const IntrinsicType a2( A.get(i+IT::size,k) );
575  const IntrinsicType b1( set( B(k,j ) ) );
576  const IntrinsicType b2( set( B(k,j+1UL) ) );
577  xmm1 = xmm1 + a1 * b1;
578  xmm2 = xmm2 + a2 * b1;
579  xmm3 = xmm3 + a1 * b2;
580  xmm4 = xmm4 + a2 * b2;
581  }
582  store( &(~C)(i ,j ), xmm1 );
583  store( &(~C)(i+IT::size,j ), xmm2 );
584  store( &(~C)(i ,j+1UL), xmm3 );
585  store( &(~C)(i+IT::size,j+1UL), xmm4 );
586  }
// Last column if N is odd.
587  if( j < N ) {
588  IntrinsicType xmm1, xmm2;
589  for( size_t k=0UL; k<K; ++k ) {
590  const IntrinsicType b1( set( B(k,j) ) );
591  xmm1 = xmm1 + A.get(i ,k) * b1;
592  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
593  }
594  store( &(~C)(i ,j), xmm1 );
595  store( &(~C)(i+IT::size,j), xmm2 );
596  }
597  }
// Step 4: one more intrinsic vector of rows starting at i.
// NOTE(review): this handles exactly one vector, so it presumably assumes that M is a
// multiple of IT::size -- confirm.
598  if( i < M ) {
599  size_t j( 0UL );
600  for( ; (j+2UL) <= N; j+=2UL ) {
601  IntrinsicType xmm1, xmm2;
602  for( size_t k=0UL; k<K; ++k ) {
603  const IntrinsicType a1( A.get(i,k) );
604  xmm1 = xmm1 + a1 * set( B(k,j ) );
605  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
606  }
607  store( &(~C)(i,j ), xmm1 );
608  store( &(~C)(i,j+1UL), xmm2 );
609  }
// Last column if N is odd.
610  if( j < N ) {
611  IntrinsicType xmm1;
612  for( size_t k=0UL; k<K; ++k ) {
613  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
614  }
615  store( &(~C)(i,j), xmm1 );
616  }
617  }
618  }
620  //**********************************************************************************************
621 
622  //**BLAS-based assignment to dense matrices (default)*******************************************
636  template< typename MT3 // Type of the left-hand side target matrix
637  , typename MT4 // Type of the left-hand side matrix operand
638  , typename MT5 > // Type of the right-hand side matrix operand
639  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
640  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
641  {
642  selectDefaultAssignKernel( C, A, B );
643  }
645  //**********************************************************************************************
646 
647  //**BLAS-based assignment to dense matrices (single precision)**********************************
648 #if BLAZE_BLAS_MODE
649 
662  template< typename MT3 // Type of the left-hand side target matrix
663  , typename MT4 // Type of the left-hand side matrix operand
664  , typename MT5 > // Type of the right-hand side matrix operand
665  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
666  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
667  {
668  using boost::numeric_cast;
669 
670  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
672  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
673 
674  const int M ( numeric_cast<int>( A.rows() ) );
675  const int N ( numeric_cast<int>( B.columns() ) );
676  const int K ( numeric_cast<int>( A.columns() ) );
677  const int lda( numeric_cast<int>( A.spacing() ) );
678  const int ldb( numeric_cast<int>( B.spacing() ) );
679  const int ldc( numeric_cast<int>( C.spacing() ) );
680 
681  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
682  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
683  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
684  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
685  }
687 #endif
688  //**********************************************************************************************
689 
690  //**BLAS-based assignment to dense matrices (double precision)**********************************
691 #if BLAZE_BLAS_MODE
692 
705  template< typename MT3 // Type of the left-hand side target matrix
706  , typename MT4 // Type of the left-hand side matrix operand
707  , typename MT5 > // Type of the right-hand side matrix operand
708  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
709  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
710  {
711  using boost::numeric_cast;
712 
713  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
714  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
715  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
716 
717  const int M ( numeric_cast<int>( A.rows() ) );
718  const int N ( numeric_cast<int>( B.columns() ) );
719  const int K ( numeric_cast<int>( A.columns() ) );
720  const int lda( numeric_cast<int>( A.spacing() ) );
721  const int ldb( numeric_cast<int>( B.spacing() ) );
722  const int ldc( numeric_cast<int>( C.spacing() ) );
723 
724  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
725  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
726  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
727  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
728  }
730 #endif
731  //**********************************************************************************************
732 
733  //**BLAS-based assignment to dense matrices (single precision complex)**************************
734 #if BLAZE_BLAS_MODE
735 
748  template< typename MT3 // Type of the left-hand side target matrix
749  , typename MT4 // Type of the left-hand side matrix operand
750  , typename MT5 > // Type of the right-hand side matrix operand
751  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
752  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
753  {
754  using boost::numeric_cast;
755 
756  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
757  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
758  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
759  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
760  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
761  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
762 
763  const int M ( numeric_cast<int>( A.rows() ) );
764  const int N ( numeric_cast<int>( B.columns() ) );
765  const int K ( numeric_cast<int>( A.columns() ) );
766  const int lda( numeric_cast<int>( A.spacing() ) );
767  const int ldb( numeric_cast<int>( B.spacing() ) );
768  const int ldc( numeric_cast<int>( C.spacing() ) );
769  complex<float> alpha( 1.0F, 0.0F );
770  complex<float> beta ( 0.0F, 0.0F );
771 
772  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
773  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
774  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
775  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
776  }
778 #endif
779  //**********************************************************************************************
780 
781  //**BLAS-based assignment to dense matrices (double precision complex)**************************
782 #if BLAZE_BLAS_MODE
783 
796  template< typename MT3 // Type of the left-hand side target matrix
797  , typename MT4 // Type of the left-hand side matrix operand
798  , typename MT5 > // Type of the right-hand side matrix operand
799  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
800  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
801  {
802  using boost::numeric_cast;
803 
804  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
805  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
806  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
807  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
808  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
809  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
810 
811  const int M ( numeric_cast<int>( A.rows() ) );
812  const int N ( numeric_cast<int>( B.columns() ) );
813  const int K ( numeric_cast<int>( A.columns() ) );
814  const int lda( numeric_cast<int>( A.spacing() ) );
815  const int ldb( numeric_cast<int>( B.spacing() ) );
816  const int ldc( numeric_cast<int>( C.spacing() ) );
817  complex<double> alpha( 1.0, 0.0 );
818  complex<double> beta ( 0.0, 0.0 );
819 
820  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
821  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
822  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
823  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
824  }
826 #endif
827  //**********************************************************************************************
828 
829  //**Assignment to sparse matrices***************************************************************
842  template< typename MT // Type of the target sparse matrix
843  , bool SO > // Storage order of the target sparse matrix
844  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
845  {
847 
848  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
849 
855  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
856 
857  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
858  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
859 
860  const TmpType tmp( rhs );
861  assign( ~lhs, tmp );
862  }
864  //**********************************************************************************************
865 
866  //**Addition assignment to dense matrices*******************************************************
879  template< typename MT // Type of the target dense matrix
880  , bool SO > // Storage order of the target dense matrix
881  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
882  {
884 
885  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
886  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
887 
888  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
889  return;
890  }
891 
892  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
893  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
894 
895  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
896  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
897  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
898  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
899  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
900  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
901 
902  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
903  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
904  else
905  TDMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
906  }
908  //**********************************************************************************************
909 
910  //**Default addition assignment to dense matrices***********************************************
924  template< typename MT3 // Type of the left-hand side target matrix
925  , typename MT4 // Type of the left-hand side matrix operand
926  , typename MT5 > // Type of the right-hand side matrix operand
927  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
928  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
929  {
930  const size_t M( A.rows() );
931  const size_t N( B.columns() );
932  const size_t K( A.columns() );
933 
934  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
935  const size_t end( N & size_t(-2) );
936 
937  for( size_t i=0UL; i<M; ++i ) {
938  for( size_t k=0UL; k<K; ++k ) {
939  for( size_t j=0UL; j<end; j+=2UL ) {
940  C(i,j ) += A(i,k) * B(k,j );
941  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
942  }
943  if( end < N ) {
944  C(i,end) += A(i,k) * B(k,end);
945  }
946  }
947  }
948  }
950  //**********************************************************************************************
951 
952  //**Vectorized default addition assignment to row-major dense matrices**************************
966  template< typename MT3 // Type of the left-hand side target matrix
967  , typename MT4 // Type of the left-hand side matrix operand
968  , typename MT5 > // Type of the right-hand side matrix operand
969  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
970  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
971  {
972  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
973  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
974 
975  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
976  const typename MT5::OppositeType tmp( B );
977  addAssign( ~C, A * tmp );
978  }
979  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
980  const typename MT4::OppositeType tmp( A );
981  addAssign( ~C, tmp * B );
982  }
983  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
984  const typename MT5::OppositeType tmp( B );
985  addAssign( ~C, A * tmp );
986  }
987  else {
988  const typename MT4::OppositeType tmp( A );
989  addAssign( ~C, tmp * B );
990  }
991  }
993  //**********************************************************************************************
994 
995  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel for the addition assignment C += A * B to a column-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// The overload participates only if UseVectorizedDefaultKernel<MT3,MT4,MT5> enables it. Every
// accumulator is loaded from C, updated with A.get(i,k) * set( B(k,j) ) for all k and stored
// back to C. Rows are consumed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s).
// NOTE(review): set() presumably replicates the scalar B(k,j) into all lanes of an intrinsic
// vector - confirm against the intrinsics header.
1009  template< typename MT3 // Type of the left-hand side target matrix
1010  , typename MT4 // Type of the left-hand side matrix operand
1011  , typename MT5 > // Type of the right-hand side matrix operand
1012  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1013  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1014  {
1015  typedef IntrinsicTrait<ElementType> IT;
1016 
      // NOTE(review): the row bound is A.spacing(), not A.rows() - presumably the padded column
      // length, so that only whole intrinsic vectors are processed; this assumes that C provides
      // at least the same spacing. Confirm against the matrix types that enable this kernel.
1017  const size_t M( A.spacing() );
1018  const size_t N( B.columns() );
1019  const size_t K( A.columns() );
1020 
1021  size_t i( 0UL );
1022 
      // Row blocks of 8 intrinsic vectors, one column of C at a time
1023  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1024  for( size_t j=0UL; j<N; ++j ) {
1025  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1026  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1027  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1028  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1029  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1030  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1031  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1032  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1033  for( size_t k=0UL; k<K; ++k ) {
1034  const IntrinsicType b1( set( B(k,j) ) );
1035  xmm1 = xmm1 + A.get(i ,k) * b1;
1036  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1037  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1038  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1039  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
1040  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
1041  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
1042  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
1043  }
1044  store( &(~C)(i ,j), xmm1 );
1045  store( &(~C)(i+IT::size ,j), xmm2 );
1046  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1047  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1048  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1049  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1050  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1051  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1052  }
1053  }
      // Row blocks of 4 intrinsic vectors, two columns of C at a time
1054  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1055  size_t j( 0UL );
1056  for( ; (j+2UL) <= N; j+=2UL ) {
1057  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1058  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1059  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1060  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1061  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1062  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1063  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1064  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1065  for( size_t k=0UL; k<K; ++k ) {
1066  const IntrinsicType a1( A.get(i ,k) );
1067  const IntrinsicType a2( A.get(i+IT::size ,k) );
1068  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1069  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1070  const IntrinsicType b1( set( B(k,j ) ) );
1071  const IntrinsicType b2( set( B(k,j+1UL) ) );
1072  xmm1 = xmm1 + a1 * b1;
1073  xmm2 = xmm2 + a2 * b1;
1074  xmm3 = xmm3 + a3 * b1;
1075  xmm4 = xmm4 + a4 * b1;
1076  xmm5 = xmm5 + a1 * b2;
1077  xmm6 = xmm6 + a2 * b2;
1078  xmm7 = xmm7 + a3 * b2;
1079  xmm8 = xmm8 + a4 * b2;
1080  }
1081  store( &(~C)(i ,j ), xmm1 );
1082  store( &(~C)(i+IT::size ,j ), xmm2 );
1083  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1084  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1085  store( &(~C)(i ,j+1UL), xmm5 );
1086  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1087  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1088  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1089  }
      // Single leftover column in case N is odd
1090  if( j < N ) {
1091  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1092  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1093  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1094  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1095  for( size_t k=0UL; k<K; ++k ) {
1096  const IntrinsicType b1( set( B(k,j) ) );
1097  xmm1 = xmm1 + A.get(i ,k) * b1;
1098  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1099  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1100  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1101  }
1102  store( &(~C)(i ,j), xmm1 );
1103  store( &(~C)(i+IT::size ,j), xmm2 );
1104  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1105  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1106  }
1107  }
      // Row blocks of 2 intrinsic vectors, two columns of C at a time
1108  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1109  size_t j( 0UL );
1110  for( ; (j+2UL) <= N; j+=2UL ) {
1111  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1112  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1113  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1114  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1115  for( size_t k=0UL; k<K; ++k ) {
1116  const IntrinsicType a1( A.get(i ,k) );
1117  const IntrinsicType a2( A.get(i+IT::size,k) );
1118  const IntrinsicType b1( set( B(k,j ) ) );
1119  const IntrinsicType b2( set( B(k,j+1UL) ) );
1120  xmm1 = xmm1 + a1 * b1;
1121  xmm2 = xmm2 + a2 * b1;
1122  xmm3 = xmm3 + a1 * b2;
1123  xmm4 = xmm4 + a2 * b2;
1124  }
1125  store( &(~C)(i ,j ), xmm1 );
1126  store( &(~C)(i+IT::size,j ), xmm2 );
1127  store( &(~C)(i ,j+1UL), xmm3 );
1128  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1129  }
      // Single leftover column in case N is odd
1130  if( j < N ) {
1131  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1132  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1133  for( size_t k=0UL; k<K; ++k ) {
1134  const IntrinsicType b1( set( B(k,j) ) );
1135  xmm1 = xmm1 + A.get(i ,k) * b1;
1136  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
1137  }
1138  store( &(~C)(i ,j), xmm1 );
1139  store( &(~C)(i+IT::size,j), xmm2 );
1140  }
1141  }
      // Remaining rows: one intrinsic vector starting at row i, two columns of C at a time
1142  if( i < M ) {
1143  size_t j( 0UL );
1144  for( ; (j+2UL) <= N; j+=2UL ) {
1145  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1146  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1147  for( size_t k=0UL; k<K; ++k ) {
1148  const IntrinsicType a1( A.get(i,k) );
1149  xmm1 = xmm1 + a1 * set( B(k,j ) );
1150  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1151  }
1152  store( &(~C)(i,j ), xmm1 );
1153  store( &(~C)(i,j+1UL), xmm2 );
1154  }
      // Single leftover column in case N is odd
1155  if( j < N ) {
1156  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1157  for( size_t k=0UL; k<K; ++k ) {
1158  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
1159  }
1160  store( &(~C)(i,j), xmm1 );
1161  }
1162  }
1163  }
1165  //**********************************************************************************************
1166 
1167  //**BLAS-based addition assignment to dense matrices (default)**********************************
1181  template< typename MT3 // Type of the left-hand side target matrix
1182  , typename MT4 // Type of the left-hand side matrix operand
1183  , typename MT5 > // Type of the right-hand side matrix operand
1184  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1185  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1186  {
1187  selectDefaultAddAssignKernel( C, A, B );
1188  }
1190  //**********************************************************************************************
1191 
1192  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1193 #if BLAZE_BLAS_MODE
1194 
1207  template< typename MT3 // Type of the left-hand side target matrix
1208  , typename MT4 // Type of the left-hand side matrix operand
1209  , typename MT5 > // Type of the right-hand side matrix operand
1210  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1211  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1212  {
1213  using boost::numeric_cast;
1214 
1215  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1216  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1217  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1218 
1219  const int M ( numeric_cast<int>( A.rows() ) );
1220  const int N ( numeric_cast<int>( B.columns() ) );
1221  const int K ( numeric_cast<int>( A.columns() ) );
1222  const int lda( numeric_cast<int>( A.spacing() ) );
1223  const int ldb( numeric_cast<int>( B.spacing() ) );
1224  const int ldc( numeric_cast<int>( C.spacing() ) );
1225 
1226  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1227  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1228  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1229  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1230  }
1232 #endif
1233  //**********************************************************************************************
1234 
1235  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1236 #if BLAZE_BLAS_MODE
1237 
1250  template< typename MT3 // Type of the left-hand side target matrix
1251  , typename MT4 // Type of the left-hand side matrix operand
1252  , typename MT5 > // Type of the right-hand side matrix operand
1253  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1254  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1255  {
1256  using boost::numeric_cast;
1257 
1258  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1259  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1260  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1261 
1262  const int M ( numeric_cast<int>( A.rows() ) );
1263  const int N ( numeric_cast<int>( B.columns() ) );
1264  const int K ( numeric_cast<int>( A.columns() ) );
1265  const int lda( numeric_cast<int>( A.spacing() ) );
1266  const int ldb( numeric_cast<int>( B.spacing() ) );
1267  const int ldc( numeric_cast<int>( C.spacing() ) );
1268 
1269  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1270  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1271  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1272  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1273  }
1275 #endif
1276  //**********************************************************************************************
1277 
1278  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1279 #if BLAZE_BLAS_MODE
1280 
1293  template< typename MT3 // Type of the left-hand side target matrix
1294  , typename MT4 // Type of the left-hand side matrix operand
1295  , typename MT5 > // Type of the right-hand side matrix operand
1296  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1297  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1298  {
1299  using boost::numeric_cast;
1300 
1301  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1302  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1303  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1304  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1305  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1306  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1307 
1308  const int M ( numeric_cast<int>( A.rows() ) );
1309  const int N ( numeric_cast<int>( B.columns() ) );
1310  const int K ( numeric_cast<int>( A.columns() ) );
1311  const int lda( numeric_cast<int>( A.spacing() ) );
1312  const int ldb( numeric_cast<int>( B.spacing() ) );
1313  const int ldc( numeric_cast<int>( C.spacing() ) );
1314  const complex<float> alpha( 1.0F, 0.0F );
1315  const complex<float> beta ( 1.0F, 0.0F );
1316 
1317  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1318  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1319  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1320  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1321  }
1323 #endif
1324  //**********************************************************************************************
1325 
1326  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1327 #if BLAZE_BLAS_MODE
1328 
1341  template< typename MT3 // Type of the left-hand side target matrix
1342  , typename MT4 // Type of the left-hand side matrix operand
1343  , typename MT5 > // Type of the right-hand side matrix operand
1344  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1345  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1346  {
1347  using boost::numeric_cast;
1348 
1349  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1350  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1351  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1352  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1353  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1354  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1355 
1356  const int M ( numeric_cast<int>( A.rows() ) );
1357  const int N ( numeric_cast<int>( B.columns() ) );
1358  const int K ( numeric_cast<int>( A.columns() ) );
1359  const int lda( numeric_cast<int>( A.spacing() ) );
1360  const int ldb( numeric_cast<int>( B.spacing() ) );
1361  const int ldc( numeric_cast<int>( C.spacing() ) );
1362  const complex<double> alpha( 1.0, 0.0 );
1363  const complex<double> beta ( 1.0, 0.0 );
1364 
1365  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1366  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1367  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1368  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1369  }
1371 #endif
1372  //**********************************************************************************************
1373 
1374  //**Addition assignment to sparse matrices******************************************************
1375  // No special implementation for the addition assignment to sparse matrices.
1376  //**********************************************************************************************
1377 
1378  //**Subtraction assignment to dense matrices****************************************************
1391  template< typename MT // Type of the target dense matrix
1392  , bool SO > // Storage order of the target dense matrix
1393  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1394  {
1396 
1397  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1398  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1399 
1400  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1401  return;
1402  }
1403 
1404  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1405  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1406 
1407  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1408  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1409  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1410  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1411  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1412  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1413 
1414  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
1415  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1416  else
1417  TDMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1418  }
1420  //**********************************************************************************************
1421 
1422  //**Default subtraction assignment to dense matrices********************************************
1436  template< typename MT3 // Type of the left-hand side target matrix
1437  , typename MT4 // Type of the left-hand side matrix operand
1438  , typename MT5 > // Type of the right-hand side matrix operand
1439  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1440  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1441  {
1442  const size_t M( A.rows() );
1443  const size_t N( B.columns() );
1444  const size_t K( A.columns() );
1445 
1446  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1447  const size_t end( N & size_t(-2) );
1448 
1449  for( size_t i=0UL; i<M; ++i ) {
1450  for( size_t k=0UL; k<K; ++k ) {
1451  for( size_t j=0UL; j<end; j+=2UL ) {
1452  C(i,j ) -= A(i,k) * B(k,j );
1453  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1454  }
1455  if( end < N ) {
1456  C(i,end) -= A(i,k) * B(k,end);
1457  }
1458  }
1459  }
1460  }
1462  //**********************************************************************************************
1463 
1464  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1478  template< typename MT3 // Type of the left-hand side target matrix
1479  , typename MT4 // Type of the left-hand side matrix operand
1480  , typename MT5 > // Type of the right-hand side matrix operand
1481  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1482  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1483  {
1484  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1485  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1486 
1487  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1488  const typename MT5::OppositeType tmp( B );
1489  subAssign( ~C, A * tmp );
1490  }
1491  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1492  const typename MT4::OppositeType tmp( A );
1493  subAssign( ~C, tmp * B );
1494  }
1495  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1496  const typename MT5::OppositeType tmp( B );
1497  subAssign( ~C, A * tmp );
1498  }
1499  else {
1500  const typename MT4::OppositeType tmp( A );
1501  subAssign( ~C, tmp * B );
1502  }
1503  }
1505  //**********************************************************************************************
1506 
1507  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for the subtraction assignment C -= A * B to a column-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// The overload participates only if UseVectorizedDefaultKernel<MT3,MT4,MT5> enables it. Every
// accumulator is loaded from C, reduced by A.get(i,k) * set( B(k,j) ) for all k and stored
// back to C. Rows are consumed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s).
// NOTE(review): set() presumably replicates the scalar B(k,j) into all lanes of an intrinsic
// vector - confirm against the intrinsics header.
1521  template< typename MT3 // Type of the left-hand side target matrix
1522  , typename MT4 // Type of the left-hand side matrix operand
1523  , typename MT5 > // Type of the right-hand side matrix operand
1524  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1525  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1526  {
1527  typedef IntrinsicTrait<ElementType> IT;
1528 
      // NOTE(review): the row bound is A.spacing(), not A.rows() - presumably the padded column
      // length, so that only whole intrinsic vectors are processed; this assumes that C provides
      // at least the same spacing. Confirm against the matrix types that enable this kernel.
1529  const size_t M( A.spacing() );
1530  const size_t N( B.columns() );
1531  const size_t K( A.columns() );
1532 
1533  size_t i( 0UL );
1534 
      // Row blocks of 8 intrinsic vectors, one column of C at a time
1535  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1536  for( size_t j=0UL; j<N; ++j ) {
1537  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1538  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1539  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1540  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1541  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1542  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1543  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1544  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1545  for( size_t k=0UL; k<K; ++k ) {
1546  const IntrinsicType b1( set( B(k,j) ) );
1547  xmm1 = xmm1 - A.get(i ,k) * b1;
1548  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1549  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1550  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1551  xmm5 = xmm5 - A.get(i+IT::size*4UL,k) * b1;
1552  xmm6 = xmm6 - A.get(i+IT::size*5UL,k) * b1;
1553  xmm7 = xmm7 - A.get(i+IT::size*6UL,k) * b1;
1554  xmm8 = xmm8 - A.get(i+IT::size*7UL,k) * b1;
1555  }
1556  store( &(~C)(i ,j), xmm1 );
1557  store( &(~C)(i+IT::size ,j), xmm2 );
1558  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1559  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1560  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1561  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1562  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1563  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1564  }
1565  }
      // Row blocks of 4 intrinsic vectors, two columns of C at a time
1566  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1567  size_t j( 0UL );
1568  for( ; (j+2UL) <= N; j+=2UL ) {
1569  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1570  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1571  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1572  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1573  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1574  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1575  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1576  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1577  for( size_t k=0UL; k<K; ++k ) {
1578  const IntrinsicType a1( A.get(i ,k) );
1579  const IntrinsicType a2( A.get(i+IT::size ,k) );
1580  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1581  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1582  const IntrinsicType b1( set( B(k,j ) ) );
1583  const IntrinsicType b2( set( B(k,j+1UL) ) );
1584  xmm1 = xmm1 - a1 * b1;
1585  xmm2 = xmm2 - a2 * b1;
1586  xmm3 = xmm3 - a3 * b1;
1587  xmm4 = xmm4 - a4 * b1;
1588  xmm5 = xmm5 - a1 * b2;
1589  xmm6 = xmm6 - a2 * b2;
1590  xmm7 = xmm7 - a3 * b2;
1591  xmm8 = xmm8 - a4 * b2;
1592  }
1593  store( &(~C)(i ,j ), xmm1 );
1594  store( &(~C)(i+IT::size ,j ), xmm2 );
1595  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1596  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1597  store( &(~C)(i ,j+1UL), xmm5 );
1598  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1599  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1600  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1601  }
      // Single leftover column in case N is odd
1602  if( j < N ) {
1603  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1604  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1605  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1606  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1607  for( size_t k=0UL; k<K; ++k ) {
1608  const IntrinsicType b1( set( B(k,j) ) );
1609  xmm1 = xmm1 - A.get(i ,k) * b1;
1610  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1611  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1612  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1613  }
1614  store( &(~C)(i ,j), xmm1 );
1615  store( &(~C)(i+IT::size ,j), xmm2 );
1616  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1617  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1618  }
1619  }
      // Row blocks of 2 intrinsic vectors, two columns of C at a time
1620  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1621  size_t j( 0UL );
1622  for( ; (j+2UL) <= N; j+=2UL ) {
1623  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1624  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1625  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1626  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1627  for( size_t k=0UL; k<K; ++k ) {
1628  const IntrinsicType a1( A.get(i ,k) );
1629  const IntrinsicType a2( A.get(i+IT::size,k) );
1630  const IntrinsicType b1( set( B(k,j ) ) );
1631  const IntrinsicType b2( set( B(k,j+1UL) ) );
1632  xmm1 = xmm1 - a1 * b1;
1633  xmm2 = xmm2 - a2 * b1;
1634  xmm3 = xmm3 - a1 * b2;
1635  xmm4 = xmm4 - a2 * b2;
1636  }
1637  store( &(~C)(i ,j ), xmm1 );
1638  store( &(~C)(i+IT::size,j ), xmm2 );
1639  store( &(~C)(i ,j+1UL), xmm3 );
1640  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1641  }
      // Single leftover column in case N is odd
1642  if( j < N ) {
1643  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1644  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1645  for( size_t k=0UL; k<K; ++k ) {
1646  const IntrinsicType b1( set( B(k,j) ) );
1647  xmm1 = xmm1 - A.get(i ,k) * b1;
1648  xmm2 = xmm2 - A.get(i+IT::size,k) * b1;
1649  }
1650  store( &(~C)(i ,j), xmm1 );
1651  store( &(~C)(i+IT::size,j), xmm2 );
1652  }
1653  }
      // Remaining rows: one intrinsic vector starting at row i, two columns of C at a time
1654  if( i < M ) {
1655  size_t j( 0UL );
1656  for( ; (j+2UL) <= N; j+=2UL ) {
1657  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1658  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1659  for( size_t k=0UL; k<K; ++k ) {
1660  const IntrinsicType a1( A.get(i,k) );
1661  xmm1 = xmm1 - a1 * set( B(k,j ) );
1662  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1663  }
1664  store( &(~C)(i,j ), xmm1 );
1665  store( &(~C)(i,j+1UL), xmm2 );
1666  }
      // Single leftover column in case N is odd
1667  if( j < N ) {
1668  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1669  for( size_t k=0UL; k<K; ++k ) {
1670  xmm1 = xmm1 - A.get(i,k) * set( B(k,j) );
1671  }
1672  store( &(~C)(i,j), xmm1 );
1673  }
1674  }
1675  }
1677  //**********************************************************************************************
1678 
1679  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1693  template< typename MT3 // Type of the left-hand side target matrix
1694  , typename MT4 // Type of the left-hand side matrix operand
1695  , typename MT5 > // Type of the right-hand side matrix operand
1696  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1697  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1698  {
1699  selectDefaultSubAssignKernel( C, A, B );
1700  }
1702  //**********************************************************************************************
1703 
1704  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1705 #if BLAZE_BLAS_MODE
1706 
1719  template< typename MT3 // Type of the left-hand side target matrix
1720  , typename MT4 // Type of the left-hand side matrix operand
1721  , typename MT5 > // Type of the right-hand side matrix operand
1722  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1723  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1724  {
1725  using boost::numeric_cast;
1726 
1727  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1728  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1729  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1730 
1731  const int M ( numeric_cast<int>( A.rows() ) );
1732  const int N ( numeric_cast<int>( B.columns() ) );
1733  const int K ( numeric_cast<int>( A.columns() ) );
1734  const int lda( numeric_cast<int>( A.spacing() ) );
1735  const int ldb( numeric_cast<int>( B.spacing() ) );
1736  const int ldc( numeric_cast<int>( C.spacing() ) );
1737 
1738  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1739  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1740  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1741  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1742  }
1744 #endif
1745  //**********************************************************************************************
1746 
1747  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1748 #if BLAZE_BLAS_MODE
1749 
1762  template< typename MT3 // Type of the left-hand side target matrix
1763  , typename MT4 // Type of the left-hand side matrix operand
1764  , typename MT5 > // Type of the right-hand side matrix operand
1765  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1766  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1767  {
1768  using boost::numeric_cast;
1769 
1770  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1771  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1772  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1773 
1774  const int M ( numeric_cast<int>( A.rows() ) );
1775  const int N ( numeric_cast<int>( B.columns() ) );
1776  const int K ( numeric_cast<int>( A.columns() ) );
1777  const int lda( numeric_cast<int>( A.spacing() ) );
1778  const int ldb( numeric_cast<int>( B.spacing() ) );
1779  const int ldc( numeric_cast<int>( C.spacing() ) );
1780 
1781  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1782  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1783  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1784  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1785  }
1787 #endif
1788  //**********************************************************************************************
1789 
1790  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1791 #if BLAZE_BLAS_MODE
1792 
1805  template< typename MT3 // Type of the left-hand side target matrix
1806  , typename MT4 // Type of the left-hand side matrix operand
1807  , typename MT5 > // Type of the right-hand side matrix operand
1808  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1809  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1810  {
1811  using boost::numeric_cast;
1812 
1813  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1814  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1815  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1816  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1817  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1818  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1819 
1820  const int M ( numeric_cast<int>( A.rows() ) );
1821  const int N ( numeric_cast<int>( B.columns() ) );
1822  const int K ( numeric_cast<int>( A.columns() ) );
1823  const int lda( numeric_cast<int>( A.spacing() ) );
1824  const int ldb( numeric_cast<int>( B.spacing() ) );
1825  const int ldc( numeric_cast<int>( C.spacing() ) );
1826  const complex<float> alpha( -1.0F, 0.0F );
1827  const complex<float> beta ( 1.0F, 0.0F );
1828 
1829  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1830  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1831  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1832  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1833  }
1835 #endif
1836  //**********************************************************************************************
1837 
1838  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1839 #if BLAZE_BLAS_MODE
1840 
1853  template< typename MT3 // Type of the left-hand side target matrix
1854  , typename MT4 // Type of the left-hand side matrix operand
1855  , typename MT5 > // Type of the right-hand side matrix operand
1856  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1857  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1858  {
1859  using boost::numeric_cast;
1860 
1861  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1862  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1863  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1864  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1865  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1866  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1867 
1868  const int M ( numeric_cast<int>( A.rows() ) );
1869  const int N ( numeric_cast<int>( B.columns() ) );
1870  const int K ( numeric_cast<int>( A.columns() ) );
1871  const int lda( numeric_cast<int>( A.spacing() ) );
1872  const int ldb( numeric_cast<int>( B.spacing() ) );
1873  const int ldc( numeric_cast<int>( C.spacing() ) );
1874  const complex<double> alpha( -1.0, 0.0 );
1875  const complex<double> beta ( 1.0, 0.0 );
1876 
1877  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1878  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1879  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1880  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1881  }
1883 #endif
1884  //**********************************************************************************************
1885 
1886  //**Subtraction assignment to sparse matrices***************************************************
1887  // No special implementation for the subtraction assignment to sparse matrices.
1888  //**********************************************************************************************
1889 
1890  //**Multiplication assignment to dense matrices*************************************************
1891  // No special implementation for the multiplication assignment to dense matrices.
1892  //**********************************************************************************************
1893 
1894  //**Multiplication assignment to sparse matrices************************************************
1895  // No special implementation for the multiplication assignment to sparse matrices.
1896  //**********************************************************************************************
1897 
1898  //**Compile time checks*************************************************************************
1905  //**********************************************************************************************
1906 };
1907 //*************************************************************************************************
1908 
1909 
1910 
1911 
1912 //=================================================================================================
1913 //
1914 // DMATSCALARMULTEXPR SPECIALIZATION
1915 //
1916 //=================================================================================================
1917 
1918 //*************************************************************************************************
1926 template< typename MT1 // Type of the left-hand side dense matrix
1927  , typename MT2 // Type of the right-hand side dense matrix
1928  , typename ST > // Type of the right-hand side scalar value
1929 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
1930  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
1931  , private MatScalarMultExpr
1932  , private Computation
1933 {
1934  private:
1935  //**Type definitions****************************************************************************
// Private shorthands for the wrapped product expression and the types exported by its operands.
1936  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;  // The matrix/matrix product expression that is scaled.
1937  typedef typename MMM::ResultType RES;  // ResultType exported by the product expression.
1938  typedef typename MT1::ResultType RT1;  // ResultType exported by the left-hand side matrix type.
1939  typedef typename MT2::ResultType RT2;  // ResultType exported by the right-hand side matrix type.
1940  typedef typename MT1::CompositeType CT1;  // CompositeType exported by the left-hand side matrix type.
1941  typedef typename MT2::CompositeType CT2;  // CompositeType exported by the right-hand side matrix type.
1942  //**********************************************************************************************
1943 
1944  //**********************************************************************************************
1946 
// Compile-time predicate for the cblas_sgemm-based kernels of this class.
// T1, T2 and T3 are the three matrix types, T4 is the scalar type. 'value' is nonzero only if
// the element types of all three matrices satisfy IsFloat and T4 does not satisfy IsComplex.
1949  template< typename T1, typename T2, typename T3, typename T4 >
1950  struct UseSinglePrecisionKernel {
1951  enum { value = IsFloat<typename T1::ElementType>::value &&
1952  IsFloat<typename T2::ElementType>::value &&
1953  IsFloat<typename T3::ElementType>::value &&
1954  !IsComplex<T4>::value };
1955  };
1956  //**********************************************************************************************
1957 
1958  //**********************************************************************************************
1960 
// Compile-time predicate for the cblas_dgemm-based kernels of this class.
// T1, T2 and T3 are the three matrix types, T4 is the scalar type. 'value' is nonzero only if
// the element types of all three matrices satisfy IsDouble and T4 does not satisfy IsComplex.
1963  template< typename T1, typename T2, typename T3, typename T4 >
1964  struct UseDoublePrecisionKernel {
1965  enum { value = IsDouble<typename T1::ElementType>::value &&
1966  IsDouble<typename T2::ElementType>::value &&
1967  IsDouble<typename T3::ElementType>::value &&
1968  !IsComplex<T4>::value };
1969  };
1970  //**********************************************************************************************
1971 
1972  //**********************************************************************************************
1974 
// Compile-time predicate for the cblas_cgemm-based kernels of this class.
// 'value' is nonzero only if the element types of T1, T2 and T3 are all exactly complex<float>.
// Unlike the real-valued predicates, the scalar type is not part of the condition.
1977  template< typename T1, typename T2, typename T3 >
1978  struct UseSinglePrecisionComplexKernel {
1979  typedef complex<float> Type;
1980  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1981  IsSame<typename T2::ElementType,Type>::value &&
1982  IsSame<typename T3::ElementType,Type>::value };
1983  };
1984  //**********************************************************************************************
1985 
1986  //**********************************************************************************************
1988 
// Compile-time predicate for the cblas_zgemm-based kernels of this class.
// 'value' is nonzero only if the element types of T1, T2 and T3 are all exactly complex<double>.
// Unlike the real-valued predicates, the scalar type is not part of the condition.
1991  template< typename T1, typename T2, typename T3 >
1992  struct UseDoublePrecisionComplexKernel {
1993  typedef complex<double> Type;
1994  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1995  IsSame<typename T2::ElementType,Type>::value &&
1996  IsSame<typename T3::ElementType,Type>::value };
1997  };
1998  //**********************************************************************************************
1999 
2000  //**********************************************************************************************
2002 
// Compile-time predicate selecting the non-BLAS fallback of the selectBlas...Kernel overloads.
// 'value' is nonzero if BLAZE_BLAS_MODE evaluates to false, or if none of the four BLAS
// predicates above (single, double, single complex, double complex) holds for the given types.
2004  template< typename T1, typename T2, typename T3, typename T4 >
2005  struct UseDefaultKernel {
2006  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2007  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2008  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2009  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2010  };
2011  //**********************************************************************************************
2012 
2013  //**********************************************************************************************
2015 
// Compile-time predicate selecting the intrinsics-based default kernels.
// 'value' is nonzero only if all of the following hold:
//  - T1, T2 and T3 each report 'vectorizable',
//  - the three matrices share one element type and the scalar type T4 is that same type,
//  - IntrinsicTrait of that element type reports both 'addition' and 'multiplication'.
2017  template< typename T1, typename T2, typename T3, typename T4 >
2018  struct UseVectorizedDefaultKernel {
2019  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2020  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2021  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2022  IsSame<typename T1::ElementType,T4>::value &&
2023  IntrinsicTrait<typename T1::ElementType>::addition &&
2024  IntrinsicTrait<typename T1::ElementType>::multiplication };
2025  };
2026  //**********************************************************************************************
2027 
2028  public:
2029  //**Type definitions****************************************************************************
// Public type interface of the scaled matrix/matrix product expression.
2030  typedef DMatScalarMultExpr<MMM,ST,true> This;  // Type of this expression specialization.
2031  typedef typename MultTrait<RES,ST>::Type ResultType;  // Type produced by MultTrait for (product result, scalar).
2032  typedef typename ResultType::OppositeType OppositeType;  // OppositeType exported by ResultType.
2033  typedef typename ResultType::TransposeType TransposeType;  // TransposeType exported by ResultType.
2034  typedef typename ResultType::ElementType ElementType;  // Element type of ResultType.
2035  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  // Intrinsic type used by the vectorized kernels.
2036  typedef const ElementType ReturnType;  // Return type of operator(): elements are returned by value.
2037  typedef const ResultType CompositeType;  // Type used when this expression is an operand of another expression.
2038 
// Left operand of the scaling: the wrapped matrix/matrix product expression.
2040  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2041 
// Right operand of the scaling: the scalar.
2043  typedef ST RightOperand;
2044 
// Type used to hold the left matrix operand inside the kernels: const RT1 or CT1, chosen on
// IsComputation<MT1> (presumably RT1 when MT1 is a computation; confirm against SelectType).
2046  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2047 
// Type used to hold the right matrix operand inside the kernels: const RT2 or CT2, chosen on
// IsComputation<MT2> (presumably RT2 when MT2 is a computation; confirm against SelectType).
2049  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2050  //**********************************************************************************************
2051 
2052  //**Compilation flags***************************************************************************
// Compilation flag: this expression reports itself as not vectorizable (value 0).
2054  enum { vectorizable = 0 };
2055  //**********************************************************************************************
2056 
2057  //**Constructor*********************************************************************************
// Constructs the scaled product expression from its two operands.
// \param matrix The matrix/matrix product expression to be scaled.
// \param scalar The scalar factor.
2063  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2064  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2065  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2066  {}
2067  //**********************************************************************************************
2068 
2069  //**Access operator*****************************************************************************
// Element access: returns element (i,j) of the wrapped product multiplied by the scalar.
// \param i Row index; checked against matrix_.rows() by BLAZE_INTERNAL_ASSERT only.
// \param j Column index; checked against matrix_.columns() by BLAZE_INTERNAL_ASSERT only.
// \return The scaled element, by value.
2076  inline ReturnType operator()( size_t i, size_t j ) const {
2077  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2078  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2079  return matrix_(i,j) * scalar_;
2080  }
2081  //**********************************************************************************************
2082 
2083  //**Rows function*******************************************************************************
// Returns the number of rows, forwarded from the wrapped product expression.
2088  inline size_t rows() const {
2089  return matrix_.rows();
2090  }
2091  //**********************************************************************************************
2092 
2093  //**Columns function****************************************************************************
// Returns the number of columns, forwarded from the wrapped product expression.
2098  inline size_t columns() const {
2099  return matrix_.columns();
2100  }
2101  //**********************************************************************************************
2102 
2103  //**Left operand access*************************************************************************
// Returns the left operand of the scaling, i.e. the wrapped matrix/matrix product expression.
2108  inline LeftOperand leftOperand() const {
2109  return matrix_;
2110  }
2111  //**********************************************************************************************
2112 
2113  //**Right operand access************************************************************************
// Returns the right operand of the scaling, i.e. the scalar factor.
2118  inline RightOperand rightOperand() const {
2119  return scalar_;
2120  }
2121  //**********************************************************************************************
2122 
2123  //**********************************************************************************************
// Aliasing query: delegates to canAlias() of the wrapped product expression.
// The scalar operand is not consulted.
// \param alias The address to be checked.
// \return The result reported by the wrapped product expression.
2129  template< typename T >
2130  inline bool canAlias( const T* alias ) const {
2131  return matrix_.canAlias( alias );
2132  }
2133  //**********************************************************************************************
2134 
2135  //**********************************************************************************************
// Aliasing query: delegates to isAliased() of the wrapped product expression.
// The scalar operand is not consulted.
// \param alias The address to be checked.
// \return The result reported by the wrapped product expression.
2141  template< typename T >
2142  inline bool isAliased( const T* alias ) const {
2143  return matrix_.isAliased( alias );
2144  }
2145  //**********************************************************************************************
2146 
2147  private:
2148  //**Member variables****************************************************************************
// Operands of the scaling; both are stored by value (LeftOperand is a const expression object).
2149  LeftOperand matrix_;  // The wrapped matrix/matrix product expression.
2150  RightOperand scalar_;  // The scalar factor.
2151  //**********************************************************************************************
2152 
2153  //**Assignment to dense matrices****************************************************************
// Assignment of a scaled matrix/matrix product to a dense matrix (lhs = (A*B)*s).
// Handles the empty cases directly, evaluates both matrix operands into LT/RT objects and then
// dispatches on the target size: targets with fewer than TDMATTDMATMULT_THRESHOLD elements use
// selectDefaultAssignKernel, all others go through selectBlasAssignKernel.
// \param lhs The target dense matrix; its dimensions are only checked by internal assertions.
// \param rhs The scaled product expression to be assigned.
2162  template< typename MT3 // Type of the target dense matrix
2163  , bool SO > // Storage order of the target dense matrix
2164  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2165  {
2167 
2168  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2169  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2170 
2171  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2172  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2173 
// Empty target: nothing to do. Empty inner dimension: the product has no terms, so the
// target is reset instead of running a kernel (the default kernel reads A(i,0) unconditionally).
2174  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2175  return;
2176  }
2177  else if( left.columns() == 0UL ) {
2178  reset( ~lhs );
2179  return;
2180  }
2181 
2182  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2183  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2184 
2185  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2186  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2187  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2188  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2189  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2190  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2191 
// Size-based kernel selection: small targets use the default kernels, larger ones the BLAS path.
2192  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2193  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2194  else
2195  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2196  }
2197  //**********************************************************************************************
2198 
2199  //**Default assignment to dense matrices********************************************************
// Default (non-vectorized) kernel for C = (A*B)*scalar.
// Enabled when UseVectorizedDefaultKernel does not hold for the given types.
// Each row i of C is first initialized with the k=0 term A(i,0)*B(0,k), then the remaining
// terms are accumulated, and finally the row is multiplied by the scalar.
// Precondition: A has at least one column, because A(i,0) and B(0,k) are read unconditionally;
// the dense assign() in this class returns early when the inner dimension is zero.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2213  template< typename MT3 // Type of the left-hand side target matrix
2214  , typename MT4 // Type of the left-hand side matrix operand
2215  , typename MT5 // Type of the right-hand side matrix operand
2216  , typename ST2 > // Type of the scalar value
2217  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2218  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2219  {
2220  for( size_t i=0UL; i<A.rows(); ++i ) {
2221  for( size_t k=0UL; k<B.columns(); ++k ) {
2222  C(i,k) = A(i,0UL) * B(0UL,k);
2223  }
2224  for( size_t j=1UL; j<A.columns(); ++j ) {
2225  for( size_t k=0UL; k<B.columns(); ++k ) {
2226  C(i,k) += A(i,j) * B(j,k);
2227  }
2228  }
2229  for( size_t k=0UL; k<B.columns(); ++k ) {
2230  C(i,k) *= scalar;
2231  }
2232  }
2233  }
2234  //**********************************************************************************************
2235 
2236  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for a row-major target (DenseMatrix<MT3,false>).
// No arithmetic is done here: one operand is copied into a matrix of its OppositeType (which
// the two constraints require to be row-major) and the product is re-dispatched through
// assign() with mixed storage orders.
// Which operand is converted:
//  - if exactly one of MT4/MT5 is resizable, the non-resizable one is converted;
//  - otherwise the operand with fewer elements is converted (B on a tie).
// \param C The row-major target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2250  template< typename MT3 // Type of the left-hand side target matrix
2251  , typename MT4 // Type of the left-hand side matrix operand
2252  , typename MT5 // Type of the right-hand side matrix operand
2253  , typename ST2 > // Type of the scalar value
2254  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2255  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2256  {
2257  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2258  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2259 
2260  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2261  const typename MT5::OppositeType tmp( B );
2262  assign( ~C, A * tmp * scalar );
2263  }
2264  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2265  const typename MT4::OppositeType tmp( A );
2266  assign( ~C, tmp * B * scalar );
2267  }
2268  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2269  const typename MT5::OppositeType tmp( B );
2270  assign( ~C, A * tmp * scalar );
2271  }
2272  else {
2273  const typename MT4::OppositeType tmp( A );
2274  assign( ~C, tmp * B * scalar );
2275  }
2276  }
2277  //**********************************************************************************************
2278 
2279  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for a column-major target (DenseMatrix<MT3,true>).
// Computes C = (A*B)*scalar by walking the row index i in blocks of 8, 4, 2 and finally 1
// intrinsic vectors (IT::size elements each). For each block and each column j (or pair of
// columns) it accumulates A.get(i..,k) * broadcast(B(k,j)) over k in local accumulators,
// multiplies the accumulators by the broadcast scalar and stores them into C.
// NOTE(review): the xmm accumulators are declared without an initializer; the kernel relies on
// IntrinsicType default construction yielding zero - confirm against the IntrinsicType definition.
// NOTE(review): M is taken from A.spacing(), not A.rows(); presumably the padded column length,
// so the final 'i < M' step can process one full vector - confirm against the matrix types used.
// \param C The column-major target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2293  template< typename MT3 // Type of the left-hand side target matrix
2294  , typename MT4 // Type of the left-hand side matrix operand
2295  , typename MT5 // Type of the right-hand side matrix operand
2296  , typename ST2 > // Type of the scalar value
2297  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2298  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2299  {
2300  typedef IntrinsicTrait<ElementType> IT;
2301 
2302  const size_t M( A.spacing() );
2303  const size_t N( B.columns() );
2304  const size_t K( A.columns() );
2305 
// The scalar is broadcast once and applied to every accumulator just before the store.
2306  const IntrinsicType factor( set( scalar ) );
2307 
2308  size_t i( 0UL );
2309 
// Blocks of 8 intrinsic vectors along i, one column j at a time.
2310  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2311  for( size_t j=0UL; j<N; ++j ) {
2312  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2313  for( size_t k=0UL; k<K; ++k ) {
2314  const IntrinsicType b1( set( B(k,j) ) );
2315  xmm1 = xmm1 + A.get(i ,k) * b1;
2316  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2317  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2318  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2319  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2320  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2321  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2322  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2323  }
2324  store( &(~C)(i ,j), xmm1 * factor );
2325  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2326  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2327  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2328  store( &(~C)(i+IT::size*4UL,j), xmm5 * factor );
2329  store( &(~C)(i+IT::size*5UL,j), xmm6 * factor );
2330  store( &(~C)(i+IT::size*6UL,j), xmm7 * factor );
2331  store( &(~C)(i+IT::size*7UL,j), xmm8 * factor );
2332  }
2333  }
// Blocks of 4 intrinsic vectors along i, two columns at a time plus a single-column remainder.
2334  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2335  size_t j( 0UL );
2336  for( ; (j+2UL) <= N; j+=2UL ) {
2337  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2338  for( size_t k=0UL; k<K; ++k ) {
2339  const IntrinsicType a1( A.get(i ,k) );
2340  const IntrinsicType a2( A.get(i+IT::size ,k) );
2341  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2342  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2343  const IntrinsicType b1( set( B(k,j ) ) );
2344  const IntrinsicType b2( set( B(k,j+1UL) ) );
2345  xmm1 = xmm1 + a1 * b1;
2346  xmm2 = xmm2 + a2 * b1;
2347  xmm3 = xmm3 + a3 * b1;
2348  xmm4 = xmm4 + a4 * b1;
2349  xmm5 = xmm5 + a1 * b2;
2350  xmm6 = xmm6 + a2 * b2;
2351  xmm7 = xmm7 + a3 * b2;
2352  xmm8 = xmm8 + a4 * b2;
2353  }
2354  store( &(~C)(i ,j ), xmm1 * factor );
2355  store( &(~C)(i+IT::size ,j ), xmm2 * factor );
2356  store( &(~C)(i+IT::size*2UL,j ), xmm3 * factor );
2357  store( &(~C)(i+IT::size*3UL,j ), xmm4 * factor );
2358  store( &(~C)(i ,j+1UL), xmm5 * factor );
2359  store( &(~C)(i+IT::size ,j+1UL), xmm6 * factor );
2360  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 * factor );
2361  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 * factor );
2362  }
2363  if( j < N ) {
2364  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2365  for( size_t k=0UL; k<K; ++k ) {
2366  const IntrinsicType b1( set( B(k,j) ) );
2367  xmm1 = xmm1 + A.get(i ,k) * b1;
2368  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2369  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2370  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2371  }
2372  store( &(~C)(i ,j), xmm1 * factor );
2373  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2374  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2375  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2376  }
2377  }
// Blocks of 2 intrinsic vectors along i, two columns at a time plus a single-column remainder.
2378  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2379  size_t j( 0UL );
2380  for( ; (j+2UL) <= N; j+=2UL ) {
2381  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2382  for( size_t k=0UL; k<K; ++k ) {
2383  const IntrinsicType a1( A.get(i ,k) );
2384  const IntrinsicType a2( A.get(i+IT::size,k) );
2385  const IntrinsicType b1( set( B(k,j ) ) );
2386  const IntrinsicType b2( set( B(k,j+1UL) ) );
2387  xmm1 = xmm1 + a1 * b1;
2388  xmm2 = xmm2 + a2 * b1;
2389  xmm3 = xmm3 + a1 * b2;
2390  xmm4 = xmm4 + a2 * b2;
2391  }
2392  store( &(~C)(i ,j ), xmm1 * factor );
2393  store( &(~C)(i+IT::size,j ), xmm2 * factor );
2394  store( &(~C)(i ,j+1UL), xmm3 * factor );
2395  store( &(~C)(i+IT::size,j+1UL), xmm4 * factor );
2396  }
2397  if( j < N ) {
2398  IntrinsicType xmm1, xmm2;
2399  for( size_t k=0UL; k<K; ++k ) {
2400  const IntrinsicType b1( set( B(k,j) ) );
2401  xmm1 = xmm1 + A.get(i ,k) * b1;
2402  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2403  }
2404  store( &(~C)(i ,j), xmm1 * factor );
2405  store( &(~C)(i+IT::size,j), xmm2 * factor );
2406  }
2407  }
// Remainder along i: a single intrinsic vector starting at i, processed at most once.
2408  if( i < M ) {
2409  size_t j( 0UL );
2410  for( ; (j+2UL) <= N; j+=2UL ) {
2411  IntrinsicType xmm1, xmm2;
2412  for( size_t k=0UL; k<K; ++k ) {
2413  const IntrinsicType a1( A.get(i,k) );
2414  xmm1 = xmm1 + a1 * set( B(k,j ) );
2415  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2416  }
2417  store( &(~C)(i,j ), xmm1 * factor );
2418  store( &(~C)(i,j+1UL), xmm2 * factor );
2419  }
2420  if( j < N ) {
2421  IntrinsicType xmm1;
2422  for( size_t k=0UL; k<K; ++k ) {
2423  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2424  }
2425  store( &(~C)(i,j), xmm1 * factor );
2426  }
2427  }
2428  }
2429  //**********************************************************************************************
2430 
2431  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback of the BLAS kernel selection for C = (A*B)*scalar.
// Enabled when UseDefaultKernel holds, i.e. BLAS mode is off or no BLAS kernel matches the
// given types. Simply forwards all arguments to selectDefaultAssignKernel.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2445  template< typename MT3 // Type of the left-hand side target matrix
2446  , typename MT4 // Type of the left-hand side matrix operand
2447  , typename MT5 // Type of the right-hand side matrix operand
2448  , typename ST2 > // Type of the scalar value
2449  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2450  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2451  {
2452  selectDefaultAssignKernel( C, A, B, scalar );
2453  }
2454  //**********************************************************************************************
2455 
2456  //**BLAS-based assignment to dense matrices (single precision)**********************************
2457 #if BLAZE_BLAS_MODE
2458 
// Single precision BLAS kernel for C = (A*B)*scalar, implemented with one cblas_sgemm call.
// Enabled when UseSinglePrecisionKernel holds; only compiled when BLAZE_BLAS_MODE is set.
// The scalar is passed as the gemm alpha and beta is 0.0F, so C is overwritten.
// Dimensions and leading dimensions (taken from spacing()) are converted to int with
// boost::numeric_cast.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2471  template< typename MT3 // Type of the left-hand side target matrix
2472  , typename MT4 // Type of the left-hand side matrix operand
2473  , typename MT5 // Type of the right-hand side matrix operand
2474  , typename ST2 > // Type of the scalar value
2475  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2476  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2477  {
2478  using boost::numeric_cast;
2479 
2480  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2481  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2482  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2483 
2484  const int M ( numeric_cast<int>( A.rows() ) );
2485  const int N ( numeric_cast<int>( B.columns() ) );
2486  const int K ( numeric_cast<int>( A.columns() ) );
2487  const int lda( numeric_cast<int>( A.spacing() ) );
2488  const int ldb( numeric_cast<int>( B.spacing() ) );
2489  const int ldc( numeric_cast<int>( C.spacing() ) );
2490 
// All three layout flags are keyed on the target type MT3: a row-major target uses
// CblasRowMajor with both operands flagged CblasTrans, otherwise CblasColMajor/CblasNoTrans.
2491  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2492  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2493  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2494  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2495  }
2496 #endif
2497  //**********************************************************************************************
2498 
2499  //**BLAS-based assignment to dense matrices (double precision)**********************************
2500 #if BLAZE_BLAS_MODE
2501 
// Double precision BLAS kernel for C = (A*B)*scalar, implemented with one cblas_dgemm call.
// Enabled when UseDoublePrecisionKernel holds; only compiled when BLAZE_BLAS_MODE is set.
// The scalar is passed as the gemm alpha and beta is 0.0, so C is overwritten.
// Dimensions and leading dimensions (taken from spacing()) are converted to int with
// boost::numeric_cast.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2514  template< typename MT3 // Type of the left-hand side target matrix
2515  , typename MT4 // Type of the left-hand side matrix operand
2516  , typename MT5 // Type of the right-hand side matrix operand
2517  , typename ST2 > // Type of the scalar value
2518  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2519  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2520  {
2521  using boost::numeric_cast;
2522 
2523  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2524  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2525  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2526 
2527  const int M ( numeric_cast<int>( A.rows() ) );
2528  const int N ( numeric_cast<int>( B.columns() ) );
2529  const int K ( numeric_cast<int>( A.columns() ) );
2530  const int lda( numeric_cast<int>( A.spacing() ) );
2531  const int ldb( numeric_cast<int>( B.spacing() ) );
2532  const int ldc( numeric_cast<int>( C.spacing() ) );
2533 
// All three layout flags are keyed on the target type MT3: a row-major target uses
// CblasRowMajor with both operands flagged CblasTrans, otherwise CblasColMajor/CblasNoTrans.
2534  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2535  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2536  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2537  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2538  }
2539 #endif
2540  //**********************************************************************************************
2541 
2542  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2543 #if BLAZE_BLAS_MODE
2544 
// Single precision complex BLAS kernel for C = (A*B)*scalar, implemented with cblas_cgemm.
// Enabled when UseSinglePrecisionComplexKernel holds; only compiled when BLAZE_BLAS_MODE is set.
// The scalar is converted to complex<float> and passed by address as the gemm alpha; beta is
// (0,0), so C is overwritten. Dimensions and leading dimensions (taken from spacing()) are
// converted to int with boost::numeric_cast.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2557  template< typename MT3 // Type of the left-hand side target matrix
2558  , typename MT4 // Type of the left-hand side matrix operand
2559  , typename MT5 // Type of the right-hand side matrix operand
2560  , typename ST2 > // Type of the scalar value
2561  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2562  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2563  {
2564  using boost::numeric_cast;
2565 
2566  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2567  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2568  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2572 
2573  const int M ( numeric_cast<int>( A.rows() ) );
2574  const int N ( numeric_cast<int>( B.columns() ) );
2575  const int K ( numeric_cast<int>( A.columns() ) );
2576  const int lda( numeric_cast<int>( A.spacing() ) );
2577  const int ldb( numeric_cast<int>( B.spacing() ) );
2578  const int ldc( numeric_cast<int>( C.spacing() ) );
2579  const complex<float> alpha( scalar );
2580  const complex<float> beta ( 0.0F, 0.0F );
2581 
// All three layout flags are keyed on the target type MT3: a row-major target uses
// CblasRowMajor with both operands flagged CblasTrans, otherwise CblasColMajor/CblasNoTrans.
2582  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2583  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2584  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2585  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2586  }
2587 #endif
2588  //**********************************************************************************************
2589 
2590  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2591 #if BLAZE_BLAS_MODE
2592 
// Double precision complex BLAS kernel for C = (A*B)*scalar, implemented with cblas_zgemm.
// Enabled when UseDoublePrecisionComplexKernel holds; only compiled when BLAZE_BLAS_MODE is set.
// The scalar is converted to complex<double> and passed by address as the gemm alpha; beta is
// (0,0), so C is overwritten. Dimensions and leading dimensions (taken from spacing()) are
// converted to int with boost::numeric_cast.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2605  template< typename MT3 // Type of the left-hand side target matrix
2606  , typename MT4 // Type of the left-hand side matrix operand
2607  , typename MT5 // Type of the right-hand side matrix operand
2608  , typename ST2 > // Type of the scalar value
2609  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2610  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2611  {
2612  using boost::numeric_cast;
2613 
2614  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2615  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2616  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2617  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2618  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2620 
2621  const int M ( numeric_cast<int>( A.rows() ) );
2622  const int N ( numeric_cast<int>( B.columns() ) );
2623  const int K ( numeric_cast<int>( A.columns() ) );
2624  const int lda( numeric_cast<int>( A.spacing() ) );
2625  const int ldb( numeric_cast<int>( B.spacing() ) );
2626  const int ldc( numeric_cast<int>( C.spacing() ) );
2627  const complex<double> alpha( scalar );
2628  const complex<double> beta ( 0.0, 0.0 );
2629 
// All three layout flags are keyed on the target type MT3: a row-major target uses
// CblasRowMajor with both operands flagged CblasTrans, otherwise CblasColMajor/CblasNoTrans.
2630  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2631  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2632  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2633  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2634  }
2635 #endif
2636  //**********************************************************************************************
2637 
2638  //**Assignment to sparse matrices***************************************************************
// Assignment of a scaled matrix/matrix product to a sparse matrix.
// The expression is first evaluated into a temporary, which is then assigned to the target.
// The temporary is ResultType or OppositeType, selected on the storage order SO of the target.
// \param lhs The target sparse matrix; its dimensions are only checked by internal assertions.
// \param rhs The scaled product expression to be assigned.
2650  template< typename MT // Type of the target sparse matrix
2651  , bool SO > // Storage order of the target sparse matrix
2652  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2653  {
2655 
2656  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2657 
2663  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2664 
2665  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2666  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2667 
2668  const TmpType tmp( rhs );
2669  assign( ~lhs, tmp );
2670  }
2671  //**********************************************************************************************
2672 
2673  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of a scaled matrix/matrix product to a dense matrix (lhs += (A*B)*s).
// Returns immediately if the target is empty or the inner dimension is zero, since nothing
// would be added. Otherwise both matrix operands are evaluated into LT/RT objects and the work
// is dispatched on the target size: fewer than TDMATTDMATMULT_THRESHOLD elements use
// selectDefaultAddAssignKernel, all others go through selectBlasAddAssignKernel.
// \param lhs The target dense matrix; its dimensions are only checked by internal assertions.
// \param rhs The scaled product expression to be added.
2685  template< typename MT3 // Type of the target dense matrix
2686  , bool SO > // Storage order of the target dense matrix
2687  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2688  {
2690 
2691  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2692  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2693 
2694  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2695  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2696 
2697  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2698  return;
2699  }
2700 
2701  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2702  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2703 
2704  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2705  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2706  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2707  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2708  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2709  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2710 
// Size-based kernel selection: small targets use the default kernels, larger ones the BLAS path.
2711  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2712  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2713  else
2714  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2715  }
2716  //**********************************************************************************************
2717 
2718  //**Default addition assignment to dense matrices***********************************************
// Default (non-vectorized) kernel for C += (A*B)*scalar.
// Enabled when UseVectorizedDefaultKernel does not hold for the given types.
// The scaled product is evaluated into a temporary of type ResultType, which is then added to
// C via addAssign(); no element loop is written out here.
// \param C The target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2732  template< typename MT3 // Type of the left-hand side target matrix
2733  , typename MT4 // Type of the left-hand side matrix operand
2734  , typename MT5 // Type of the right-hand side matrix operand
2735  , typename ST2 > // Type of the scalar value
2736  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2737  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2738  {
2739  const ResultType tmp( A * B * scalar );
2740  addAssign( C, tmp );
2741  }
2742  //**********************************************************************************************
2743 
2744  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition kernel for a row-major target (DenseMatrix<MT3,false>).
// No arithmetic is done here: one operand is copied into a matrix of its OppositeType (which
// the two constraints require to be row-major) and the product is re-dispatched through
// addAssign() with mixed storage orders.
// Which operand is converted:
//  - if exactly one of MT4/MT5 is resizable, the non-resizable one is converted;
//  - otherwise the operand with fewer elements is converted (B on a tie).
// \param C The row-major target matrix.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor.
2758  template< typename MT3 // Type of the left-hand side target matrix
2759  , typename MT4 // Type of the left-hand side matrix operand
2760  , typename MT5 // Type of the right-hand side matrix operand
2761  , typename ST2 > // Type of the scalar value
2762  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2763  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2764  {
2765  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2766  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2767 
2768  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2769  const typename MT5::OppositeType tmp( B );
2770  addAssign( ~C, A * tmp * scalar );
2771  }
2772  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2773  const typename MT4::OppositeType tmp( A );
2774  addAssign( ~C, tmp * B * scalar );
2775  }
2776  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2777  const typename MT5::OppositeType tmp( B );
2778  addAssign( ~C, A * tmp * scalar );
2779  }
2780  else {
2781  const typename MT4::OppositeType tmp( A );
2782  addAssign( ~C, tmp * B * scalar );
2783  }
2784  }
2785  //**********************************************************************************************
2786 
2787  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel for C += scalar * ( A * B ) with a column-major target matrix.
//
//   C      : column-major target matrix; its elements are updated in place via load()/store()
//   A, B   : left-hand and right-hand side operands of the matrix product
//   scalar : factor that every accumulated product value is multiplied with before it is added to C
//
// The row index i advances in steps of IT::size; each IntrinsicType accumulator (xmm*) collects
// the sum over k of A.get(i,k) * B(k,j) for one such step of one column j. Rows are processed in
// blocks of 8, 4, 2 and finally 1 vector(s); except for the 8-vector block, columns are handled
// in pairs with a single-column remainder.
// NOTE(review): the accumulators are only default-constructed before they are first read, which
// presumably relies on IntrinsicType's default constructor producing zero -- confirm.
2801  template< typename MT3 // Type of the left-hand side target matrix
2802  , typename MT4 // Type of the left-hand side matrix operand
2803  , typename MT5 // Type of the right-hand side matrix operand
2804  , typename ST2 > // Type of the scalar value
2805  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2806  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2807  {
2808  typedef IntrinsicTrait<ElementType> IT;
2809 
   // NOTE(review): the row bound is A.spacing(), not A.rows() -- presumably the padded column
   // length, so that the row loops below always cover whole vectors; confirm.
2810  const size_t M( A.spacing() );
2811  const size_t N( B.columns() );
2812  const size_t K( A.columns() );
2813 
   // The scalar broadcast into a vector; applied once per result vector after the k-loop.
2814  const IntrinsicType factor( set( scalar ) );
2815 
2816  size_t i( 0UL );
2817 
   // Row blocks of 8 vectors, one column of C at a time.
2818  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2819  for( size_t j=0UL; j<N; ++j ) {
2820  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2821  for( size_t k=0UL; k<K; ++k ) {
2822  const IntrinsicType b1( set( B(k,j) ) );
2823  xmm1 = xmm1 + A.get(i ,k) * b1;
2824  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2825  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2826  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2827  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2828  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2829  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2830  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2831  }
2832  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2833  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2834  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2835  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2836  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) + xmm5 * factor );
2837  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) + xmm6 * factor );
2838  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) + xmm7 * factor );
2839  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) + xmm8 * factor );
2840  }
2841  }
   // Row blocks of 4 vectors, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
2842  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2843  size_t j( 0UL );
2844  for( ; (j+2UL) <= N; j+=2UL ) {
2845  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2846  for( size_t k=0UL; k<K; ++k ) {
2847  const IntrinsicType a1( A.get(i ,k) );
2848  const IntrinsicType a2( A.get(i+IT::size ,k) );
2849  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2850  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2851  const IntrinsicType b1( set( B(k,j ) ) );
2852  const IntrinsicType b2( set( B(k,j+1UL) ) );
2853  xmm1 = xmm1 + a1 * b1;
2854  xmm2 = xmm2 + a2 * b1;
2855  xmm3 = xmm3 + a3 * b1;
2856  xmm4 = xmm4 + a4 * b1;
2857  xmm5 = xmm5 + a1 * b2;
2858  xmm6 = xmm6 + a2 * b2;
2859  xmm7 = xmm7 + a3 * b2;
2860  xmm8 = xmm8 + a4 * b2;
2861  }
2862  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2863  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) + xmm2 * factor );
2864  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) + xmm3 * factor );
2865  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) + xmm4 * factor );
2866  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm5 * factor );
2867  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) + xmm6 * factor );
2868  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) + xmm7 * factor );
2869  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) + xmm8 * factor );
2870  }
   // Odd number of columns: handle the last column on its own.
2871  if( j < N ) {
2872  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2873  for( size_t k=0UL; k<K; ++k ) {
2874  const IntrinsicType b1( set( B(k,j) ) );
2875  xmm1 = xmm1 + A.get(i ,k) * b1;
2876  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2877  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2878  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2879  }
2880  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2881  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2882  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2883  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2884  }
2885  }
   // Row blocks of 2 vectors, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
2886  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2887  size_t j( 0UL );
2888  for( ; (j+2UL) <= N; j+=2UL ) {
2889  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2890  for( size_t k=0UL; k<K; ++k ) {
2891  const IntrinsicType a1( A.get(i ,k) );
2892  const IntrinsicType a2( A.get(i+IT::size,k) );
2893  const IntrinsicType b1( set( B(k,j ) ) );
2894  const IntrinsicType b2( set( B(k,j+1UL) ) );
2895  xmm1 = xmm1 + a1 * b1;
2896  xmm2 = xmm2 + a2 * b1;
2897  xmm3 = xmm3 + a1 * b2;
2898  xmm4 = xmm4 + a2 * b2;
2899  }
2900  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2901  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) + xmm2 * factor );
2902  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm3 * factor );
2903  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) + xmm4 * factor );
2904  }
   // Odd number of columns: handle the last column on its own.
2905  if( j < N ) {
2906  IntrinsicType xmm1, xmm2;
2907  for( size_t k=0UL; k<K; ++k ) {
2908  const IntrinsicType b1( set( B(k,j) ) );
2909  xmm1 = xmm1 + A.get(i ,k) * b1;
2910  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2911  }
2912  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2913  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) + xmm2 * factor );
2914  }
2915  }
   // Remaining rows: a single vector per column.
   // NOTE(review): this is an `if`, not a loop, so it presumably relies on M being a multiple
   // of IT::size (at most one vector left at this point) -- confirm.
2916  if( i < M ) {
2917  size_t j( 0UL );
2918  for( ; (j+2UL) <= N; j+=2UL ) {
2919  IntrinsicType xmm1, xmm2;
2920  for( size_t k=0UL; k<K; ++k ) {
2921  const IntrinsicType a1( A.get(i,k) );
2922  xmm1 = xmm1 + a1 * set( B(k,j ) );
2923  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2924  }
2925  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2926  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) + xmm2 * factor );
2927  }
   // Odd number of columns: handle the last column on its own.
2928  if( j < N ) {
2929  IntrinsicType xmm1;
2930  for( size_t k=0UL; k<K; ++k ) {
2931  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2932  }
2933  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2934  }
2935  }
2936  }
2937  //**********************************************************************************************
2938 
2939  //**BLAS-based addition assignment to dense matrices (default)**********************************
2953  template< typename MT3 // Type of the left-hand side target matrix
2954  , typename MT4 // Type of the left-hand side matrix operand
2955  , typename MT5 // Type of the right-hand side matrix operand
2956  , typename ST2 > // Type of the scalar value
2957  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2958  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2959  {
2960  selectDefaultAddAssignKernel( C, A, B, scalar );
2961  }
2962  //**********************************************************************************************
2963 
2964  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2965 #if BLAZE_BLAS_MODE
2966 
2979  template< typename MT3 // Type of the left-hand side target matrix
2980  , typename MT4 // Type of the left-hand side matrix operand
2981  , typename MT5 // Type of the right-hand side matrix operand
2982  , typename ST2 > // Type of the scalar value
2983  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2984  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2985  {
2986  using boost::numeric_cast;
2987 
2988  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2989  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2990  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2991 
2992  const int M ( numeric_cast<int>( A.rows() ) );
2993  const int N ( numeric_cast<int>( B.columns() ) );
2994  const int K ( numeric_cast<int>( A.columns() ) );
2995  const int lda( numeric_cast<int>( A.spacing() ) );
2996  const int ldb( numeric_cast<int>( B.spacing() ) );
2997  const int ldc( numeric_cast<int>( C.spacing() ) );
2998 
2999  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3000  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3001  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3002  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3003  }
3004 #endif
3005  //**********************************************************************************************
3006 
3007  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3008 #if BLAZE_BLAS_MODE
3009 
3022  template< typename MT3 // Type of the left-hand side target matrix
3023  , typename MT4 // Type of the left-hand side matrix operand
3024  , typename MT5 // Type of the right-hand side matrix operand
3025  , typename ST2 > // Type of the scalar value
3026  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3027  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3028  {
3029  using boost::numeric_cast;
3030 
3031  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3032  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3033  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3034 
3035  const int M ( numeric_cast<int>( A.rows() ) );
3036  const int N ( numeric_cast<int>( B.columns() ) );
3037  const int K ( numeric_cast<int>( A.columns() ) );
3038  const int lda( numeric_cast<int>( A.spacing() ) );
3039  const int ldb( numeric_cast<int>( B.spacing() ) );
3040  const int ldc( numeric_cast<int>( C.spacing() ) );
3041 
3042  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3043  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3044  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3045  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3046  }
3047 #endif
3048  //**********************************************************************************************
3049 
3050  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3051 #if BLAZE_BLAS_MODE
3052 
3065  template< typename MT3 // Type of the left-hand side target matrix
3066  , typename MT4 // Type of the left-hand side matrix operand
3067  , typename MT5 // Type of the right-hand side matrix operand
3068  , typename ST2 > // Type of the scalar value
3069  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3070  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3071  {
3072  using boost::numeric_cast;
3073 
3074  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3075  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3076  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3077  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3078  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3079  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3080 
3081  const int M ( numeric_cast<int>( A.rows() ) );
3082  const int N ( numeric_cast<int>( B.columns() ) );
3083  const int K ( numeric_cast<int>( A.columns() ) );
3084  const int lda( numeric_cast<int>( A.spacing() ) );
3085  const int ldb( numeric_cast<int>( B.spacing() ) );
3086  const int ldc( numeric_cast<int>( C.spacing() ) );
3087  const complex<float> alpha( scalar );
3088  const complex<float> beta ( 1.0F, 0.0F );
3089 
3090  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3091  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3092  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3093  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3094  }
3095 #endif
3096  //**********************************************************************************************
3097 
3098  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3099 #if BLAZE_BLAS_MODE
3100 
3113  template< typename MT3 // Type of the left-hand side target matrix
3114  , typename MT4 // Type of the left-hand side matrix operand
3115  , typename MT5 // Type of the right-hand side matrix operand
3116  , typename ST2 > // Type of the scalar value
3117  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3118  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3119  {
3120  using boost::numeric_cast;
3121 
3122  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3123  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3124  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3125  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3126  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3127  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3128 
3129  const int M ( numeric_cast<int>( A.rows() ) );
3130  const int N ( numeric_cast<int>( B.columns() ) );
3131  const int K ( numeric_cast<int>( A.columns() ) );
3132  const int lda( numeric_cast<int>( A.spacing() ) );
3133  const int ldb( numeric_cast<int>( B.spacing() ) );
3134  const int ldc( numeric_cast<int>( C.spacing() ) );
3135  const complex<double> alpha( scalar );
3136  const complex<double> beta ( 1.0, 0.0 );
3137 
3138  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3139  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3140  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3141  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3142  }
3143 #endif
3144  //**********************************************************************************************
3145 
3146  //**Addition assignment to sparse matrices******************************************************
3147  // No special implementation for the addition assignment to sparse matrices.
3148  //**********************************************************************************************
3149 
3150  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled matrix product to a dense matrix: the operands of
// rhs.matrix_ are evaluated and the work is dispatched to one of the subtraction assignment
// kernels together with rhs.scalar_.
//
//   lhs : target dense matrix (any storage order SO); must already have the dimensions of rhs
//   rhs : the scaled matrix product expression to be subtracted
3162  template< typename MT3 // Type of the target dense matrix
3163  , bool SO > // Storage order of the target dense matrix
3164  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3165  {
   // NOTE(review): this listing jumps from line 3165 to 3167, i.e. one line of the original
   // function body is not shown here -- check the real header before restructuring this block.
3167 
3168  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3169  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3170 
3171  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3172  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3173 
   // Nothing to subtract if the target is empty or the inner dimension of the product is zero.
3174  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3175  return;
3176  }
3177 
3178  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3179  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3180 
3181  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3182  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3183  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3184  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3185  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3186  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3187 
   // Targets with fewer than TDMATTDMATMULT_THRESHOLD elements use the default kernel,
   // all others the BLAS kernel selection.
3188  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
3189  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3190  else
3191  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3192  }
3193  //**********************************************************************************************
3194 
3195  //**Default subtraction assignment to dense matrices********************************************
3209  template< typename MT3 // Type of the left-hand side target matrix
3210  , typename MT4 // Type of the left-hand side matrix operand
3211  , typename MT5 // Type of the right-hand side matrix operand
3212  , typename ST2 > // Type of the scalar value
3213  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3214  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3215  {
3216  const ResultType tmp( A * B * scalar );
3217  subAssign( C, tmp );
3218  }
3219  //**********************************************************************************************
3220 
3221  //**Vectorized default subtraction assignment to row-major dense matrices***********************
3235  template< typename MT3 // Type of the left-hand side target matrix
3236  , typename MT4 // Type of the left-hand side matrix operand
3237  , typename MT5 // Type of the right-hand side matrix operand
3238  , typename ST2 > // Type of the scalar value
3239  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3240  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3241  {
3242  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3243  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3244 
3245  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3246  const typename MT5::OppositeType tmp( B );
3247  subAssign( ~C, A * tmp * scalar );
3248  }
3249  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3250  const typename MT4::OppositeType tmp( A );
3251  subAssign( ~C, tmp * B * scalar );
3252  }
3253  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3254  const typename MT5::OppositeType tmp( B );
3255  subAssign( ~C, A * tmp * scalar );
3256  }
3257  else {
3258  const typename MT4::OppositeType tmp( A );
3259  subAssign( ~C, tmp * B * scalar );
3260  }
3261  }
3262  //**********************************************************************************************
3263 
3264  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for C -= scalar * ( A * B ) with a column-major target matrix.
//
//   C      : column-major target matrix; its elements are updated in place via load()/store()
//   A, B   : left-hand and right-hand side operands of the matrix product
//   scalar : factor that every accumulated product value is multiplied with before it is
//            subtracted from C
//
// The row index i advances in steps of IT::size; each IntrinsicType accumulator (xmm*) collects
// the sum over k of A.get(i,k) * B(k,j) for one such step of one column j. Rows are processed in
// blocks of 8, 4, 2 and finally 1 vector(s); except for the 8-vector block, columns are handled
// in pairs with a single-column remainder.
// NOTE(review): the accumulators are only default-constructed before they are first read, which
// presumably relies on IntrinsicType's default constructor producing zero -- confirm.
3278  template< typename MT3 // Type of the left-hand side target matrix
3279  , typename MT4 // Type of the left-hand side matrix operand
3280  , typename MT5 // Type of the right-hand side matrix operand
3281  , typename ST2 > // Type of the scalar value
3282  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3283  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3284  {
3285  typedef IntrinsicTrait<ElementType> IT;
3286 
   // NOTE(review): the row bound is A.spacing(), not A.rows() -- presumably the padded column
   // length, so that the row loops below always cover whole vectors; confirm.
3287  const size_t M( A.spacing() );
3288  const size_t N( B.columns() );
3289  const size_t K( A.columns() );
3290 
   // The scalar broadcast into a vector; applied once per result vector after the k-loop.
3291  const IntrinsicType factor( set( scalar ) );
3292 
3293  size_t i( 0UL );
3294 
   // Row blocks of 8 vectors, one column of C at a time.
3295  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
3296  for( size_t j=0UL; j<N; ++j ) {
3297  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3298  for( size_t k=0UL; k<K; ++k ) {
3299  const IntrinsicType b1( set( B(k,j) ) );
3300  xmm1 = xmm1 + A.get(i ,k) * b1;
3301  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3302  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3303  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3304  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
3305  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
3306  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
3307  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
3308  }
3309  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3310  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3311  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3312  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3313  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) - xmm5 * factor );
3314  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) - xmm6 * factor );
3315  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) - xmm7 * factor );
3316  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) - xmm8 * factor );
3317  }
3318  }
   // Row blocks of 4 vectors, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
3319  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
3320  size_t j( 0UL );
3321  for( ; (j+2UL) <= N; j+=2UL ) {
3322  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3323  for( size_t k=0UL; k<K; ++k ) {
3324  const IntrinsicType a1( A.get(i ,k) );
3325  const IntrinsicType a2( A.get(i+IT::size ,k) );
3326  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
3327  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
3328  const IntrinsicType b1( set( B(k,j ) ) );
3329  const IntrinsicType b2( set( B(k,j+1UL) ) );
3330  xmm1 = xmm1 + a1 * b1;
3331  xmm2 = xmm2 + a2 * b1;
3332  xmm3 = xmm3 + a3 * b1;
3333  xmm4 = xmm4 + a4 * b1;
3334  xmm5 = xmm5 + a1 * b2;
3335  xmm6 = xmm6 + a2 * b2;
3336  xmm7 = xmm7 + a3 * b2;
3337  xmm8 = xmm8 + a4 * b2;
3338  }
3339  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3340  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) - xmm2 * factor );
3341  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) - xmm3 * factor );
3342  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) - xmm4 * factor );
3343  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm5 * factor );
3344  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) - xmm6 * factor );
3345  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) - xmm7 * factor );
3346  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) - xmm8 * factor );
3347  }
   // Odd number of columns: handle the last column on its own.
3348  if( j < N ) {
3349  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3350  for( size_t k=0UL; k<K; ++k ) {
3351  const IntrinsicType b1( set( B(k,j) ) );
3352  xmm1 = xmm1 + A.get(i ,k) * b1;
3353  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3354  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3355  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3356  }
3357  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3358  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3359  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3360  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3361  }
3362  }
   // Row blocks of 2 vectors, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
3363  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
3364  size_t j( 0UL );
3365  for( ; (j+2UL) <= N; j+=2UL ) {
3366  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3367  for( size_t k=0UL; k<K; ++k ) {
3368  const IntrinsicType a1( A.get(i ,k) );
3369  const IntrinsicType a2( A.get(i+IT::size,k) );
3370  const IntrinsicType b1( set( B(k,j ) ) );
3371  const IntrinsicType b2( set( B(k,j+1UL) ) );
3372  xmm1 = xmm1 + a1 * b1;
3373  xmm2 = xmm2 + a2 * b1;
3374  xmm3 = xmm3 + a1 * b2;
3375  xmm4 = xmm4 + a2 * b2;
3376  }
3377  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3378  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) - xmm2 * factor );
3379  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm3 * factor );
3380  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) - xmm4 * factor );
3381  }
   // Odd number of columns: handle the last column on its own.
3382  if( j < N ) {
3383  IntrinsicType xmm1, xmm2;
3384  for( size_t k=0UL; k<K; ++k ) {
3385  const IntrinsicType b1( set( B(k,j) ) );
3386  xmm1 = xmm1 + A.get(i ,k) * b1;
3387  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
3388  }
3389  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3390  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) - xmm2 * factor );
3391  }
3392  }
   // Remaining rows: a single vector per column.
   // NOTE(review): this is an `if`, not a loop, so it presumably relies on M being a multiple
   // of IT::size (at most one vector left at this point) -- confirm.
3393  if( i < M ) {
3394  size_t j( 0UL );
3395  for( ; (j+2UL) <= N; j+=2UL ) {
3396  IntrinsicType xmm1, xmm2;
3397  for( size_t k=0UL; k<K; ++k ) {
3398  const IntrinsicType a1( A.get(i,k) );
3399  xmm1 = xmm1 + a1 * set( B(k,j ) );
3400  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3401  }
3402  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3403  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) - xmm2 * factor );
3404  }
   // Odd number of columns: handle the last column on its own.
3405  if( j < N ) {
3406  IntrinsicType xmm1;
3407  for( size_t k=0UL; k<K; ++k ) {
3408  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
3409  }
3410  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3411  }
3412  }
3413  }
3414  //**********************************************************************************************
3415 
3416  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3430  template< typename MT3 // Type of the left-hand side target matrix
3431  , typename MT4 // Type of the left-hand side matrix operand
3432  , typename MT5 // Type of the right-hand side matrix operand
3433  , typename ST2 > // Type of the scalar value
3434  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3435  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3436  {
3437  selectDefaultSubAssignKernel( C, A, B, scalar );
3438  }
3439  //**********************************************************************************************
3440 
3441  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3442 #if BLAZE_BLAS_MODE
3443 
3456  template< typename MT3 // Type of the left-hand side target matrix
3457  , typename MT4 // Type of the left-hand side matrix operand
3458  , typename MT5 // Type of the right-hand side matrix operand
3459  , typename ST2 > // Type of the scalar value
3460  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3461  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3462  {
3463  using boost::numeric_cast;
3464 
3465  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
3466  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
3467  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
3468 
3469  const int M ( numeric_cast<int>( A.rows() ) );
3470  const int N ( numeric_cast<int>( B.columns() ) );
3471  const int K ( numeric_cast<int>( A.columns() ) );
3472  const int lda( numeric_cast<int>( A.spacing() ) );
3473  const int ldb( numeric_cast<int>( B.spacing() ) );
3474  const int ldc( numeric_cast<int>( C.spacing() ) );
3475 
3476  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3477  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3478  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3479  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3480  }
3481 #endif
3482  //**********************************************************************************************
3483 
3484  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3485 #if BLAZE_BLAS_MODE
3486 
3499  template< typename MT3 // Type of the left-hand side target matrix
3500  , typename MT4 // Type of the left-hand side matrix operand
3501  , typename MT5 // Type of the right-hand side matrix operand
3502  , typename ST2 > // Type of the scalar value
3503  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3504  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3505  {
3506  using boost::numeric_cast;
3507 
3508  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3509  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3510  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3511 
3512  const int M ( numeric_cast<int>( A.rows() ) );
3513  const int N ( numeric_cast<int>( B.columns() ) );
3514  const int K ( numeric_cast<int>( A.columns() ) );
3515  const int lda( numeric_cast<int>( A.spacing() ) );
3516  const int ldb( numeric_cast<int>( B.spacing() ) );
3517  const int ldc( numeric_cast<int>( C.spacing() ) );
3518 
3519  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3520  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3521  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3522  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3523  }
3524 #endif
3525  //**********************************************************************************************
3526 
3527  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3528 #if BLAZE_BLAS_MODE
3529 
3542  template< typename MT3 // Type of the left-hand side target matrix
3543  , typename MT4 // Type of the left-hand side matrix operand
3544  , typename MT5 // Type of the right-hand side matrix operand
3545  , typename ST2 > // Type of the scalar value
3546  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3547  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3548  {
3549  using boost::numeric_cast;
3550 
3551  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3552  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3553  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3554  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3555  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3556  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3557 
3558  const int M ( numeric_cast<int>( A.rows() ) );
3559  const int N ( numeric_cast<int>( B.columns() ) );
3560  const int K ( numeric_cast<int>( A.columns() ) );
3561  const int lda( numeric_cast<int>( A.spacing() ) );
3562  const int ldb( numeric_cast<int>( B.spacing() ) );
3563  const int ldc( numeric_cast<int>( C.spacing() ) );
3564  const complex<float> alpha( -scalar );
3565  const complex<float> beta ( 1.0F, 0.0F );
3566 
3567  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3568  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3569  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3570  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3571  }
3572 #endif
3573  //**********************************************************************************************
3574 
3575  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3576 #if BLAZE_BLAS_MODE
3577 
3590  template< typename MT3 // Type of the left-hand side target matrix
3591  , typename MT4 // Type of the left-hand side matrix operand
3592  , typename MT5 // Type of the right-hand side matrix operand
3593  , typename ST2 > // Type of the scalar value
3594  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3595  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3596  {
3597  using boost::numeric_cast;
3598 
3599  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3600  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3601  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3602  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3603  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3604  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3605 
3606  const int M ( numeric_cast<int>( A.rows() ) );
3607  const int N ( numeric_cast<int>( B.columns() ) );
3608  const int K ( numeric_cast<int>( A.columns() ) );
3609  const int lda( numeric_cast<int>( A.spacing() ) );
3610  const int ldb( numeric_cast<int>( B.spacing() ) );
3611  const int ldc( numeric_cast<int>( C.spacing() ) );
3612  const complex<double> alpha( -scalar );
3613  const complex<double> beta ( 1.0, 0.0 );
3614 
3615  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3616  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3617  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3618  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3619  }
3620 #endif
3621  //**********************************************************************************************
3622 
3623  //**Subtraction assignment to sparse matrices***************************************************
3624  // No special implementation for the subtraction assignment to sparse matrices.
3625  //**********************************************************************************************
3626 
3627  //**Multiplication assignment to dense matrices*************************************************
3628  // No special implementation for the multiplication assignment to dense matrices.
3629  //**********************************************************************************************
3630 
3631  //**Multiplication assignment to sparse matrices************************************************
3632  // No special implementation for the multiplication assignment to sparse matrices.
3633  //**********************************************************************************************
3634 
3635  //**Compile time checks*************************************************************************
3644  //**********************************************************************************************
3645 };
3647 //*************************************************************************************************
3648 
3649 
3650 
3651 
3652 //=================================================================================================
3653 //
3654 // GLOBAL BINARY ARITHMETIC OPERATORS
3655 //
3656 //=================================================================================================
3657 
3658 //*************************************************************************************************
3684 template< typename T1 // Type of the left-hand side dense matrix
3685  , typename T2 > // Type of the right-hand side dense matrix
3686 inline const TDMatTDMatMultExpr<T1,T2>
3688 {
3690 
3691  if( (~lhs).columns() != (~rhs).rows() )
3692  throw std::invalid_argument( "Matrix sizes do not match" );
3693 
3694  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3695 }
3696 //*************************************************************************************************
3697 
3698 
3699 
3700 
3701 //=================================================================================================
3702 //
3703 // EXPRESSION TRAIT SPECIALIZATIONS
3704 //
3705 //=================================================================================================
3706 
3707 //*************************************************************************************************
3709 template< typename MT1, typename MT2, typename VT >
3710 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3711 {
3712  public:
3713  //**********************************************************************************************
3714  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3715  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3716  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3717  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
3718  , INVALID_TYPE >::Type Type;
3719  //**********************************************************************************************
3720 };
3722 //*************************************************************************************************
3723 
3724 
3725 //*************************************************************************************************
3727 template< typename MT1, typename MT2, typename VT >
3728 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3729 {
3730  public:
3731  //**********************************************************************************************
3732  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3733  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3734  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3735  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
3736  , INVALID_TYPE >::Type Type;
3737  //**********************************************************************************************
3738 };
3740 //*************************************************************************************************
3741 
3742 
3743 //*************************************************************************************************
3745 template< typename VT, typename MT1, typename MT2 >
3746 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3747 {
3748  public:
3749  //**********************************************************************************************
3750  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3751  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3752  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3753  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3754  , INVALID_TYPE >::Type Type;
3755  //**********************************************************************************************
3756 };
3758 //*************************************************************************************************
3759 
3760 
3761 //*************************************************************************************************
3763 template< typename VT, typename MT1, typename MT2 >
3764 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3765 {
3766  public:
3767  //**********************************************************************************************
3768  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3769  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3770  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3771  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3772  , INVALID_TYPE >::Type Type;
3773  //**********************************************************************************************
3774 };
3776 //*************************************************************************************************
3777 
3778 
3779 //*************************************************************************************************
3781 template< typename MT1, typename MT2 >
3782 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3783 {
3784  public:
3785  //**********************************************************************************************
3786  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3787  //**********************************************************************************************
3788 };
3790 //*************************************************************************************************
3791 
3792 
3793 //*************************************************************************************************
3795 template< typename MT1, typename MT2 >
3796 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3797 {
3798  public:
3799  //**********************************************************************************************
3800  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3801  //**********************************************************************************************
3802 };
3804 //*************************************************************************************************
3805 
3806 } // namespace blaze
3807 
3808 #endif