All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
38 #include <blaze/math/Intrinsics.h>
39 #include <blaze/math/shims/Reset.h>
57 #include <blaze/system/BLAS.h>
59 #include <blaze/util/Assert.h>
60 #include <blaze/util/Complex.h>
65 #include <blaze/util/EnableIf.h>
66 #include <blaze/util/InvalidType.h>
68 #include <blaze/util/SelectType.h>
69 #include <blaze/util/Types.h>
75 
76 
77 namespace blaze {
78 
79 //=================================================================================================
80 //
81 // CLASS TDMATTDMATMULTEXPR
82 //
83 //=================================================================================================
84 
85 //*************************************************************************************************
92 template< typename MT1 // Type of the left-hand side dense matrix
93  , typename MT2 > // Type of the right-hand side dense matrix
94 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
95  , private Expression
96  , private Computation
97 {
98  private:
99  //**Type definitions****************************************************************************
// Abbreviations for the nested types of the two operand matrix types MT1 and MT2.
// RT1/RT2 and CT1/CT2 are the two alternatives selected between by the LT and RT typedefs
// further down in this class; ET1/ET2 are the operands' element types.
100  typedef typename MT1::ResultType RT1;
101  typedef typename MT2::ResultType RT2;
102  typedef typename MT1::ElementType ET1;
103  typedef typename MT2::ElementType ET2;
104  typedef typename MT1::CompositeType CT1;
105  typedef typename MT2::CompositeType CT2;
106  //**********************************************************************************************
107 
108  //**********************************************************************************************
110 
111 
// Compile-time predicate for the single-precision BLAS kernel. Its nested 'value' is consulted
// by UseDefaultKernel, and the struct is the EnableIf condition of the selectBlasAssignKernel
// overload that calls cblas_sgemm.
// NOTE(review): no members are visible between the braces in this listing although 'value' is
// used elsewhere -- the body appears to have been dropped by the extraction; confirm against
// the original header.
113  template< typename T1, typename T2, typename T3 >
114  struct UseSinglePrecisionKernel {
118  };
120  //**********************************************************************************************
121 
122  //**********************************************************************************************
124 
125 
// Compile-time predicate for the double-precision BLAS kernel. Its nested 'value' is consulted
// by UseDefaultKernel, and the struct is the EnableIf condition of the selectBlasAssignKernel
// overload that calls cblas_dgemm.
// NOTE(review): no members are visible between the braces in this listing although 'value' is
// used elsewhere -- the body appears to have been dropped by the extraction; confirm against
// the original header.
127  template< typename T1, typename T2, typename T3 >
128  struct UseDoublePrecisionKernel {
132  };
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
138 
139 
142  template< typename T1, typename T2, typename T3 >
143  struct UseSinglePrecisionComplexKernel {
144  typedef complex<float> Type;
145  enum { value = IsSame<typename T1::ElementType,Type>::value &&
146  IsSame<typename T2::ElementType,Type>::value &&
147  IsSame<typename T3::ElementType,Type>::value };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
155 
158  template< typename T1, typename T2, typename T3 >
159  struct UseDoublePrecisionComplexKernel {
160  typedef complex<double> Type;
161  enum { value = IsSame<typename T1::ElementType,Type>::value &&
162  IsSame<typename T2::ElementType,Type>::value &&
163  IsSame<typename T3::ElementType,Type>::value };
164  };
166  //**********************************************************************************************
167 
168  //**********************************************************************************************
170 
171 
173  template< typename T1, typename T2, typename T3 >
174  struct UseDefaultKernel {
175  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
176  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
177  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
178  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
179  };
181  //**********************************************************************************************
182 
183  //**********************************************************************************************
185 
186 
188  template< typename T1, typename T2, typename T3 >
189  struct UseVectorizedDefaultKernel {
190  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
193  IntrinsicTrait<typename T1::ElementType>::addition &&
194  IntrinsicTrait<typename T1::ElementType>::multiplication };
195  };
197  //**********************************************************************************************
198 
199  public:
200  //**Type definitions****************************************************************************
// Public type interface of the expression. ResultType itself is not declared in the visible
// part of this listing.
// NOTE(review): the descriptions of the four SelectType-based typedefs below assume that
// SelectType<C,A,B>::Type yields A when C is true and B otherwise -- confirm.
203  typedef typename ResultType::OppositeType OppositeType;
204  typedef typename ResultType::TransposeType TransposeType;
205  typedef typename ResultType::ElementType ElementType;
// ReturnType is the return type of operator(); both it and CompositeType are const values.
207  typedef const ElementType ReturnType;
208  typedef const ResultType CompositeType;
209 
// Return type of leftOperand(): MT1 by value if MT1 is an expression, else a const reference.
211  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
212 
// Return type of rightOperand(): MT2 by value if MT2 is an expression, else a const reference.
214  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
215 
// Type the left operand is converted to inside assign()/addAssign() before a kernel is called.
217  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
218 
// Type the right operand is converted to inside assign()/addAssign() before a kernel is called.
220  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
221  //**********************************************************************************************
222 
223  //**Compilation flags***************************************************************************
// Compilation flag: this expression type reports 'vectorizable' as 0. The same-named flag of
// matrix types is what UseVectorizedDefaultKernel reads.
225  enum { vectorizable = 0 };
226  //**********************************************************************************************
227 
228  //**Constructor*********************************************************************************
234  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
235  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
236  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
237  {
238  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
239  }
240  //**********************************************************************************************
241 
242  //**Access operator*****************************************************************************
249  inline ReturnType operator()( size_t i, size_t j ) const {
250  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
251  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
252 
253  ElementType tmp;
254 
255  if( lhs_.columns() != 0UL ) {
256  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
257  tmp = lhs_(i,0UL) * rhs_(0UL,j);
258  for( size_t k=1UL; k<end; k+=2UL ) {
259  tmp += lhs_(i,k ) * rhs_(k ,j);
260  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
261  }
262  if( end < lhs_.columns() ) {
263  tmp += lhs_(i,end) * rhs_(end,j);
264  }
265  }
266  else {
267  reset( tmp );
268  }
269 
270  return tmp;
271  }
272  //**********************************************************************************************
273 
274  //**Rows function*******************************************************************************
279  inline size_t rows() const {
280  return lhs_.rows();
281  }
282  //**********************************************************************************************
283 
284  //**Columns function****************************************************************************
289  inline size_t columns() const {
290  return rhs_.columns();
291  }
292  //**********************************************************************************************
293 
294  //**Left operand access*************************************************************************
299  inline LeftOperand leftOperand() const {
300  return lhs_;
301  }
302  //**********************************************************************************************
303 
304  //**Right operand access************************************************************************
309  inline RightOperand rightOperand() const {
310  return rhs_;
311  }
312  //**********************************************************************************************
313 
314  //**********************************************************************************************
320  template< typename T >
321  inline bool canAlias( const T* alias ) const {
322  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
323  }
324  //**********************************************************************************************
325 
326  //**********************************************************************************************
332  template< typename T >
333  inline bool isAliased( const T* alias ) const {
334  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
335  }
336  //**********************************************************************************************
337 
338  private:
339  //**Member variables****************************************************************************
342  //**********************************************************************************************
343 
344  //**Assignment to dense matrices****************************************************************
// Assigns the matrix product 'rhs' to the dense target matrix 'lhs'.
//
// lhs: target dense matrix; its dimensions are asserted to match those of the product.
// rhs: the multiplication expression to be evaluated.
//
// An empty target is left untouched, and a product with a zero inner dimension resets the
// target. Otherwise both operands are converted to LT/RT and a kernel is chosen by the number
// of target elements relative to TDMATTDMATMULT_THRESHOLD (defined outside this listing).
// NOTE(review): listing line 358 is missing from this body -- confirm against the original
// header that no statement was dropped by the extraction.
354  template< typename MT // Type of the target dense matrix
355  , bool SO > // Storage order of the target dense matrix
356  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
357  {
359 
360  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
361  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
362 
// Nothing to write for an empty target; an empty inner dimension yields an all-reset target
363  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
364  return;
365  }
366  else if( rhs.lhs_.columns() == 0UL ) {
367  reset( ~lhs );
368  return;
369  }
370 
371  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
372  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
373 
374  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
375  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
376  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
377  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
378  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
379  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
380 
// Targets below the threshold go to the default kernel, all others to the BLAS kernel
// selection (which itself forwards to the default kernel when UseDefaultKernel holds)
381  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
382  TDMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
383  else
384  TDMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
385  }
387  //**********************************************************************************************
388 
389  //**Default assignment to dense matrices********************************************************
403  template< typename MT3 // Type of the left-hand side target matrix
404  , typename MT4 // Type of the left-hand side matrix operand
405  , typename MT5 > // Type of the right-hand side matrix operand
406  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
407  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
408  {
409  const size_t M( A.rows() );
410  const size_t N( B.columns() );
411  const size_t K( A.columns() );
412 
413  for( size_t i=0UL; i<M; ++i ) {
414  for( size_t j=0UL; j<N; ++j ) {
415  C(i,j) = A(i,0UL) * B(0UL,j);
416  }
417  for( size_t k=1UL; k<K; ++k ) {
418  for( size_t j=0UL; j<N; ++j ) {
419  C(i,j) += A(i,k) * B(k,j);
420  }
421  }
422  }
423  }
425  //**********************************************************************************************
426 
427  //**Vectorized default assignment to row-major dense matrices***********************************
441  template< typename MT3 // Type of the left-hand side target matrix
442  , typename MT4 // Type of the left-hand side matrix operand
443  , typename MT5 > // Type of the right-hand side matrix operand
444  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
445  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
446  {
447  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
448  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
449 
450  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
451  const typename MT5::OppositeType tmp( B );
452  assign( ~C, A * tmp );
453  }
454  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
455  const typename MT4::OppositeType tmp( A );
456  assign( ~C, tmp * B );
457  }
458  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
459  const typename MT5::OppositeType tmp( B );
460  assign( ~C, A * tmp );
461  }
462  else {
463  const typename MT4::OppositeType tmp( A );
464  assign( ~C, tmp * B );
465  }
466  }
468  //**********************************************************************************************
469 
470  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for the assignment C = A * B with a column-major target
// (DenseMatrix<MT3,true>). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C: target matrix, written exclusively through store() at row offsets that are multiples of
//    IT::size.
// A: left-hand side operand, read through A.get(row,k).
// B: right-hand side operand; each scalar B(k,j) is expanded with set() before multiplication.
//
// The row range is consumed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s); the 4-, 2-
// and 1-vector blocks additionally handle two target columns per pass, with a single-column
// tail for an odd column count.
// NOTE(review): the xmm accumulators are default-constructed and never explicitly zeroed --
// this presumably relies on IntrinsicType's default constructor zero-initializing; confirm.
// NOTE(review): M is taken from A.spacing(), not A.rows() -- presumably the padded column
// length and a multiple of IT::size, which the final single-vector block depends on; confirm.
484  template< typename MT3 // Type of the left-hand side target matrix
485  , typename MT4 // Type of the left-hand side matrix operand
486  , typename MT5 > // Type of the right-hand side matrix operand
487  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
488  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
489  {
490  typedef IntrinsicTrait<ElementType> IT;
491 
492  const size_t M( A.spacing() );
493  const size_t N( B.columns() );
494  const size_t K( A.columns() );
495 
496  size_t i( 0UL );
497 
// Blocks of 8 intrinsic vectors along the rows, one target column at a time
498  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
499  for( size_t j=0UL; j<N; ++j ) {
500  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
501  for( size_t k=0UL; k<K; ++k ) {
502  const IntrinsicType b1( set( B(k,j) ) );
503  xmm1 = xmm1 + A.get(i ,k) * b1;
504  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
505  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
506  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
507  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
508  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
509  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
510  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
511  }
512  store( &(~C)(i ,j), xmm1 );
513  store( &(~C)(i+IT::size ,j), xmm2 );
514  store( &(~C)(i+IT::size*2UL,j), xmm3 );
515  store( &(~C)(i+IT::size*3UL,j), xmm4 );
516  store( &(~C)(i+IT::size*4UL,j), xmm5 );
517  store( &(~C)(i+IT::size*5UL,j), xmm6 );
518  store( &(~C)(i+IT::size*6UL,j), xmm7 );
519  store( &(~C)(i+IT::size*7UL,j), xmm8 );
520  }
521  }
// Blocks of 4 intrinsic vectors along the rows, two target columns per pass:
// xmm1-xmm4 accumulate column j, xmm5-xmm8 accumulate column j+1
522  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
523  size_t j( 0UL );
524  for( ; (j+2UL) <= N; j+=2UL ) {
525  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
526  for( size_t k=0UL; k<K; ++k ) {
527  const IntrinsicType a1( A.get(i ,k) );
528  const IntrinsicType a2( A.get(i+IT::size ,k) );
529  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
530  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
531  const IntrinsicType b1( set( B(k,j ) ) );
532  const IntrinsicType b2( set( B(k,j+1UL) ) );
533  xmm1 = xmm1 + a1 * b1;
534  xmm2 = xmm2 + a2 * b1;
535  xmm3 = xmm3 + a3 * b1;
536  xmm4 = xmm4 + a4 * b1;
537  xmm5 = xmm5 + a1 * b2;
538  xmm6 = xmm6 + a2 * b2;
539  xmm7 = xmm7 + a3 * b2;
540  xmm8 = xmm8 + a4 * b2;
541  }
542  store( &(~C)(i ,j ), xmm1 );
543  store( &(~C)(i+IT::size ,j ), xmm2 );
544  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
545  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
546  store( &(~C)(i ,j+1UL), xmm5 );
547  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
548  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
549  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
550  }
// Single leftover column for an odd number of columns
551  if( j < N ) {
552  IntrinsicType xmm1, xmm2, xmm3, xmm4;
553  for( size_t k=0UL; k<K; ++k ) {
554  const IntrinsicType b1( set( B(k,j) ) );
555  xmm1 = xmm1 + A.get(i ,k) * b1;
556  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
557  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
558  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
559  }
560  store( &(~C)(i ,j), xmm1 );
561  store( &(~C)(i+IT::size ,j), xmm2 );
562  store( &(~C)(i+IT::size*2UL,j), xmm3 );
563  store( &(~C)(i+IT::size*3UL,j), xmm4 );
564  }
565  }
// Blocks of 2 intrinsic vectors along the rows, two target columns per pass:
// xmm1/xmm2 accumulate column j, xmm3/xmm4 accumulate column j+1
566  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
567  size_t j( 0UL );
568  for( ; (j+2UL) <= N; j+=2UL ) {
569  IntrinsicType xmm1, xmm2, xmm3, xmm4;
570  for( size_t k=0UL; k<K; ++k ) {
571  const IntrinsicType a1( A.get(i ,k) );
572  const IntrinsicType a2( A.get(i+IT::size,k) );
573  const IntrinsicType b1( set( B(k,j ) ) );
574  const IntrinsicType b2( set( B(k,j+1UL) ) );
575  xmm1 = xmm1 + a1 * b1;
576  xmm2 = xmm2 + a2 * b1;
577  xmm3 = xmm3 + a1 * b2;
578  xmm4 = xmm4 + a2 * b2;
579  }
580  store( &(~C)(i ,j ), xmm1 );
581  store( &(~C)(i+IT::size,j ), xmm2 );
582  store( &(~C)(i ,j+1UL), xmm3 );
583  store( &(~C)(i+IT::size,j+1UL), xmm4 );
584  }
// Single leftover column for an odd number of columns
585  if( j < N ) {
586  IntrinsicType xmm1, xmm2;
587  for( size_t k=0UL; k<K; ++k ) {
588  const IntrinsicType b1( set( B(k,j) ) );
589  xmm1 = xmm1 + A.get(i ,k) * b1;
590  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
591  }
592  store( &(~C)(i ,j), xmm1 );
593  store( &(~C)(i+IT::size,j), xmm2 );
594  }
595  }
// Remaining rows: handled as exactly one intrinsic vector starting at row i
596  if( i < M ) {
597  size_t j( 0UL );
598  for( ; (j+2UL) <= N; j+=2UL ) {
599  IntrinsicType xmm1, xmm2;
600  for( size_t k=0UL; k<K; ++k ) {
601  const IntrinsicType a1( A.get(i,k) );
602  xmm1 = xmm1 + a1 * set( B(k,j ) );
603  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
604  }
605  store( &(~C)(i,j ), xmm1 );
606  store( &(~C)(i,j+1UL), xmm2 );
607  }
// Single leftover column for an odd number of columns
608  if( j < N ) {
609  IntrinsicType xmm1;
610  for( size_t k=0UL; k<K; ++k ) {
611  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
612  }
613  store( &(~C)(i,j), xmm1 );
614  }
615  }
616  }
618  //**********************************************************************************************
619 
620  //**BLAS-based assignment to dense matrices (default)*******************************************
634  template< typename MT3 // Type of the left-hand side target matrix
635  , typename MT4 // Type of the left-hand side matrix operand
636  , typename MT5 > // Type of the right-hand side matrix operand
637  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
638  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
639  {
640  selectDefaultAssignKernel( C, A, B );
641  }
643  //**********************************************************************************************
644 
645  //**BLAS-based assignment to dense matrices (single precision)**********************************
646 #if BLAZE_BLAS_MODE
647 
660  template< typename MT3 // Type of the left-hand side target matrix
661  , typename MT4 // Type of the left-hand side matrix operand
662  , typename MT5 > // Type of the right-hand side matrix operand
663  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
664  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
665  {
666  using boost::numeric_cast;
667 
668  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
669  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
670  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
671 
672  const int M ( numeric_cast<int>( A.rows() ) );
673  const int N ( numeric_cast<int>( B.columns() ) );
674  const int K ( numeric_cast<int>( A.columns() ) );
675  const int lda( numeric_cast<int>( A.spacing() ) );
676  const int ldb( numeric_cast<int>( B.spacing() ) );
677  const int ldc( numeric_cast<int>( C.spacing() ) );
678 
679  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
680  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
681  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
682  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
683  }
685 #endif
686  //**********************************************************************************************
687 
688  //**BLAS-based assignment to dense matrices (double precision)**********************************
689 #if BLAZE_BLAS_MODE
690 
703  template< typename MT3 // Type of the left-hand side target matrix
704  , typename MT4 // Type of the left-hand side matrix operand
705  , typename MT5 > // Type of the right-hand side matrix operand
706  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
707  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
708  {
709  using boost::numeric_cast;
710 
711  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
712  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
713  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
714 
715  const int M ( numeric_cast<int>( A.rows() ) );
716  const int N ( numeric_cast<int>( B.columns() ) );
717  const int K ( numeric_cast<int>( A.columns() ) );
718  const int lda( numeric_cast<int>( A.spacing() ) );
719  const int ldb( numeric_cast<int>( B.spacing() ) );
720  const int ldc( numeric_cast<int>( C.spacing() ) );
721 
722  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
723  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
724  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
725  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
726  }
728 #endif
729  //**********************************************************************************************
730 
731  //**BLAS-based assignment to dense matrices (single precision complex)**************************
732 #if BLAZE_BLAS_MODE
733 
746  template< typename MT3 // Type of the left-hand side target matrix
747  , typename MT4 // Type of the left-hand side matrix operand
748  , typename MT5 > // Type of the right-hand side matrix operand
749  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
750  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
751  {
752  using boost::numeric_cast;
753 
754  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
755  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
756  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
757  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
758  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
759  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
760 
761  const int M ( numeric_cast<int>( A.rows() ) );
762  const int N ( numeric_cast<int>( B.columns() ) );
763  const int K ( numeric_cast<int>( A.columns() ) );
764  const int lda( numeric_cast<int>( A.spacing() ) );
765  const int ldb( numeric_cast<int>( B.spacing() ) );
766  const int ldc( numeric_cast<int>( C.spacing() ) );
767  complex<float> alpha( 1.0F, 0.0F );
768  complex<float> beta ( 0.0F, 0.0F );
769 
770  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
771  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
772  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
773  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
774  }
776 #endif
777  //**********************************************************************************************
778 
779  //**BLAS-based assignment to dense matrices (double precision complex)**************************
780 #if BLAZE_BLAS_MODE
781 
794  template< typename MT3 // Type of the left-hand side target matrix
795  , typename MT4 // Type of the left-hand side matrix operand
796  , typename MT5 > // Type of the right-hand side matrix operand
797  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
798  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
799  {
800  using boost::numeric_cast;
801 
802  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
803  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
804  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
805  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
806  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
807  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
808 
809  const int M ( numeric_cast<int>( A.rows() ) );
810  const int N ( numeric_cast<int>( B.columns() ) );
811  const int K ( numeric_cast<int>( A.columns() ) );
812  const int lda( numeric_cast<int>( A.spacing() ) );
813  const int ldb( numeric_cast<int>( B.spacing() ) );
814  const int ldc( numeric_cast<int>( C.spacing() ) );
815  complex<double> alpha( 1.0, 0.0 );
816  complex<double> beta ( 0.0, 0.0 );
817 
818  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
819  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
820  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
821  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
822  }
824 #endif
825  //**********************************************************************************************
826 
827  //**Assignment to sparse matrices***************************************************************
// Assigns the matrix product 'rhs' to the sparse target matrix 'lhs'.
//
// lhs: target sparse matrix; its dimensions are asserted to match those of the product.
// rhs: the multiplication expression to be evaluated.
//
// The product is first evaluated into a temporary of type TmpType, which is then handed to
// assign() for the sparse target.
// NOTE(review): listing lines 844 and 848-852 are missing from this body -- confirm against
// the original header that no statements were dropped by the extraction.
840  template< typename MT // Type of the target sparse matrix
841  , bool SO > // Storage order of the target sparse matrix
842  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
843  {
845 
// Temporary type chosen by the target's storage-order flag SO
// (presumably ResultType when SO is true, OppositeType otherwise -- confirm SelectType)
846  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
847 
853  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
854 
855  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
856  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
857 
858  const TmpType tmp( rhs );
859  assign( ~lhs, tmp );
860  }
862  //**********************************************************************************************
863 
864  //**Addition assignment to dense matrices*******************************************************
// Adds the matrix product 'rhs' to the dense target matrix 'lhs'.
//
// lhs: target dense matrix; its dimensions are asserted to match those of the product.
// rhs: the multiplication expression to be evaluated.
//
// The target is left untouched if it is empty or if the inner dimension of the product is
// zero. Otherwise both operands are converted to LT/RT and a kernel is chosen by the number of
// target elements relative to TDMATTDMATMULT_THRESHOLD (defined outside this listing).
// NOTE(review): listing line 881 is missing from this body -- confirm against the original
// header that no statement was dropped by the extraction.
877  template< typename MT // Type of the target dense matrix
878  , bool SO > // Storage order of the target dense matrix
879  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
880  {
882 
883  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
884  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
885 
// An empty target or an empty inner dimension contributes nothing to the sum
886  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
887  return;
888  }
889 
890  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
891  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
892 
893  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
894  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
895  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
896  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
897  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
898  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
899 
// Targets below the threshold go to the default kernel, all others to the BLAS kernel selection
900  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
901  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
902  else
903  TDMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
904  }
906  //**********************************************************************************************
907 
908  //**Default addition assignment to dense matrices***********************************************
922  template< typename MT3 // Type of the left-hand side target matrix
923  , typename MT4 // Type of the left-hand side matrix operand
924  , typename MT5 > // Type of the right-hand side matrix operand
925  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
926  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
927  {
928  const size_t M( A.rows() );
929  const size_t N( B.columns() );
930  const size_t K( A.columns() );
931 
932  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
933  const size_t end( N & size_t(-2) );
934 
935  for( size_t i=0UL; i<M; ++i ) {
936  for( size_t k=0UL; k<K; ++k ) {
937  for( size_t j=0UL; j<end; j+=2UL ) {
938  C(i,j ) += A(i,k) * B(k,j );
939  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
940  }
941  if( end < N ) {
942  C(i,end) += A(i,k) * B(k,end);
943  }
944  }
945  }
946  }
948  //**********************************************************************************************
949 
950  //**Vectorized default addition assignment to row-major dense matrices**************************
964  template< typename MT3 // Type of the left-hand side target matrix
965  , typename MT4 // Type of the left-hand side matrix operand
966  , typename MT5 > // Type of the right-hand side matrix operand
967  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
968  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
969  {
970  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
971  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
972 
973  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
974  const typename MT5::OppositeType tmp( B );
975  addAssign( ~C, A * tmp );
976  }
977  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
978  const typename MT4::OppositeType tmp( A );
979  addAssign( ~C, tmp * B );
980  }
981  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
982  const typename MT5::OppositeType tmp( B );
983  addAssign( ~C, A * tmp );
984  }
985  else {
986  const typename MT4::OppositeType tmp( A );
987  addAssign( ~C, tmp * B );
988  }
989  }
991  //**********************************************************************************************
992 
993  //**Vectorized default addition assignment to column-major dense matrices***********************
1007  template< typename MT3 // Type of the left-hand side target matrix
1008  , typename MT4 // Type of the left-hand side matrix operand
1009  , typename MT5 > // Type of the right-hand side matrix operand
1010  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1011  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1012  {
      // Vectorized default kernel for the addition assignment C += A * B to a column-major
      // dense matrix. Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
      //
      //   C  target matrix; its current values are read via load(), updated and written
      //      back via store()
      //   A  left-hand side operand, read in packed form through A.get(row,column)
      //   B  right-hand side operand, read element-wise through B(row,column)
      //
      // NOTE(review): A.get(i,k) presumably yields IT::size consecutive elements of column k
      // starting at row i, and set() presumably replicates one scalar into every lane of an
      // IntrinsicType -- confirm against the intrinsics layer.
1013  typedef IntrinsicTrait<ElementType> IT;
1014 
      // The row bound is A.spacing(), not A.rows().
      // NOTE(review): presumably the spacing is the padded column length, so padding elements
      // of A and C take part in the packed steps as well -- confirm that C is padded alike.
1015  const size_t M( A.spacing() );
1016  const size_t N( B.columns() );
1017  const size_t K( A.columns() );
1018 
      // Row cursor shared by all of the following loops; each loop continues where the
      // previous (wider) one stopped.
1019  size_t i( 0UL );
1020 
      // Row blocks of eight packed steps, one column of C at a time.
1021  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1022  for( size_t j=0UL; j<N; ++j ) {
      // Start from the current contents of C so that the product is accumulated onto it.
1023  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1024  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1025  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1026  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1027  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1028  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1029  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1030  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1031  for( size_t k=0UL; k<K; ++k ) {
1032  const IntrinsicType b1( set( B(k,j) ) );
1033  xmm1 = xmm1 + A.get(i ,k) * b1;
1034  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1035  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1036  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1037  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
1038  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
1039  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
1040  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
1041  }
1042  store( &(~C)(i ,j), xmm1 );
1043  store( &(~C)(i+IT::size ,j), xmm2 );
1044  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1045  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1046  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1047  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1048  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1049  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1050  }
1051  }
      // Row blocks of four packed steps, two columns of C at a time (xmm1-4: column j,
      // xmm5-8: column j+1); a single leftover column is handled afterwards.
1052  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1053  size_t j( 0UL );
1054  for( ; (j+2UL) <= N; j+=2UL ) {
1055  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1056  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1057  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1058  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1059  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1060  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1061  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1062  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1063  for( size_t k=0UL; k<K; ++k ) {
      // The packed values of A are fetched once and reused for both columns of B.
1064  const IntrinsicType a1( A.get(i ,k) );
1065  const IntrinsicType a2( A.get(i+IT::size ,k) );
1066  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1067  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1068  const IntrinsicType b1( set( B(k,j ) ) );
1069  const IntrinsicType b2( set( B(k,j+1UL) ) );
1070  xmm1 = xmm1 + a1 * b1;
1071  xmm2 = xmm2 + a2 * b1;
1072  xmm3 = xmm3 + a3 * b1;
1073  xmm4 = xmm4 + a4 * b1;
1074  xmm5 = xmm5 + a1 * b2;
1075  xmm6 = xmm6 + a2 * b2;
1076  xmm7 = xmm7 + a3 * b2;
1077  xmm8 = xmm8 + a4 * b2;
1078  }
1079  store( &(~C)(i ,j ), xmm1 );
1080  store( &(~C)(i+IT::size ,j ), xmm2 );
1081  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1082  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1083  store( &(~C)(i ,j+1UL), xmm5 );
1084  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1085  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1086  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1087  }
      // Odd number of columns: process the last column on its own.
1088  if( j < N ) {
1089  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1090  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1091  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1092  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1093  for( size_t k=0UL; k<K; ++k ) {
1094  const IntrinsicType b1( set( B(k,j) ) );
1095  xmm1 = xmm1 + A.get(i ,k) * b1;
1096  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
1097  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
1098  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
1099  }
1100  store( &(~C)(i ,j), xmm1 );
1101  store( &(~C)(i+IT::size ,j), xmm2 );
1102  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1103  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1104  }
1105  }
      // Row blocks of two packed steps, two columns of C at a time (xmm1-2: column j,
      // xmm3-4: column j+1); a single leftover column is handled afterwards.
1106  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1107  size_t j( 0UL );
1108  for( ; (j+2UL) <= N; j+=2UL ) {
1109  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1110  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1111  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1112  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1113  for( size_t k=0UL; k<K; ++k ) {
1114  const IntrinsicType a1( A.get(i ,k) );
1115  const IntrinsicType a2( A.get(i+IT::size,k) );
1116  const IntrinsicType b1( set( B(k,j ) ) );
1117  const IntrinsicType b2( set( B(k,j+1UL) ) );
1118  xmm1 = xmm1 + a1 * b1;
1119  xmm2 = xmm2 + a2 * b1;
1120  xmm3 = xmm3 + a1 * b2;
1121  xmm4 = xmm4 + a2 * b2;
1122  }
1123  store( &(~C)(i ,j ), xmm1 );
1124  store( &(~C)(i+IT::size,j ), xmm2 );
1125  store( &(~C)(i ,j+1UL), xmm3 );
1126  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1127  }
      // Odd number of columns: process the last column on its own.
1128  if( j < N ) {
1129  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1130  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1131  for( size_t k=0UL; k<K; ++k ) {
1132  const IntrinsicType b1( set( B(k,j) ) );
1133  xmm1 = xmm1 + A.get(i ,k) * b1;
1134  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
1135  }
1136  store( &(~C)(i ,j), xmm1 );
1137  store( &(~C)(i+IT::size,j), xmm2 );
1138  }
1139  }
      // Remaining rows: a single packed step starting at row i is processed per column
      // (two columns at a time, then a leftover column).
      // NOTE(review): this is an 'if', not a loop, so it relies on at most one packed step
      // being left -- presumably guaranteed by M being a multiple of IT::size; confirm.
1140  if( i < M ) {
1141  size_t j( 0UL );
1142  for( ; (j+2UL) <= N; j+=2UL ) {
1143  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1144  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1145  for( size_t k=0UL; k<K; ++k ) {
1146  const IntrinsicType a1( A.get(i,k) );
1147  xmm1 = xmm1 + a1 * set( B(k,j ) );
1148  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1149  }
1150  store( &(~C)(i,j ), xmm1 );
1151  store( &(~C)(i,j+1UL), xmm2 );
1152  }
1153  if( j < N ) {
1154  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1155  for( size_t k=0UL; k<K; ++k ) {
1156  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
1157  }
1158  store( &(~C)(i,j), xmm1 );
1159  }
1160  }
1161  }
1163  //**********************************************************************************************
1164 
1165  //**BLAS-based addition assignment to dense matrices (default)**********************************
1179  template< typename MT3 // Type of the left-hand side target matrix
1180  , typename MT4 // Type of the left-hand side matrix operand
1181  , typename MT5 > // Type of the right-hand side matrix operand
1182  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1183  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1184  {
1185  selectDefaultAddAssignKernel( C, A, B );
1186  }
1188  //**********************************************************************************************
1189 
1190  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1191 #if BLAZE_BLAS_MODE
1192 
1205  template< typename MT3 // Type of the left-hand side target matrix
1206  , typename MT4 // Type of the left-hand side matrix operand
1207  , typename MT5 > // Type of the right-hand side matrix operand
1208  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1209  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1210  {
1211  using boost::numeric_cast;
1212 
1213  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1214  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1215  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1216 
1217  const int M ( numeric_cast<int>( A.rows() ) );
1218  const int N ( numeric_cast<int>( B.columns() ) );
1219  const int K ( numeric_cast<int>( A.columns() ) );
1220  const int lda( numeric_cast<int>( A.spacing() ) );
1221  const int ldb( numeric_cast<int>( B.spacing() ) );
1222  const int ldc( numeric_cast<int>( C.spacing() ) );
1223 
1224  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1225  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1226  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1227  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1228  }
1230 #endif
1231  //**********************************************************************************************
1232 
1233  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1234 #if BLAZE_BLAS_MODE
1235 
1248  template< typename MT3 // Type of the left-hand side target matrix
1249  , typename MT4 // Type of the left-hand side matrix operand
1250  , typename MT5 > // Type of the right-hand side matrix operand
1251  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1252  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1253  {
1254  using boost::numeric_cast;
1255 
1256  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1257  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1258  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1259 
1260  const int M ( numeric_cast<int>( A.rows() ) );
1261  const int N ( numeric_cast<int>( B.columns() ) );
1262  const int K ( numeric_cast<int>( A.columns() ) );
1263  const int lda( numeric_cast<int>( A.spacing() ) );
1264  const int ldb( numeric_cast<int>( B.spacing() ) );
1265  const int ldc( numeric_cast<int>( C.spacing() ) );
1266 
1267  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1268  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1269  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1270  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1271  }
1273 #endif
1274  //**********************************************************************************************
1275 
1276  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1277 #if BLAZE_BLAS_MODE
1278 
1291  template< typename MT3 // Type of the left-hand side target matrix
1292  , typename MT4 // Type of the left-hand side matrix operand
1293  , typename MT5 > // Type of the right-hand side matrix operand
1294  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1295  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1296  {
1297  using boost::numeric_cast;
1298 
1299  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1300  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1301  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1302  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1303  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1304  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1305 
1306  const int M ( numeric_cast<int>( A.rows() ) );
1307  const int N ( numeric_cast<int>( B.columns() ) );
1308  const int K ( numeric_cast<int>( A.columns() ) );
1309  const int lda( numeric_cast<int>( A.spacing() ) );
1310  const int ldb( numeric_cast<int>( B.spacing() ) );
1311  const int ldc( numeric_cast<int>( C.spacing() ) );
1312  const complex<float> alpha( 1.0F, 0.0F );
1313  const complex<float> beta ( 1.0F, 0.0F );
1314 
1315  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1316  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1317  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1318  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1319  }
1321 #endif
1322  //**********************************************************************************************
1323 
1324  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1325 #if BLAZE_BLAS_MODE
1326 
1339  template< typename MT3 // Type of the left-hand side target matrix
1340  , typename MT4 // Type of the left-hand side matrix operand
1341  , typename MT5 > // Type of the right-hand side matrix operand
1342  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1343  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1344  {
1345  using boost::numeric_cast;
1346 
1347  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1348  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1349  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1350  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1351  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1352  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1353 
1354  const int M ( numeric_cast<int>( A.rows() ) );
1355  const int N ( numeric_cast<int>( B.columns() ) );
1356  const int K ( numeric_cast<int>( A.columns() ) );
1357  const int lda( numeric_cast<int>( A.spacing() ) );
1358  const int ldb( numeric_cast<int>( B.spacing() ) );
1359  const int ldc( numeric_cast<int>( C.spacing() ) );
1360  const complex<double> alpha( 1.0, 0.0 );
1361  const complex<double> beta ( 1.0, 0.0 );
1362 
1363  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1364  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1365  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1366  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1367  }
1369 #endif
1370  //**********************************************************************************************
1371 
1372  //**Addition assignment to sparse matrices******************************************************
1373  // No special implementation for the addition assignment to sparse matrices.
1374  //**********************************************************************************************
1375 
1376  //**Subtraction assignment to dense matrices****************************************************
1389  template< typename MT // Type of the target dense matrix
1390  , bool SO > // Storage order of the target dense matrix
1391  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1392  {
1394 
1395  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1396  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1397 
1398  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1399  return;
1400  }
1401 
1402  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1403  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1404 
1405  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1406  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1407  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1408  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1409  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1410  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1411 
1412  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
1413  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1414  else
1415  TDMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1416  }
1418  //**********************************************************************************************
1419 
1420  //**Default subtraction assignment to dense matrices********************************************
1434  template< typename MT3 // Type of the left-hand side target matrix
1435  , typename MT4 // Type of the left-hand side matrix operand
1436  , typename MT5 > // Type of the right-hand side matrix operand
1437  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1438  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1439  {
1440  const size_t M( A.rows() );
1441  const size_t N( B.columns() );
1442  const size_t K( A.columns() );
1443 
1444  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1445  const size_t end( N & size_t(-2) );
1446 
1447  for( size_t i=0UL; i<M; ++i ) {
1448  for( size_t k=0UL; k<K; ++k ) {
1449  for( size_t j=0UL; j<end; j+=2UL ) {
1450  C(i,j ) -= A(i,k) * B(k,j );
1451  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1452  }
1453  if( end < N ) {
1454  C(i,end) -= A(i,k) * B(k,end);
1455  }
1456  }
1457  }
1458  }
1460  //**********************************************************************************************
1461 
1462  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1476  template< typename MT3 // Type of the left-hand side target matrix
1477  , typename MT4 // Type of the left-hand side matrix operand
1478  , typename MT5 > // Type of the right-hand side matrix operand
1479  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1480  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1481  {
1482  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1483  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1484 
1485  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1486  const typename MT5::OppositeType tmp( B );
1487  subAssign( ~C, A * tmp );
1488  }
1489  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1490  const typename MT4::OppositeType tmp( A );
1491  subAssign( ~C, tmp * B );
1492  }
1493  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1494  const typename MT5::OppositeType tmp( B );
1495  subAssign( ~C, A * tmp );
1496  }
1497  else {
1498  const typename MT4::OppositeType tmp( A );
1499  subAssign( ~C, tmp * B );
1500  }
1501  }
1503  //**********************************************************************************************
1504 
1505  //**Vectorized default subtraction assignment to column-major dense matrices********************
1519  template< typename MT3 // Type of the left-hand side target matrix
1520  , typename MT4 // Type of the left-hand side matrix operand
1521  , typename MT5 > // Type of the right-hand side matrix operand
1522  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1523  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1524  {
      // Vectorized default kernel for the subtraction assignment C -= A * B to a
      // column-major dense matrix. Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5>
      // holds.
      //
      //   C  target matrix; its current values are read via load(), updated and written
      //      back via store()
      //   A  left-hand side operand, read in packed form through A.get(row,column)
      //   B  right-hand side operand, read element-wise through B(row,column)
      //
      // NOTE(review): A.get(i,k) presumably yields IT::size consecutive elements of column k
      // starting at row i, and set() presumably replicates one scalar into every lane of an
      // IntrinsicType -- confirm against the intrinsics layer.
1525  typedef IntrinsicTrait<ElementType> IT;
1526 
      // The row bound is A.spacing(), not A.rows().
      // NOTE(review): presumably the spacing is the padded column length, so padding elements
      // of A and C take part in the packed steps as well -- confirm that C is padded alike.
1527  const size_t M( A.spacing() );
1528  const size_t N( B.columns() );
1529  const size_t K( A.columns() );
1530 
      // Row cursor shared by all of the following loops; each loop continues where the
      // previous (wider) one stopped.
1531  size_t i( 0UL );
1532 
      // Row blocks of eight packed steps, one column of C at a time.
1533  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
1534  for( size_t j=0UL; j<N; ++j ) {
      // Start from the current contents of C so that the product is subtracted from it.
1535  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1536  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1537  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1538  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1539  IntrinsicType xmm5( load( &(~C)(i+IT::size*4UL,j) ) );
1540  IntrinsicType xmm6( load( &(~C)(i+IT::size*5UL,j) ) );
1541  IntrinsicType xmm7( load( &(~C)(i+IT::size*6UL,j) ) );
1542  IntrinsicType xmm8( load( &(~C)(i+IT::size*7UL,j) ) );
1543  for( size_t k=0UL; k<K; ++k ) {
1544  const IntrinsicType b1( set( B(k,j) ) );
1545  xmm1 = xmm1 - A.get(i ,k) * b1;
1546  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1547  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1548  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1549  xmm5 = xmm5 - A.get(i+IT::size*4UL,k) * b1;
1550  xmm6 = xmm6 - A.get(i+IT::size*5UL,k) * b1;
1551  xmm7 = xmm7 - A.get(i+IT::size*6UL,k) * b1;
1552  xmm8 = xmm8 - A.get(i+IT::size*7UL,k) * b1;
1553  }
1554  store( &(~C)(i ,j), xmm1 );
1555  store( &(~C)(i+IT::size ,j), xmm2 );
1556  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1557  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1558  store( &(~C)(i+IT::size*4UL,j), xmm5 );
1559  store( &(~C)(i+IT::size*5UL,j), xmm6 );
1560  store( &(~C)(i+IT::size*6UL,j), xmm7 );
1561  store( &(~C)(i+IT::size*7UL,j), xmm8 );
1562  }
1563  }
      // Row blocks of four packed steps, two columns of C at a time (xmm1-4: column j,
      // xmm5-8: column j+1); a single leftover column is handled afterwards.
1564  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
1565  size_t j( 0UL );
1566  for( ; (j+2UL) <= N; j+=2UL ) {
1567  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1568  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j ) ) );
1569  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j ) ) );
1570  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j ) ) );
1571  IntrinsicType xmm5( load( &(~C)(i ,j+1UL) ) );
1572  IntrinsicType xmm6( load( &(~C)(i+IT::size ,j+1UL) ) );
1573  IntrinsicType xmm7( load( &(~C)(i+IT::size*2UL,j+1UL) ) );
1574  IntrinsicType xmm8( load( &(~C)(i+IT::size*3UL,j+1UL) ) );
1575  for( size_t k=0UL; k<K; ++k ) {
      // The packed values of A are fetched once and reused for both columns of B.
1576  const IntrinsicType a1( A.get(i ,k) );
1577  const IntrinsicType a2( A.get(i+IT::size ,k) );
1578  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
1579  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
1580  const IntrinsicType b1( set( B(k,j ) ) );
1581  const IntrinsicType b2( set( B(k,j+1UL) ) );
1582  xmm1 = xmm1 - a1 * b1;
1583  xmm2 = xmm2 - a2 * b1;
1584  xmm3 = xmm3 - a3 * b1;
1585  xmm4 = xmm4 - a4 * b1;
1586  xmm5 = xmm5 - a1 * b2;
1587  xmm6 = xmm6 - a2 * b2;
1588  xmm7 = xmm7 - a3 * b2;
1589  xmm8 = xmm8 - a4 * b2;
1590  }
1591  store( &(~C)(i ,j ), xmm1 );
1592  store( &(~C)(i+IT::size ,j ), xmm2 );
1593  store( &(~C)(i+IT::size*2UL,j ), xmm3 );
1594  store( &(~C)(i+IT::size*3UL,j ), xmm4 );
1595  store( &(~C)(i ,j+1UL), xmm5 );
1596  store( &(~C)(i+IT::size ,j+1UL), xmm6 );
1597  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 );
1598  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 );
1599  }
      // Odd number of columns: process the last column on its own.
1600  if( j < N ) {
1601  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1602  IntrinsicType xmm2( load( &(~C)(i+IT::size ,j) ) );
1603  IntrinsicType xmm3( load( &(~C)(i+IT::size*2UL,j) ) );
1604  IntrinsicType xmm4( load( &(~C)(i+IT::size*3UL,j) ) );
1605  for( size_t k=0UL; k<K; ++k ) {
1606  const IntrinsicType b1( set( B(k,j) ) );
1607  xmm1 = xmm1 - A.get(i ,k) * b1;
1608  xmm2 = xmm2 - A.get(i+IT::size ,k) * b1;
1609  xmm3 = xmm3 - A.get(i+IT::size*2UL,k) * b1;
1610  xmm4 = xmm4 - A.get(i+IT::size*3UL,k) * b1;
1611  }
1612  store( &(~C)(i ,j), xmm1 );
1613  store( &(~C)(i+IT::size ,j), xmm2 );
1614  store( &(~C)(i+IT::size*2UL,j), xmm3 );
1615  store( &(~C)(i+IT::size*3UL,j), xmm4 );
1616  }
1617  }
      // Row blocks of two packed steps, two columns of C at a time (xmm1-2: column j,
      // xmm3-4: column j+1); a single leftover column is handled afterwards.
1618  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
1619  size_t j( 0UL );
1620  for( ; (j+2UL) <= N; j+=2UL ) {
1621  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1622  IntrinsicType xmm2( load( &(~C)(i+IT::size,j ) ) );
1623  IntrinsicType xmm3( load( &(~C)(i ,j+1UL) ) );
1624  IntrinsicType xmm4( load( &(~C)(i+IT::size,j+1UL) ) );
1625  for( size_t k=0UL; k<K; ++k ) {
1626  const IntrinsicType a1( A.get(i ,k) );
1627  const IntrinsicType a2( A.get(i+IT::size,k) );
1628  const IntrinsicType b1( set( B(k,j ) ) );
1629  const IntrinsicType b2( set( B(k,j+1UL) ) );
1630  xmm1 = xmm1 - a1 * b1;
1631  xmm2 = xmm2 - a2 * b1;
1632  xmm3 = xmm3 - a1 * b2;
1633  xmm4 = xmm4 - a2 * b2;
1634  }
1635  store( &(~C)(i ,j ), xmm1 );
1636  store( &(~C)(i+IT::size,j ), xmm2 );
1637  store( &(~C)(i ,j+1UL), xmm3 );
1638  store( &(~C)(i+IT::size,j+1UL), xmm4 );
1639  }
      // Odd number of columns: process the last column on its own.
1640  if( j < N ) {
1641  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1642  IntrinsicType xmm2( load( &(~C)(i+IT::size,j) ) );
1643  for( size_t k=0UL; k<K; ++k ) {
1644  const IntrinsicType b1( set( B(k,j) ) );
1645  xmm1 = xmm1 - A.get(i ,k) * b1;
1646  xmm2 = xmm2 - A.get(i+IT::size,k) * b1;
1647  }
1648  store( &(~C)(i ,j), xmm1 );
1649  store( &(~C)(i+IT::size,j), xmm2 );
1650  }
1651  }
      // Remaining rows: a single packed step starting at row i is processed per column
      // (two columns at a time, then a leftover column).
      // NOTE(review): this is an 'if', not a loop, so it relies on at most one packed step
      // being left -- presumably guaranteed by M being a multiple of IT::size; confirm.
1652  if( i < M ) {
1653  size_t j( 0UL );
1654  for( ; (j+2UL) <= N; j+=2UL ) {
1655  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1656  IntrinsicType xmm2( load( &(~C)(i,j+1UL) ) );
1657  for( size_t k=0UL; k<K; ++k ) {
1658  const IntrinsicType a1( A.get(i,k) );
1659  xmm1 = xmm1 - a1 * set( B(k,j ) );
1660  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1661  }
1662  store( &(~C)(i,j ), xmm1 );
1663  store( &(~C)(i,j+1UL), xmm2 );
1664  }
1665  if( j < N ) {
1666  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1667  for( size_t k=0UL; k<K; ++k ) {
1668  xmm1 = xmm1 - A.get(i,k) * set( B(k,j) );
1669  }
1670  store( &(~C)(i,j), xmm1 );
1671  }
1672  }
1673  }
1675  //**********************************************************************************************
1676 
1677  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1691  template< typename MT3 // Type of the left-hand side target matrix
1692  , typename MT4 // Type of the left-hand side matrix operand
1693  , typename MT5 > // Type of the right-hand side matrix operand
1694  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1695  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1696  {
1697  selectDefaultSubAssignKernel( C, A, B );
1698  }
1700  //**********************************************************************************************
1701 
1702  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1703 #if BLAZE_BLAS_MODE
1704 
1717  template< typename MT3 // Type of the left-hand side target matrix
1718  , typename MT4 // Type of the left-hand side matrix operand
1719  , typename MT5 > // Type of the right-hand side matrix operand
1720  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1721  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1722  {
1723  using boost::numeric_cast;
1724 
1725  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1726  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1727  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1728 
1729  const int M ( numeric_cast<int>( A.rows() ) );
1730  const int N ( numeric_cast<int>( B.columns() ) );
1731  const int K ( numeric_cast<int>( A.columns() ) );
1732  const int lda( numeric_cast<int>( A.spacing() ) );
1733  const int ldb( numeric_cast<int>( B.spacing() ) );
1734  const int ldc( numeric_cast<int>( C.spacing() ) );
1735 
1736  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1737  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1738  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1739  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1740  }
1742 #endif
1743  //**********************************************************************************************
1744 
1745  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1746 #if BLAZE_BLAS_MODE
1747 
1760  template< typename MT3 // Type of the left-hand side target matrix
1761  , typename MT4 // Type of the left-hand side matrix operand
1762  , typename MT5 > // Type of the right-hand side matrix operand
1763  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1764  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1765  {
1766  using boost::numeric_cast;
1767 
1768  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1769  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1770  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1771 
1772  const int M ( numeric_cast<int>( A.rows() ) );
1773  const int N ( numeric_cast<int>( B.columns() ) );
1774  const int K ( numeric_cast<int>( A.columns() ) );
1775  const int lda( numeric_cast<int>( A.spacing() ) );
1776  const int ldb( numeric_cast<int>( B.spacing() ) );
1777  const int ldc( numeric_cast<int>( C.spacing() ) );
1778 
1779  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1780  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1781  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1782  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1783  }
1785 #endif
1786  //**********************************************************************************************
1787 
1788  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1789 #if BLAZE_BLAS_MODE
1790 
1803  template< typename MT3 // Type of the left-hand side target matrix
1804  , typename MT4 // Type of the left-hand side matrix operand
1805  , typename MT5 > // Type of the right-hand side matrix operand
1806  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1807  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1808  {
1809  using boost::numeric_cast;
1810 
1811  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1812  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1813  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1814  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1815  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1816  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1817 
1818  const int M ( numeric_cast<int>( A.rows() ) );
1819  const int N ( numeric_cast<int>( B.columns() ) );
1820  const int K ( numeric_cast<int>( A.columns() ) );
1821  const int lda( numeric_cast<int>( A.spacing() ) );
1822  const int ldb( numeric_cast<int>( B.spacing() ) );
1823  const int ldc( numeric_cast<int>( C.spacing() ) );
1824  const complex<float> alpha( -1.0F, 0.0F );
1825  const complex<float> beta ( 1.0F, 0.0F );
1826 
1827  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1828  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1829  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1830  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1831  }
1833 #endif
1834  //**********************************************************************************************
1835 
1836  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1837 #if BLAZE_BLAS_MODE
1838 
1851  template< typename MT3 // Type of the left-hand side target matrix
1852  , typename MT4 // Type of the left-hand side matrix operand
1853  , typename MT5 > // Type of the right-hand side matrix operand
1854  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1855  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1856  {
1857  using boost::numeric_cast;
1858 
1859  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1860  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1861  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1862  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1863  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1864  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1865 
1866  const int M ( numeric_cast<int>( A.rows() ) );
1867  const int N ( numeric_cast<int>( B.columns() ) );
1868  const int K ( numeric_cast<int>( A.columns() ) );
1869  const int lda( numeric_cast<int>( A.spacing() ) );
1870  const int ldb( numeric_cast<int>( B.spacing() ) );
1871  const int ldc( numeric_cast<int>( C.spacing() ) );
1872  const complex<double> alpha( -1.0, 0.0 );
1873  const complex<double> beta ( 1.0, 0.0 );
1874 
1875  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1876  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1877  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1878  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1879  }
1881 #endif
1882  //**********************************************************************************************
1883 
1884  //**Subtraction assignment to sparse matrices***************************************************
1885  // No special implementation for the subtraction assignment to sparse matrices.
1886  //**********************************************************************************************
1887 
1888  //**Multiplication assignment to dense matrices*************************************************
1889  // No special implementation for the multiplication assignment to dense matrices.
1890  //**********************************************************************************************
1891 
1892  //**Multiplication assignment to sparse matrices************************************************
1893  // No special implementation for the multiplication assignment to sparse matrices.
1894  //**********************************************************************************************
1895 
1896  //**Compile time checks*************************************************************************
1903  //**********************************************************************************************
1904 };
1905 //*************************************************************************************************
1906 
1907 
1908 
1909 
1910 //=================================================================================================
1911 //
1912 // DMATSCALARMULTEXPR SPECIALIZATION
1913 //
1914 //=================================================================================================
1915 
1916 //*************************************************************************************************
1924 template< typename MT1 // Type of the left-hand side dense matrix
1925  , typename MT2 // Type of the right-hand side dense matrix
1926  , typename ST > // Type of the right-hand side scalar value
1927 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
1928  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
1929  , private Expression
1930  , private Computation
1931 {
1932  private:
1933  //**Type definitions****************************************************************************
1934  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
1935  typedef typename MMM::ResultType RES;
1936  typedef typename MT1::ResultType RT1;
1937  typedef typename MT2::ResultType RT2;
1938  typedef typename MT1::CompositeType CT1;
1939  typedef typename MT2::CompositeType CT2;
1940  //**********************************************************************************************
1941 
1942  //**********************************************************************************************
1944 
1947  template< typename T1, typename T2, typename T3, typename T4 >
1948  struct UseSinglePrecisionKernel {
1949  enum { value = IsFloat<typename T1::ElementType>::value &&
1950  IsFloat<typename T2::ElementType>::value &&
1951  IsFloat<typename T3::ElementType>::value &&
1952  !IsComplex<T4>::value };
1953  };
1954  //**********************************************************************************************
1955 
1956  //**********************************************************************************************
1958 
1961  template< typename T1, typename T2, typename T3, typename T4 >
1962  struct UseDoublePrecisionKernel {
1963  enum { value = IsDouble<typename T1::ElementType>::value &&
1964  IsDouble<typename T2::ElementType>::value &&
1965  IsDouble<typename T3::ElementType>::value &&
1966  !IsComplex<T4>::value };
1967  };
1968  //**********************************************************************************************
1969 
1970  //**********************************************************************************************
1972 
1975  template< typename T1, typename T2, typename T3 >
1976  struct UseSinglePrecisionComplexKernel {
1977  typedef complex<float> Type;
1978  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1979  IsSame<typename T2::ElementType,Type>::value &&
1980  IsSame<typename T3::ElementType,Type>::value };
1981  };
1982  //**********************************************************************************************
1983 
1984  //**********************************************************************************************
1986 
1989  template< typename T1, typename T2, typename T3 >
1990  struct UseDoublePrecisionComplexKernel {
1991  typedef complex<double> Type;
1992  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1993  IsSame<typename T2::ElementType,Type>::value &&
1994  IsSame<typename T3::ElementType,Type>::value };
1995  };
1996  //**********************************************************************************************
1997 
1998  //**********************************************************************************************
2000 
2002  template< typename T1, typename T2, typename T3, typename T4 >
2003  struct UseDefaultKernel {
2004  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2005  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2006  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2007  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2008  };
2009  //**********************************************************************************************
2010 
2011  //**********************************************************************************************
2013 
2015  template< typename T1, typename T2, typename T3, typename T4 >
2016  struct UseVectorizedDefaultKernel {
2017  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2018  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2019  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2020  IsSame<typename T1::ElementType,T4>::value &&
2021  IntrinsicTrait<typename T1::ElementType>::addition &&
2022  IntrinsicTrait<typename T1::ElementType>::multiplication };
2023  };
2024  //**********************************************************************************************
2025 
2026  public:
2027  //**Type definitions****************************************************************************
2028  typedef DMatScalarMultExpr<MMM,ST,true> This;
2029  typedef typename MultTrait<RES,ST>::Type ResultType;
2030  typedef typename ResultType::OppositeType OppositeType;
2031  typedef typename ResultType::TransposeType TransposeType;
2032  typedef typename ResultType::ElementType ElementType;
2033  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2034  typedef const ElementType ReturnType;
2035  typedef const ResultType CompositeType;
2036 
2038  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2039 
2041  typedef typename SelectType< IsNumeric<ElementType>::value, ElementType, ST >::Type RightOperand;
2042 
2044  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2045 
2047  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2048  //**********************************************************************************************
2049 
2050  //**Compilation flags***************************************************************************
//! Compilation flag for intrinsic optimization; this expression is never evaluated via
//! intrinsics directly, hence the flag is fixed to 0.
enum { vectorizable = 0 };
2053  //**********************************************************************************************
2054 
2055  //**Constructor*********************************************************************************
2061  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2062  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2063  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2064  {}
2065  //**********************************************************************************************
2066 
2067  //**Access operator*****************************************************************************
2074  inline ReturnType operator()( size_t i, size_t j ) const {
2075  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2076  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2077  return matrix_(i,j) * scalar_;
2078  }
2079  //**********************************************************************************************
2080 
2081  //**Rows function*******************************************************************************
2086  inline size_t rows() const {
2087  return matrix_.rows();
2088  }
2089  //**********************************************************************************************
2090 
2091  //**Columns function****************************************************************************
2096  inline size_t columns() const {
2097  return matrix_.columns();
2098  }
2099  //**********************************************************************************************
2100 
2101  //**Left operand access*************************************************************************
2106  inline LeftOperand leftOperand() const {
2107  return matrix_;
2108  }
2109  //**********************************************************************************************
2110 
2111  //**Right operand access************************************************************************
2116  inline RightOperand rightOperand() const {
2117  return scalar_;
2118  }
2119  //**********************************************************************************************
2120 
2121  //**********************************************************************************************
2127  template< typename T >
2128  inline bool canAlias( const T* alias ) const {
2129  return matrix_.canAlias( alias );
2130  }
2131  //**********************************************************************************************
2132 
2133  //**********************************************************************************************
2139  template< typename T >
2140  inline bool isAliased( const T* alias ) const {
2141  return matrix_.isAliased( alias );
2142  }
2143  //**********************************************************************************************
2144 
2145  private:
2146  //**Member variables****************************************************************************
2147  LeftOperand matrix_;
2148  RightOperand scalar_;
2149  //**********************************************************************************************
2150 
2151  //**Assignment to dense matrices****************************************************************
2160  template< typename MT3 // Type of the target dense matrix
2161  , bool SO > // Storage order of the target dense matrix
2162  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2163  {
2165 
2166  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2167  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2168 
2169  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2170  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2171 
2172  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2173  return;
2174  }
2175  else if( left.columns() == 0UL ) {
2176  reset( ~lhs );
2177  return;
2178  }
2179 
2180  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2181  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2182 
2183  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2184  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2185  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2186  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2187  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2188  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2189 
2190  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2191  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2192  else
2193  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2194  }
2195  //**********************************************************************************************
2196 
2197  //**Default assignment to dense matrices********************************************************
2211  template< typename MT3 // Type of the left-hand side target matrix
2212  , typename MT4 // Type of the left-hand side matrix operand
2213  , typename MT5 // Type of the right-hand side matrix operand
2214  , typename ST2 > // Type of the scalar value
2215  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2216  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2217  {
2218  for( size_t i=0UL; i<A.rows(); ++i ) {
2219  for( size_t k=0UL; k<B.columns(); ++k ) {
2220  C(i,k) = A(i,0UL) * B(0UL,k);
2221  }
2222  for( size_t j=1UL; j<A.columns(); ++j ) {
2223  for( size_t k=0UL; k<B.columns(); ++k ) {
2224  C(i,k) += A(i,j) * B(j,k);
2225  }
2226  }
2227  for( size_t k=0UL; k<B.columns(); ++k ) {
2228  C(i,k) *= scalar;
2229  }
2230  }
2231  }
2232  //**********************************************************************************************
2233 
2234  //**Vectorized default assignment to row-major dense matrices***********************************
2248  template< typename MT3 // Type of the left-hand side target matrix
2249  , typename MT4 // Type of the left-hand side matrix operand
2250  , typename MT5 // Type of the right-hand side matrix operand
2251  , typename ST2 > // Type of the scalar value
2252  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2253  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2254  {
2255  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2256  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2257 
2258  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2259  const typename MT5::OppositeType tmp( B );
2260  assign( ~C, A * tmp * scalar );
2261  }
2262  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2263  const typename MT4::OppositeType tmp( A );
2264  assign( ~C, tmp * B * scalar );
2265  }
2266  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2267  const typename MT5::OppositeType tmp( B );
2268  assign( ~C, A * tmp * scalar );
2269  }
2270  else {
2271  const typename MT4::OppositeType tmp( A );
2272  assign( ~C, tmp * B * scalar );
2273  }
2274  }
2275  //**********************************************************************************************
2276 
2277  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment kernel for column-major targets (C = scalar * A * B).
//
// Selected when UseVectorizedDefaultKernel accepts the given types and the target is a
// DenseMatrix with storage-order flag 'true'. The rows of C are processed in blocks of
// 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements; within the 4-, 2- and
// 1-vector blocks two columns of C are computed per pass, with a single-column tail.
// Every accumulator is multiplied by the broadcast scalar right before it is stored.
//
// NOTE(review): the accumulators (xmm1..xmm8) are default-constructed and then summed into,
// so this kernel relies on IntrinsicType's default constructor yielding zero - confirm.
// NOTE(review): the row extent M is taken from A.spacing(), not A.rows(); presumably this is
// the padded row count, so stores may touch padding elements of C - confirm C is padded alike.
2291  template< typename MT3 // Type of the left-hand side target matrix
2292  , typename MT4 // Type of the left-hand side matrix operand
2293  , typename MT5 // Type of the right-hand side matrix operand
2294  , typename ST2 > // Type of the scalar value
2295  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2296  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2297  {
2298  typedef IntrinsicTrait<ElementType> IT;
2299 
 // M: row extent (from A.spacing()), N: columns of the result, K: inner dimension.
2300  const size_t M( A.spacing() );
2301  const size_t N( B.columns() );
2302  const size_t K( A.columns() );
2303 
 // Scalar factor converted to an intrinsic vector once (presumably a broadcast via set()).
2304  const IntrinsicType factor( set( scalar ) );
2305 
 // Row cursor shared by all block loops below; each loop continues where the previous stopped.
2306  size_t i( 0UL );
2307 
 // Blocks of 8 intrinsic vectors per column of C, one column at a time.
2308  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2309  for( size_t j=0UL; j<N; ++j ) {
2310  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2311  for( size_t k=0UL; k<K; ++k ) {
2312  const IntrinsicType b1( set( B(k,j) ) );
2313  xmm1 = xmm1 + A.get(i ,k) * b1;
2314  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2315  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2316  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2317  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2318  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2319  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2320  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2321  }
2322  store( &(~C)(i ,j), xmm1 * factor );
2323  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2324  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2325  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2326  store( &(~C)(i+IT::size*4UL,j), xmm5 * factor );
2327  store( &(~C)(i+IT::size*5UL,j), xmm6 * factor );
2328  store( &(~C)(i+IT::size*6UL,j), xmm7 * factor );
2329  store( &(~C)(i+IT::size*7UL,j), xmm8 * factor );
2330  }
2331  }
 // Blocks of 4 intrinsic vectors, two columns of C per pass (xmm1-4: column j, xmm5-8: column j+1).
2332  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2333  size_t j( 0UL );
2334  for( ; (j+2UL) <= N; j+=2UL ) {
2335  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2336  for( size_t k=0UL; k<K; ++k ) {
2337  const IntrinsicType a1( A.get(i ,k) );
2338  const IntrinsicType a2( A.get(i+IT::size ,k) );
2339  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2340  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2341  const IntrinsicType b1( set( B(k,j ) ) );
2342  const IntrinsicType b2( set( B(k,j+1UL) ) );
2343  xmm1 = xmm1 + a1 * b1;
2344  xmm2 = xmm2 + a2 * b1;
2345  xmm3 = xmm3 + a3 * b1;
2346  xmm4 = xmm4 + a4 * b1;
2347  xmm5 = xmm5 + a1 * b2;
2348  xmm6 = xmm6 + a2 * b2;
2349  xmm7 = xmm7 + a3 * b2;
2350  xmm8 = xmm8 + a4 * b2;
2351  }
2352  store( &(~C)(i ,j ), xmm1 * factor );
2353  store( &(~C)(i+IT::size ,j ), xmm2 * factor );
2354  store( &(~C)(i+IT::size*2UL,j ), xmm3 * factor );
2355  store( &(~C)(i+IT::size*3UL,j ), xmm4 * factor );
2356  store( &(~C)(i ,j+1UL), xmm5 * factor );
2357  store( &(~C)(i+IT::size ,j+1UL), xmm6 * factor );
2358  store( &(~C)(i+IT::size*2UL,j+1UL), xmm7 * factor );
2359  store( &(~C)(i+IT::size*3UL,j+1UL), xmm8 * factor );
2360  }
 // Odd number of columns: handle the last column of this 4-vector block on its own.
2361  if( j < N ) {
2362  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2363  for( size_t k=0UL; k<K; ++k ) {
2364  const IntrinsicType b1( set( B(k,j) ) );
2365  xmm1 = xmm1 + A.get(i ,k) * b1;
2366  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2367  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2368  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2369  }
2370  store( &(~C)(i ,j), xmm1 * factor );
2371  store( &(~C)(i+IT::size ,j), xmm2 * factor );
2372  store( &(~C)(i+IT::size*2UL,j), xmm3 * factor );
2373  store( &(~C)(i+IT::size*3UL,j), xmm4 * factor );
2374  }
2375  }
 // Blocks of 2 intrinsic vectors, two columns of C per pass (xmm1-2: column j, xmm3-4: column j+1).
2376  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2377  size_t j( 0UL );
2378  for( ; (j+2UL) <= N; j+=2UL ) {
2379  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2380  for( size_t k=0UL; k<K; ++k ) {
2381  const IntrinsicType a1( A.get(i ,k) );
2382  const IntrinsicType a2( A.get(i+IT::size,k) );
2383  const IntrinsicType b1( set( B(k,j ) ) );
2384  const IntrinsicType b2( set( B(k,j+1UL) ) );
2385  xmm1 = xmm1 + a1 * b1;
2386  xmm2 = xmm2 + a2 * b1;
2387  xmm3 = xmm3 + a1 * b2;
2388  xmm4 = xmm4 + a2 * b2;
2389  }
2390  store( &(~C)(i ,j ), xmm1 * factor );
2391  store( &(~C)(i+IT::size,j ), xmm2 * factor );
2392  store( &(~C)(i ,j+1UL), xmm3 * factor );
2393  store( &(~C)(i+IT::size,j+1UL), xmm4 * factor );
2394  }
 // Odd number of columns: handle the last column of this 2-vector block on its own.
2395  if( j < N ) {
2396  IntrinsicType xmm1, xmm2;
2397  for( size_t k=0UL; k<K; ++k ) {
2398  const IntrinsicType b1( set( B(k,j) ) );
2399  xmm1 = xmm1 + A.get(i ,k) * b1;
2400  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2401  }
2402  store( &(~C)(i ,j), xmm1 * factor );
2403  store( &(~C)(i+IT::size,j), xmm2 * factor );
2404  }
2405  }
 // Remainder: exactly one intrinsic vector at row offset i is processed here.
 // NOTE(review): assumes at most IT::size rows remain, i.e. M is a multiple of IT::size - confirm.
2406  if( i < M ) {
2407  size_t j( 0UL );
2408  for( ; (j+2UL) <= N; j+=2UL ) {
2409  IntrinsicType xmm1, xmm2;
2410  for( size_t k=0UL; k<K; ++k ) {
2411  const IntrinsicType a1( A.get(i,k) );
2412  xmm1 = xmm1 + a1 * set( B(k,j ) );
2413  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2414  }
2415  store( &(~C)(i,j ), xmm1 * factor );
2416  store( &(~C)(i,j+1UL), xmm2 * factor );
2417  }
 // Odd number of columns: last column of the remainder block.
2418  if( j < N ) {
2419  IntrinsicType xmm1;
2420  for( size_t k=0UL; k<K; ++k ) {
2421  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2422  }
2423  store( &(~C)(i,j), xmm1 * factor );
2424  }
2425  }
2426  }
2427  //**********************************************************************************************
2428 
2429  //**BLAS-based assignment to dense matrices (default)*******************************************
2443  template< typename MT3 // Type of the left-hand side target matrix
2444  , typename MT4 // Type of the left-hand side matrix operand
2445  , typename MT5 // Type of the right-hand side matrix operand
2446  , typename ST2 > // Type of the scalar value
2447  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2448  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2449  {
2450  selectDefaultAssignKernel( C, A, B, scalar );
2451  }
2452  //**********************************************************************************************
2453 
2454  //**BLAS-based assignment to dense matrices (single precision)**********************************
2455 #if BLAZE_BLAS_MODE
2456 
2469  template< typename MT3 // Type of the left-hand side target matrix
2470  , typename MT4 // Type of the left-hand side matrix operand
2471  , typename MT5 // Type of the right-hand side matrix operand
2472  , typename ST2 > // Type of the scalar value
2473  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2474  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2475  {
2476  using boost::numeric_cast;
2477 
2478  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2479  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2480  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2481 
2482  const int M ( numeric_cast<int>( A.rows() ) );
2483  const int N ( numeric_cast<int>( B.columns() ) );
2484  const int K ( numeric_cast<int>( A.columns() ) );
2485  const int lda( numeric_cast<int>( A.spacing() ) );
2486  const int ldb( numeric_cast<int>( B.spacing() ) );
2487  const int ldc( numeric_cast<int>( C.spacing() ) );
2488 
2489  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2490  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2491  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2492  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2493  }
2494 #endif
2495  //**********************************************************************************************
2496 
2497  //**BLAS-based assignment to dense matrices (double precision)**********************************
2498 #if BLAZE_BLAS_MODE
2499 
2512  template< typename MT3 // Type of the left-hand side target matrix
2513  , typename MT4 // Type of the left-hand side matrix operand
2514  , typename MT5 // Type of the right-hand side matrix operand
2515  , typename ST2 > // Type of the scalar value
2516  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2517  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2518  {
2519  using boost::numeric_cast;
2520 
2521  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2522  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2523  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2524 
2525  const int M ( numeric_cast<int>( A.rows() ) );
2526  const int N ( numeric_cast<int>( B.columns() ) );
2527  const int K ( numeric_cast<int>( A.columns() ) );
2528  const int lda( numeric_cast<int>( A.spacing() ) );
2529  const int ldb( numeric_cast<int>( B.spacing() ) );
2530  const int ldc( numeric_cast<int>( C.spacing() ) );
2531 
2532  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2533  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2534  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2535  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2536  }
2537 #endif
2538  //**********************************************************************************************
2539 
2540  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2541 #if BLAZE_BLAS_MODE
2542 
2555  template< typename MT3 // Type of the left-hand side target matrix
2556  , typename MT4 // Type of the left-hand side matrix operand
2557  , typename MT5 // Type of the right-hand side matrix operand
2558  , typename ST2 > // Type of the scalar value
2559  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2560  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2561  {
2562  using boost::numeric_cast;
2563 
2564  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2565  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2566  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2568  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2569  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2570  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2571 
2572  const int M ( numeric_cast<int>( A.rows() ) );
2573  const int N ( numeric_cast<int>( B.columns() ) );
2574  const int K ( numeric_cast<int>( A.columns() ) );
2575  const int lda( numeric_cast<int>( A.spacing() ) );
2576  const int ldb( numeric_cast<int>( B.spacing() ) );
2577  const int ldc( numeric_cast<int>( C.spacing() ) );
2578  const complex<float> alpha( scalar );
2579  const complex<float> beta ( 0.0F, 0.0F );
2580 
2581  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2582  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2583  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2584  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2585  }
2586 #endif
2587  //**********************************************************************************************
2588 
2589  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2590 #if BLAZE_BLAS_MODE
2591 
2604  template< typename MT3 // Type of the left-hand side target matrix
2605  , typename MT4 // Type of the left-hand side matrix operand
2606  , typename MT5 // Type of the right-hand side matrix operand
2607  , typename ST2 > // Type of the scalar value
2608  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2609  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2610  {
2611  using boost::numeric_cast;
2612 
2613  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2614  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2615  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2617  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2618  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2620 
2621  const int M ( numeric_cast<int>( A.rows() ) );
2622  const int N ( numeric_cast<int>( B.columns() ) );
2623  const int K ( numeric_cast<int>( A.columns() ) );
2624  const int lda( numeric_cast<int>( A.spacing() ) );
2625  const int ldb( numeric_cast<int>( B.spacing() ) );
2626  const int ldc( numeric_cast<int>( C.spacing() ) );
2627  const complex<double> alpha( scalar );
2628  const complex<double> beta ( 0.0, 0.0 );
2629 
2630  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2631  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2632  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2633  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2634  }
2635 #endif
2636  //**********************************************************************************************
2637 
2638  //**Assignment to sparse matrices***************************************************************
2650  template< typename MT // Type of the target sparse matrix
2651  , bool SO > // Storage order of the target sparse matrix
2652  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2653  {
2655 
2656  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2657 
2663  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2664 
2665  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2666  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2667 
2668  const TmpType tmp( rhs );
2669  assign( ~lhs, tmp );
2670  }
2671  //**********************************************************************************************
2672 
2673  //**Addition assignment to dense matrices*******************************************************
2685  template< typename MT3 // Type of the target dense matrix
2686  , bool SO > // Storage order of the target dense matrix
2687  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2688  {
2690 
2691  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2692  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2693 
2694  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2695  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2696 
2697  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2698  return;
2699  }
2700 
2701  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2702  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2703 
2704  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2705  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2706  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2707  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2708  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2709  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2710 
2711  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
2712  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2713  else
2714  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2715  }
2716  //**********************************************************************************************
2717 
2718  //**Default addition assignment to dense matrices***********************************************
2732  template< typename MT3 // Type of the left-hand side target matrix
2733  , typename MT4 // Type of the left-hand side matrix operand
2734  , typename MT5 // Type of the right-hand side matrix operand
2735  , typename ST2 > // Type of the scalar value
2736  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2737  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2738  {
2739  const ResultType tmp( A * B * scalar );
2740  addAssign( C, tmp );
2741  }
2742  //**********************************************************************************************
2743 
2744  //**Vectorized default addition assignment to row-major dense matrices**************************
2758  template< typename MT3 // Type of the left-hand side target matrix
2759  , typename MT4 // Type of the left-hand side matrix operand
2760  , typename MT5 // Type of the right-hand side matrix operand
2761  , typename ST2 > // Type of the scalar value
2762  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2763  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2764  {
2765  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2766  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2767 
2768  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2769  const typename MT5::OppositeType tmp( B );
2770  addAssign( ~C, A * tmp * scalar );
2771  }
2772  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2773  const typename MT4::OppositeType tmp( A );
2774  addAssign( ~C, tmp * B * scalar );
2775  }
2776  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2777  const typename MT5::OppositeType tmp( B );
2778  addAssign( ~C, A * tmp * scalar );
2779  }
2780  else {
2781  const typename MT4::OppositeType tmp( A );
2782  addAssign( ~C, tmp * B * scalar );
2783  }
2784  }
2785  //**********************************************************************************************
2786 
2787  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default kernel for the addition assignment of a scaled matrix product to a
// column-major dense matrix: every processed packet of C is updated as C += ( A * B ) * scalar.
// Takes part in overload resolution only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
// Parameters:
//   C      - target matrix (DenseMatrix<MT3,true>), read and written through load()/store()
//   A      - left-hand side operand, read in packets through A.get(i,k)
//   B      - right-hand side operand, read element-wise through B(k,j) and broadcast with set()
//   scalar - scaling factor, broadcast once into 'factor'
//
// The row index i advances in blocks of 8, 4 and 2 times IT::size, followed by one final
// IT::size-wide block; within the 4x, 2x and 1x paths the columns of C are handled in pairs
// with a trailing single-column step.
// NOTE(review): the xmm accumulators are declared without an initializer, so the result relies
// on IntrinsicType default-constructing to zero - confirm against the IntrinsicType definition.
2801  template< typename MT3 // Type of the left-hand side target matrix
2802  , typename MT4 // Type of the left-hand side matrix operand
2803  , typename MT5 // Type of the right-hand side matrix operand
2804  , typename ST2 > // Type of the scalar value
2805  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2806  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2807  {
2808  typedef IntrinsicTrait<ElementType> IT;
2809 
 // M is taken from A.spacing() rather than A.rows(); presumably this is the padded column
 // length, so that only complete packets are processed - TODO confirm.
2810  const size_t M( A.spacing() );
2811  const size_t N( B.columns() );
2812  const size_t K( A.columns() );
2813 
 // Broadcast of the scaling factor; applied once per accumulator right before the store.
2814  const IntrinsicType factor( set( scalar ) );
2815 
2816  size_t i( 0UL );
2817 
 // Row blocks of 8 packets, one column of C at a time.
2818  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
2819  for( size_t j=0UL; j<N; ++j ) {
2820  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2821  for( size_t k=0UL; k<K; ++k ) {
2822  const IntrinsicType b1( set( B(k,j) ) );
2823  xmm1 = xmm1 + A.get(i ,k) * b1;
2824  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2825  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2826  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2827  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
2828  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
2829  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
2830  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
2831  }
2832  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2833  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2834  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2835  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2836  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) + xmm5 * factor );
2837  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) + xmm6 * factor );
2838  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) + xmm7 * factor );
2839  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) + xmm8 * factor );
2840  }
2841  }
 // Row blocks of 4 packets, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
2842  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
2843  size_t j( 0UL );
2844  for( ; (j+2UL) <= N; j+=2UL ) {
2845  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2846  for( size_t k=0UL; k<K; ++k ) {
2847  const IntrinsicType a1( A.get(i ,k) );
2848  const IntrinsicType a2( A.get(i+IT::size ,k) );
2849  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
2850  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
2851  const IntrinsicType b1( set( B(k,j ) ) );
2852  const IntrinsicType b2( set( B(k,j+1UL) ) );
2853  xmm1 = xmm1 + a1 * b1;
2854  xmm2 = xmm2 + a2 * b1;
2855  xmm3 = xmm3 + a3 * b1;
2856  xmm4 = xmm4 + a4 * b1;
2857  xmm5 = xmm5 + a1 * b2;
2858  xmm6 = xmm6 + a2 * b2;
2859  xmm7 = xmm7 + a3 * b2;
2860  xmm8 = xmm8 + a4 * b2;
2861  }
2862  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2863  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) + xmm2 * factor );
2864  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) + xmm3 * factor );
2865  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) + xmm4 * factor );
2866  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm5 * factor );
2867  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) + xmm6 * factor );
2868  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) + xmm7 * factor );
2869  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) + xmm8 * factor );
2870  }
 // Odd number of columns: process the last column on its own.
2871  if( j < N ) {
2872  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2873  for( size_t k=0UL; k<K; ++k ) {
2874  const IntrinsicType b1( set( B(k,j) ) );
2875  xmm1 = xmm1 + A.get(i ,k) * b1;
2876  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
2877  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
2878  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
2879  }
2880  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2881  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) + xmm2 * factor );
2882  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) + xmm3 * factor );
2883  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) + xmm4 * factor );
2884  }
2885  }
 // Row blocks of 2 packets, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
2886  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
2887  size_t j( 0UL );
2888  for( ; (j+2UL) <= N; j+=2UL ) {
2889  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2890  for( size_t k=0UL; k<K; ++k ) {
2891  const IntrinsicType a1( A.get(i ,k) );
2892  const IntrinsicType a2( A.get(i+IT::size,k) );
2893  const IntrinsicType b1( set( B(k,j ) ) );
2894  const IntrinsicType b2( set( B(k,j+1UL) ) );
2895  xmm1 = xmm1 + a1 * b1;
2896  xmm2 = xmm2 + a2 * b1;
2897  xmm3 = xmm3 + a1 * b2;
2898  xmm4 = xmm4 + a2 * b2;
2899  }
2900  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2901  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) + xmm2 * factor );
2902  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) + xmm3 * factor );
2903  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) + xmm4 * factor );
2904  }
2905  if( j < N ) {
2906  IntrinsicType xmm1, xmm2;
2907  for( size_t k=0UL; k<K; ++k ) {
2908  const IntrinsicType b1( set( B(k,j) ) );
2909  xmm1 = xmm1 + A.get(i ,k) * b1;
2910  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
2911  }
2912  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2913  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) + xmm2 * factor );
2914  }
2915  }
 // Remaining rows are handled as one single packet starting at row i.
 // NOTE(review): this covers all remaining rows only if M is a multiple of IT::size - confirm.
2916  if( i < M ) {
2917  size_t j( 0UL );
2918  for( ; (j+2UL) <= N; j+=2UL ) {
2919  IntrinsicType xmm1, xmm2;
2920  for( size_t k=0UL; k<K; ++k ) {
2921  const IntrinsicType a1( A.get(i,k) );
2922  xmm1 = xmm1 + a1 * set( B(k,j ) );
2923  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2924  }
2925  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2926  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) + xmm2 * factor );
2927  }
2928  if( j < N ) {
2929  IntrinsicType xmm1;
2930  for( size_t k=0UL; k<K; ++k ) {
2931  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
2932  }
2933  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2934  }
2935  }
2936  }
2937  //**********************************************************************************************
2938 
2939  //**BLAS-based addition assignment to dense matrices (default)**********************************
2953  template< typename MT3 // Type of the left-hand side target matrix
2954  , typename MT4 // Type of the left-hand side matrix operand
2955  , typename MT5 // Type of the right-hand side matrix operand
2956  , typename ST2 > // Type of the scalar value
2957  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2958  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2959  {
2960  selectDefaultAddAssignKernel( C, A, B, scalar );
2961  }
2962  //**********************************************************************************************
2963 
2964  //**BLAS-based addition assignment to dense matrices (single precision)*************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of a scaled matrix product for single precision
//        operands (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_sgemm() with \a scalar as alpha and 1 as
// beta. For a row-major target both operands are passed as transposed matrices, for any other
// target they are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, scalar, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
}
#endif
3005  //**********************************************************************************************
3006 
3007  //**BLAS-based addition assignment to dense matrices (double precision)*************************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of a scaled matrix product for double precision
//        operands (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_dgemm() with \a scalar as alpha and 1 as
// beta. For a row-major target both operands are passed as transposed matrices, for any other
// target they are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_dgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, scalar, A.data(), strideA, B.data(), strideB, 1.0, C.data(), strideC );
}
#endif
3048  //**********************************************************************************************
3049 
3050  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of a scaled matrix product for single precision
//        complex operands (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_cgemm(). The scaling factor is converted
// to complex<float> and passed by address as alpha, beta is the complex value (1,0). For a
// row-major target both operands are passed as transposed matrices, for any other target they
// are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT5::ElementType::value_type );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // Complex scaling factors, handed to BLAS by address
   const complex<float> productFactor( scalar );
   const complex<float> targetFactor ( 1.0F, 0.0F );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_cgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, &productFactor, A.data(), strideA, B.data(), strideB,
                &targetFactor, C.data(), strideC );
}
#endif
3097  //**********************************************************************************************
3098 
3099  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based addition assignment of a scaled matrix product for double precision
//        complex operands (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_zgemm(). The scaling factor is converted
// to complex<double> and passed by address as alpha, beta is the complex value (1,0). For a
// row-major target both operands are passed as transposed matrices, for any other target they
// are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // Complex scaling factors, handed to BLAS by address
   const complex<double> productFactor( scalar );
   const complex<double> targetFactor ( 1.0, 0.0 );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_zgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, &productFactor, A.data(), strideA, B.data(), strideB,
                &targetFactor, C.data(), strideC );
}
#endif
3146  //**********************************************************************************************
3147 
3148  //**Addition assignment to sparse matrices******************************************************
3149  // No special implementation for the addition assignment to sparse matrices.
3150  //**********************************************************************************************
3151 
3152  //**Subtraction assignment to dense matrices****************************************************
3164  template< typename MT3 // Type of the target dense matrix
3165  , bool SO > // Storage order of the target dense matrix
3166  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3167  {
3169 
3170  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3171  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3172 
3173  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3174  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3175 
3176  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3177  return;
3178  }
3179 
3180  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3181  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3182 
3183  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3184  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3185  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3186  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3187  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3188  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3189 
3190  if( (~lhs).rows() * (~lhs).columns() < TDMATTDMATMULT_THRESHOLD )
3191  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3192  else
3193  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3194  }
3195  //**********************************************************************************************
3196 
3197  //**Default subtraction assignment to dense matrices********************************************
3211  template< typename MT3 // Type of the left-hand side target matrix
3212  , typename MT4 // Type of the left-hand side matrix operand
3213  , typename MT5 // Type of the right-hand side matrix operand
3214  , typename ST2 > // Type of the scalar value
3215  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3216  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3217  {
3218  const ResultType tmp( A * B * scalar );
3219  subAssign( C, tmp );
3220  }
3221  //**********************************************************************************************
3222 
3223  //**Vectorized default subtraction assignment to row-major dense matrices***********************
3237  template< typename MT3 // Type of the left-hand side target matrix
3238  , typename MT4 // Type of the left-hand side matrix operand
3239  , typename MT5 // Type of the right-hand side matrix operand
3240  , typename ST2 > // Type of the scalar value
3241  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3242  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3243  {
3244  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3245  BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3246 
3247  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3248  const typename MT5::OppositeType tmp( B );
3249  subAssign( ~C, A * tmp * scalar );
3250  }
3251  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3252  const typename MT4::OppositeType tmp( A );
3253  subAssign( ~C, tmp * B * scalar );
3254  }
3255  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3256  const typename MT5::OppositeType tmp( B );
3257  subAssign( ~C, A * tmp * scalar );
3258  }
3259  else {
3260  const typename MT4::OppositeType tmp( A );
3261  subAssign( ~C, tmp * B * scalar );
3262  }
3263  }
3264  //**********************************************************************************************
3265 
3266  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for the subtraction assignment of a scaled matrix product to a
// column-major dense matrix: every processed packet of C is updated as C -= ( A * B ) * scalar.
// Takes part in overload resolution only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
// Parameters:
//   C      - target matrix (DenseMatrix<MT3,true>), read and written through load()/store()
//   A      - left-hand side operand, read in packets through A.get(i,k)
//   B      - right-hand side operand, read element-wise through B(k,j) and broadcast with set()
//   scalar - scaling factor, broadcast once into 'factor'
//
// The row index i advances in blocks of 8, 4 and 2 times IT::size, followed by one final
// IT::size-wide block; within the 4x, 2x and 1x paths the columns of C are handled in pairs
// with a trailing single-column step.
// NOTE(review): the xmm accumulators are declared without an initializer, so the result relies
// on IntrinsicType default-constructing to zero - confirm against the IntrinsicType definition.
3280  template< typename MT3 // Type of the left-hand side target matrix
3281  , typename MT4 // Type of the left-hand side matrix operand
3282  , typename MT5 // Type of the right-hand side matrix operand
3283  , typename ST2 > // Type of the scalar value
3284  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3285  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3286  {
3287  typedef IntrinsicTrait<ElementType> IT;
3288 
 // M is taken from A.spacing() rather than A.rows(); presumably this is the padded column
 // length, so that only complete packets are processed - TODO confirm.
3289  const size_t M( A.spacing() );
3290  const size_t N( B.columns() );
3291  const size_t K( A.columns() );
3292 
 // Broadcast of the scaling factor; applied once per accumulator right before the store.
3293  const IntrinsicType factor( set( scalar ) );
3294 
3295  size_t i( 0UL );
3296 
 // Row blocks of 8 packets, one column of C at a time.
3297  for( ; (i+IT::size*8UL) <= M; i+=IT::size*8UL ) {
3298  for( size_t j=0UL; j<N; ++j ) {
3299  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3300  for( size_t k=0UL; k<K; ++k ) {
3301  const IntrinsicType b1( set( B(k,j) ) );
3302  xmm1 = xmm1 + A.get(i ,k) * b1;
3303  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3304  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3305  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3306  xmm5 = xmm5 + A.get(i+IT::size*4UL,k) * b1;
3307  xmm6 = xmm6 + A.get(i+IT::size*5UL,k) * b1;
3308  xmm7 = xmm7 + A.get(i+IT::size*6UL,k) * b1;
3309  xmm8 = xmm8 + A.get(i+IT::size*7UL,k) * b1;
3310  }
3311  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3312  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3313  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3314  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3315  store( &(~C)(i+IT::size*4UL,j), load( &(~C)(i+IT::size*4UL,j) ) - xmm5 * factor );
3316  store( &(~C)(i+IT::size*5UL,j), load( &(~C)(i+IT::size*5UL,j) ) - xmm6 * factor );
3317  store( &(~C)(i+IT::size*6UL,j), load( &(~C)(i+IT::size*6UL,j) ) - xmm7 * factor );
3318  store( &(~C)(i+IT::size*7UL,j), load( &(~C)(i+IT::size*7UL,j) ) - xmm8 * factor );
3319  }
3320  }
 // Row blocks of 4 packets, two columns of C at a time (xmm1-4: column j, xmm5-8: column j+1).
3321  for( ; (i+IT::size*4UL) <= M; i+=IT::size*4UL ) {
3322  size_t j( 0UL );
3323  for( ; (j+2UL) <= N; j+=2UL ) {
3324  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3325  for( size_t k=0UL; k<K; ++k ) {
3326  const IntrinsicType a1( A.get(i ,k) );
3327  const IntrinsicType a2( A.get(i+IT::size ,k) );
3328  const IntrinsicType a3( A.get(i+IT::size*2UL,k) );
3329  const IntrinsicType a4( A.get(i+IT::size*3UL,k) );
3330  const IntrinsicType b1( set( B(k,j ) ) );
3331  const IntrinsicType b2( set( B(k,j+1UL) ) );
3332  xmm1 = xmm1 + a1 * b1;
3333  xmm2 = xmm2 + a2 * b1;
3334  xmm3 = xmm3 + a3 * b1;
3335  xmm4 = xmm4 + a4 * b1;
3336  xmm5 = xmm5 + a1 * b2;
3337  xmm6 = xmm6 + a2 * b2;
3338  xmm7 = xmm7 + a3 * b2;
3339  xmm8 = xmm8 + a4 * b2;
3340  }
3341  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3342  store( &(~C)(i+IT::size ,j ), load( &(~C)(i+IT::size ,j ) ) - xmm2 * factor );
3343  store( &(~C)(i+IT::size*2UL,j ), load( &(~C)(i+IT::size*2UL,j ) ) - xmm3 * factor );
3344  store( &(~C)(i+IT::size*3UL,j ), load( &(~C)(i+IT::size*3UL,j ) ) - xmm4 * factor );
3345  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm5 * factor );
3346  store( &(~C)(i+IT::size ,j+1UL), load( &(~C)(i+IT::size ,j+1UL) ) - xmm6 * factor );
3347  store( &(~C)(i+IT::size*2UL,j+1UL), load( &(~C)(i+IT::size*2UL,j+1UL) ) - xmm7 * factor );
3348  store( &(~C)(i+IT::size*3UL,j+1UL), load( &(~C)(i+IT::size*3UL,j+1UL) ) - xmm8 * factor );
3349  }
 // Odd number of columns: process the last column on its own.
3350  if( j < N ) {
3351  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3352  for( size_t k=0UL; k<K; ++k ) {
3353  const IntrinsicType b1( set( B(k,j) ) );
3354  xmm1 = xmm1 + A.get(i ,k) * b1;
3355  xmm2 = xmm2 + A.get(i+IT::size ,k) * b1;
3356  xmm3 = xmm3 + A.get(i+IT::size*2UL,k) * b1;
3357  xmm4 = xmm4 + A.get(i+IT::size*3UL,k) * b1;
3358  }
3359  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3360  store( &(~C)(i+IT::size ,j), load( &(~C)(i+IT::size ,j) ) - xmm2 * factor );
3361  store( &(~C)(i+IT::size*2UL,j), load( &(~C)(i+IT::size*2UL,j) ) - xmm3 * factor );
3362  store( &(~C)(i+IT::size*3UL,j), load( &(~C)(i+IT::size*3UL,j) ) - xmm4 * factor );
3363  }
3364  }
 // Row blocks of 2 packets, two columns of C at a time (xmm1-2: column j, xmm3-4: column j+1).
3365  for( ; (i+IT::size*2UL) <= M; i+=IT::size*2UL ) {
3366  size_t j( 0UL );
3367  for( ; (j+2UL) <= N; j+=2UL ) {
3368  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3369  for( size_t k=0UL; k<K; ++k ) {
3370  const IntrinsicType a1( A.get(i ,k) );
3371  const IntrinsicType a2( A.get(i+IT::size,k) );
3372  const IntrinsicType b1( set( B(k,j ) ) );
3373  const IntrinsicType b2( set( B(k,j+1UL) ) );
3374  xmm1 = xmm1 + a1 * b1;
3375  xmm2 = xmm2 + a2 * b1;
3376  xmm3 = xmm3 + a1 * b2;
3377  xmm4 = xmm4 + a2 * b2;
3378  }
3379  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3380  store( &(~C)(i+IT::size,j ), load( &(~C)(i+IT::size,j ) ) - xmm2 * factor );
3381  store( &(~C)(i ,j+1UL), load( &(~C)(i ,j+1UL) ) - xmm3 * factor );
3382  store( &(~C)(i+IT::size,j+1UL), load( &(~C)(i+IT::size,j+1UL) ) - xmm4 * factor );
3383  }
3384  if( j < N ) {
3385  IntrinsicType xmm1, xmm2;
3386  for( size_t k=0UL; k<K; ++k ) {
3387  const IntrinsicType b1( set( B(k,j) ) );
3388  xmm1 = xmm1 + A.get(i ,k) * b1;
3389  xmm2 = xmm2 + A.get(i+IT::size,k) * b1;
3390  }
3391  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3392  store( &(~C)(i+IT::size,j), load( &(~C)(i+IT::size,j) ) - xmm2 * factor );
3393  }
3394  }
 // Remaining rows are handled as one single packet starting at row i.
 // NOTE(review): this covers all remaining rows only if M is a multiple of IT::size - confirm.
3395  if( i < M ) {
3396  size_t j( 0UL );
3397  for( ; (j+2UL) <= N; j+=2UL ) {
3398  IntrinsicType xmm1, xmm2;
3399  for( size_t k=0UL; k<K; ++k ) {
3400  const IntrinsicType a1( A.get(i,k) );
3401  xmm1 = xmm1 + a1 * set( B(k,j ) );
3402  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3403  }
3404  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3405  store( &(~C)(i,j+1UL), load( &(~C)(i,j+1UL) ) - xmm2 * factor );
3406  }
3407  if( j < N ) {
3408  IntrinsicType xmm1;
3409  for( size_t k=0UL; k<K; ++k ) {
3410  xmm1 = xmm1 + A.get(i,k) * set( B(k,j) );
3411  }
3412  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3413  }
3414  }
3415  }
3416  //**********************************************************************************************
3417 
3418  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3432  template< typename MT3 // Type of the left-hand side target matrix
3433  , typename MT4 // Type of the left-hand side matrix operand
3434  , typename MT5 // Type of the right-hand side matrix operand
3435  , typename ST2 > // Type of the scalar value
3436  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3437  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3438  {
3439  selectDefaultSubAssignKernel( C, A, B, scalar );
3440  }
3441  //**********************************************************************************************
3442 
3443  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based subtraction assignment of a scaled matrix product for single precision
//        operands (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_sgemm() with the negated \a scalar as
// alpha and 1 as beta. For a row-major target both operands are passed as transposed matrices,
// for any other target they are passed untransposed. All dimensions are converted to \c int by
// means of boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, -scalar, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
}
#endif
3484  //**********************************************************************************************
3485 
3486  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based subtraction assignment of a scaled matrix product for double precision
//        operands (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_dgemm() with the negated \a scalar as
// alpha and 1 as beta. For a row-major target both operands are passed as transposed matrices,
// for any other target they are passed untransposed. All dimensions are converted to \c int by
// means of boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_dgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, -scalar, A.data(), strideA, B.data(), strideB, 1.0, C.data(), strideC );
}
#endif
3527  //**********************************************************************************************
3528 
3529  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based subtraction assignment of a scaled matrix product for single precision
//        complex operands (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_cgemm(). The negated scaling factor is
// converted to complex<float> and passed by address as alpha, beta is the complex value (1,0).
// For a row-major target both operands are passed as transposed matrices, for any other target
// they are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE  ( typename MT5::ElementType::value_type );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // Complex scaling factors, handed to BLAS by address
   const complex<float> productFactor( -scalar );
   const complex<float> targetFactor ( 1.0F, 0.0F );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_cgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, &productFactor, A.data(), strideA, B.data(), strideB,
                &targetFactor, C.data(), strideC );
}
#endif
3576  //**********************************************************************************************
3577 
3578  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
#if BLAZE_BLAS_MODE

/*!\brief BLAS-based subtraction assignment of a scaled matrix product for double precision
//        complex operands (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is mapped onto a single call to cblas_zgemm(). The negated scaling factor is
// converted to complex<double> and passed by address as alpha, beta is the complex value (1,0).
// For a row-major target both operands are passed as transposed matrices, for any other target
// they are passed untransposed. All dimensions are converted to \c int by means of
// boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );

   // Problem size
   const int m( boost::numeric_cast<int>( A.rows()    ) );
   const int n( boost::numeric_cast<int>( B.columns() ) );
   const int k( boost::numeric_cast<int>( A.columns() ) );

   // Leading dimensions of the three matrices
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // Complex scaling factors, handed to BLAS by address
   const complex<double> productFactor( -scalar );
   const complex<double> targetFactor ( 1.0, 0.0 );

   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_zgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                rowMajorTarget ? CblasTrans    : CblasNoTrans,
                m, n, k, &productFactor, A.data(), strideA, B.data(), strideB,
                &targetFactor, C.data(), strideC );
}
#endif
3625  //**********************************************************************************************
3626 
3627  //**Subtraction assignment to sparse matrices***************************************************
3628  // No special implementation for the subtraction assignment to sparse matrices.
3629  //**********************************************************************************************
3630 
3631  //**Multiplication assignment to dense matrices*************************************************
3632  // No special implementation for the multiplication assignment to dense matrices.
3633  //**********************************************************************************************
3634 
3635  //**Multiplication assignment to sparse matrices************************************************
3636  // No special implementation for the multiplication assignment to sparse matrices.
3637  //**********************************************************************************************
3638 
3639  //**Compile time checks*************************************************************************
3647  //**********************************************************************************************
3648 };
3650 //*************************************************************************************************
3651 
3652 
3653 
3654 
3655 //=================================================================================================
3656 //
3657 // GLOBAL BINARY ARITHMETIC OPERATORS
3658 //
3659 //=================================================================================================
3660 
3661 //*************************************************************************************************
3687 template< typename T1 // Type of the left-hand side dense matrix
3688  , typename T2 > // Type of the right-hand side dense matrix
3689 inline const TDMatTDMatMultExpr<T1,T2>
3691 {
3693 
3694  if( (~lhs).columns() != (~rhs).rows() )
3695  throw std::invalid_argument( "Matrix sizes do not match" );
3696 
3697  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3698 }
3699 //*************************************************************************************************
3700 
3701 
3702 
3703 
3704 //=================================================================================================
3705 //
3706 // GLOBAL OPERATORS
3707 //
3708 //=================================================================================================
3709 
3710 //*************************************************************************************************
3723 template< typename MT1 // Type of the left-hand side dense matrix
3724  , typename MT2 > // Type of the right-hand side dense matrix
3725 inline typename RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >::Type
3726  row( const TDMatTDMatMultExpr<MT1,MT2>& dm, size_t index )
3727 {
3729 
3730  return row( dm.leftOperand(), index ) * dm.rightOperand();
3731 }
3733 //*************************************************************************************************
3734 
3735 
3736 //*************************************************************************************************
3749 template< typename MT1 // Type of the left-hand side dense matrix
3750  , typename MT2 > // Type of the right-hand side dense matrix
3751 inline typename ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >::Type
3752  column( const TDMatTDMatMultExpr<MT1,MT2>& dm, size_t index )
3753 {
3755 
3756  return dm.leftOperand() * column( dm.rightOperand(), index );
3757 }
3759 //*************************************************************************************************
3760 
3761 
3762 
3763 
3764 //=================================================================================================
3765 //
3766 // EXPRESSION TRAIT SPECIALIZATIONS
3767 //
3768 //=================================================================================================
3769 
3770 //*************************************************************************************************
3772 template< typename MT1, typename MT2, typename VT >
3773 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3774 {
3775  public:
3776  //**********************************************************************************************
3777  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3778  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3779  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3780  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
3781  , INVALID_TYPE >::Type Type;
3782  //**********************************************************************************************
3783 };
3785 //*************************************************************************************************
3786 
3787 
3788 //*************************************************************************************************
3790 template< typename MT1, typename MT2, typename VT >
3791 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
3792 {
3793  public:
3794  //**********************************************************************************************
3795  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3796  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3797  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3798  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
3799  , INVALID_TYPE >::Type Type;
3800  //**********************************************************************************************
3801 };
3803 //*************************************************************************************************
3804 
3805 
3806 //*************************************************************************************************
3808 template< typename VT, typename MT1, typename MT2 >
3809 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3810 {
3811  public:
3812  //**********************************************************************************************
3813  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3814  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3815  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3816  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3817  , INVALID_TYPE >::Type Type;
3818  //**********************************************************************************************
3819 };
3821 //*************************************************************************************************
3822 
3823 
3824 //*************************************************************************************************
3826 template< typename VT, typename MT1, typename MT2 >
3827 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
3828 {
3829  public:
3830  //**********************************************************************************************
3831  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3832  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3833  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3834  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3835  , INVALID_TYPE >::Type Type;
3836  //**********************************************************************************************
3837 };
3839 //*************************************************************************************************
3840 
3841 
3842 //*************************************************************************************************
3844 template< typename MT1, typename MT2 >
3845 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3846 {
3847  public:
3848  //**********************************************************************************************
3849  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3850  //**********************************************************************************************
3851 };
3853 //*************************************************************************************************
3854 
3855 
3856 //*************************************************************************************************
3858 template< typename MT1, typename MT2 >
3859 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
3860 {
3861  public:
3862  //**********************************************************************************************
3863  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3864  //**********************************************************************************************
3865 };
3867 //*************************************************************************************************
3868 
3869 } // namespace blaze
3870 
3871 #endif