All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
20 //=================================================================================================
21 
22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
24 
25 
26 //*************************************************************************************************
27 // Includes
28 //*************************************************************************************************
29 
30 #include <stdexcept>
31 #include <boost/cast.hpp>
41 #include <blaze/math/Intrinsics.h>
42 #include <blaze/math/shims/Reset.h>
59 #include <blaze/system/BLAS.h>
61 #include <blaze/util/Assert.h>
62 #include <blaze/util/Complex.h>
69 #include <blaze/util/DisableIf.h>
70 #include <blaze/util/EnableIf.h>
71 #include <blaze/util/InvalidType.h>
73 #include <blaze/util/SelectType.h>
74 #include <blaze/util/Types.h>
80 
81 
82 namespace blaze {
83 
84 //=================================================================================================
85 //
86 // CLASS DMATDMATMULTEXPR
87 //
88 //=================================================================================================
89 
90 //*************************************************************************************************
97 template< typename MT1 // Type of the left-hand side dense matrix
98  , typename MT2 > // Type of the right-hand side dense matrix
99 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
100  , private MatMatMultExpr
101  , private Computation
102 {
103  private:
104  //**Type definitions****************************************************************************
// Private shorthands for the nested types exported by the two operand matrix types.
105  typedef typename MT1::ResultType RT1;     // ResultType exported by the left-hand side operand type
106  typedef typename MT2::ResultType RT2;     // ResultType exported by the right-hand side operand type
107  typedef typename MT1::CompositeType CT1;  // CompositeType exported by the left-hand side operand type
108  typedef typename MT2::CompositeType CT2;  // CompositeType exported by the right-hand side operand type
109  //**********************************************************************************************
110 
111  //**********************************************************************************************
113 
114 
116  template< typename T1, typename T2, typename T3 >
117  struct UseSinglePrecisionKernel {
121  };
123  //**********************************************************************************************
124 
125  //**********************************************************************************************
127 
128 
130  template< typename T1, typename T2, typename T3 >
131  struct UseDoublePrecisionKernel {
135  };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
142 
// Compile-time predicate: value is non-zero only when the element types of T1, T2 and T3 are
// all complex<float>. It guards the cblas_cgemm-based kernel and is one of the inputs of
// UseDefaultKernel.
145  template< typename T1, typename T2, typename T3 >
146  struct UseSinglePrecisionComplexKernel {
147  typedef complex<float> Type;  // element type all three matrices must share
148  enum { value = IsSame<typename T1::ElementType,Type>::value &&
149  IsSame<typename T2::ElementType,Type>::value &&
150  IsSame<typename T3::ElementType,Type>::value };
151  };
153  //**********************************************************************************************
154 
155  //**********************************************************************************************
157 
158 
// Compile-time predicate: value is non-zero only when the element types of T1, T2 and T3 are
// all complex<double>. It guards the cblas_zgemm-based kernel and is one of the inputs of
// UseDefaultKernel.
161  template< typename T1, typename T2, typename T3 >
162  struct UseDoublePrecisionComplexKernel {
163  typedef complex<double> Type;  // element type all three matrices must share
164  enum { value = IsSame<typename T1::ElementType,Type>::value &&
165  IsSame<typename T2::ElementType,Type>::value &&
166  IsSame<typename T3::ElementType,Type>::value };
167  };
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
174 
// Compile-time predicate for the fallback path: value is non-zero when BLAZE_BLAS_MODE is
// disabled, or when none of the four BLAS-specific predicates (single/double precision, real
// and complex) holds for the given matrix types.
176  template< typename T1, typename T2, typename T3 >
177  struct UseDefaultKernel {
178  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
179  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
180  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
181  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
182  };
184  //**********************************************************************************************
185 
186  //**********************************************************************************************
188 
189 
// Compile-time predicate for the vectorized default kernels: value is non-zero only when all
// three matrix types report themselves as vectorizable, share one element type, and
// IntrinsicTrait of that element type advertises both addition and multiplication.
191  template< typename T1, typename T2, typename T3 >
192  struct UseVectorizedDefaultKernel {
193  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
194  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
195  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
196  IntrinsicTrait<typename T1::ElementType>::addition &&
197  IntrinsicTrait<typename T1::ElementType>::multiplication };
198  };
200  //**********************************************************************************************
201 
202  public:
203  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ResultType is used below but its typedef is not part of this listing; it is
// presumably declared directly above these lines -- confirm.
206  typedef typename ResultType::OppositeType OppositeType;    // OppositeType exported by ResultType
207  typedef typename ResultType::TransposeType TransposeType;  // TransposeType exported by ResultType
208  typedef typename ResultType::ElementType ElementType;      // ElementType exported by ResultType
210  typedef const ElementType ReturnType;      // operator() returns elements by const value
211  typedef const ResultType CompositeType;    // const ResultType, i.e. not a reference type
212 
// Left operand storage type: held by value if MT1 is an expression, by const reference otherwise.
214  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
215 
// Right operand storage type: held by value if MT2 is an expression, by const reference otherwise.
217  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
218 
// Type the left operand is converted to before a kernel runs: RT1 if MT1 is a computation, CT1 otherwise.
220  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
221 
// Type the right operand is converted to before a kernel runs: RT2 if MT2 is a computation, CT2 otherwise.
223  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
224  //**********************************************************************************************
225 
226  //**Compilation flags***************************************************************************
// Compilation flag: the expression itself reports vectorizable = 0. UseVectorizedDefaultKernel
// reads the same-named flag on the matrix types it is instantiated with.
228  enum { vectorizable = 0 };
229  //**********************************************************************************************
230 
231  //**Constructor*********************************************************************************
237  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
238  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
239  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
240  {
241  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
242  }
243  //**********************************************************************************************
244 
245  //**Access operator*****************************************************************************
252  inline ReturnType operator()( size_t i, size_t j ) const {
253  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
254  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
255 
256  ElementType tmp;
257 
258  if( lhs_.columns() != 0UL ) {
259  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
260  tmp = lhs_(i,0UL) * rhs_(0UL,j);
261  for( size_t k=1UL; k<end; k+=2UL ) {
262  tmp += lhs_(i,k ) * rhs_(k ,j);
263  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
264  }
265  if( end < lhs_.columns() ) {
266  tmp += lhs_(i,end) * rhs_(end,j);
267  }
268  }
269  else {
270  reset( tmp );
271  }
272 
273  return tmp;
274  }
275  //**********************************************************************************************
276 
277  //**Rows function*******************************************************************************
282  inline size_t rows() const {
283  return lhs_.rows();
284  }
285  //**********************************************************************************************
286 
287  //**Columns function****************************************************************************
292  inline size_t columns() const {
293  return rhs_.columns();
294  }
295  //**********************************************************************************************
296 
297  //**Left operand access*************************************************************************
302  inline LeftOperand leftOperand() const {
303  return lhs_;
304  }
305  //**********************************************************************************************
306 
307  //**Right operand access************************************************************************
312  inline RightOperand rightOperand() const {
313  return rhs_;
314  }
315  //**********************************************************************************************
316 
317  //**********************************************************************************************
323  template< typename T >
324  inline bool canAlias( const T* alias ) const {
325  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
326  }
327  //**********************************************************************************************
328 
329  //**********************************************************************************************
335  template< typename T >
336  inline bool isAliased( const T* alias ) const {
337  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
338  }
339  //**********************************************************************************************
340 
341  private:
342  //**Member variables****************************************************************************
345  //**********************************************************************************************
346 
347  //**Assignment to dense matrices****************************************************************
357  template< typename MT3 // Type of the target dense matrix
358  , bool SO > // Storage order of the target dense matrix
359  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
360  {
362 
363  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
364  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
365 
366  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
367  return;
368  }
369  else if( rhs.lhs_.columns() == 0UL ) {
370  reset( ~lhs );
371  return;
372  }
373 
374  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
375  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
376 
377  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
378  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
379  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
380  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
381  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
382  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
383 
384  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
385  DMatDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
386  else
387  DMatDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
388  }
390  //**********************************************************************************************
391 
392  //**Default assignment to dense matrices********************************************************
405  template< typename MT3 // Type of the left-hand side target matrix
406  , typename MT4 // Type of the left-hand side matrix operand
407  , typename MT5 > // Type of the right-hand side matrix operand
408  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
409  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
410  {
411  const size_t M( A.rows() );
412  const size_t N( B.columns() );
413  const size_t K( A.columns() );
414 
415  for( size_t i=0UL; i<M; ++i ) {
416  for( size_t j=0UL; j<N; ++j ) {
417  C(i,j) = A(i,0UL) * B(0UL,j);
418  }
419  for( size_t k=1UL; k<K; ++k ) {
420  for( size_t j=0UL; j<N; ++j ) {
421  C(i,j) += A(i,k) * B(k,j);
422  }
423  }
424  }
425  }
427  //**********************************************************************************************
428 
429  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel computing C = A * B for a row-major target matrix.
// It is enabled through EnableIf only when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//   C  target matrix, written through store()
//   A  left-hand side operand, read element-wise and passed to set()
//   B  right-hand side operand, read in intrinsic vectors through B.get()
// The column range [0, B.spacing()) is consumed in blocks of 8, 4 and 2 intrinsic vectors of
// IT::size elements each, followed by at most one single-vector block. In the 4-, 2- and
// 1-vector blocks two rows of C are produced per pass, with a single-row remainder.
// NOTE(review): the accumulators are default-constructed and then added to, so the kernel
// relies on IntrinsicType's default constructor producing zero -- confirm in the intrinsics header.
// NOTE(review): N is B.spacing() rather than B.columns(), so stores can reach beyond the logical
// columns of C; presumably C's rows are padded at least as far -- confirm.
443  template< typename MT3 // Type of the left-hand side target matrix
444  , typename MT4 // Type of the left-hand side matrix operand
445  , typename MT5 > // Type of the right-hand side matrix operand
446  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
447  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
448  {
449  typedef IntrinsicTrait<ElementType> IT;
450 
451  const size_t M( A.rows() );
452  const size_t N( B.spacing() );
453  const size_t K( A.columns() );
454 
// Column cursor shared by all block loops below; each loop continues where the previous stopped.
455  size_t j( 0UL );
456 
// Blocks of 8 vectors per row: one row of C at a time, eight accumulators.
457  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
458  for( size_t i=0UL; i<M; ++i ) {
459  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
460  for( size_t k=0UL; k<K; ++k ) {
461  const IntrinsicType a1( set( A(i,k) ) );
462  xmm1 = xmm1 + a1 * B.get(k,j );
463  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
464  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
465  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
466  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
467  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
468  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
469  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
470  }
471  store( &(~C)(i,j ), xmm1 );
472  store( &(~C)(i,j+IT::size ), xmm2 );
473  store( &(~C)(i,j+IT::size*2UL), xmm3 );
474  store( &(~C)(i,j+IT::size*3UL), xmm4 );
475  store( &(~C)(i,j+IT::size*4UL), xmm5 );
476  store( &(~C)(i,j+IT::size*5UL), xmm6 );
477  store( &(~C)(i,j+IT::size*6UL), xmm7 );
478  store( &(~C)(i,j+IT::size*7UL), xmm8 );
479  }
480  }
// Blocks of 4 vectors: two rows of C per pass (xmm1-4 for row i, xmm5-8 for row i+1).
481  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
482  size_t i( 0UL );
483  for( ; (i+2UL) <= M; i+=2UL ) {
484  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
485  for( size_t k=0UL; k<K; ++k ) {
486  const IntrinsicType a1( set( A(i ,k) ) );
487  const IntrinsicType a2( set( A(i+1UL,k) ) );
488  const IntrinsicType b1( B.get(k,j ) );
489  const IntrinsicType b2( B.get(k,j+IT::size ) );
490  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
491  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
492  xmm1 = xmm1 + a1 * b1;
493  xmm2 = xmm2 + a1 * b2;
494  xmm3 = xmm3 + a1 * b3;
495  xmm4 = xmm4 + a1 * b4;
496  xmm5 = xmm5 + a2 * b1;
497  xmm6 = xmm6 + a2 * b2;
498  xmm7 = xmm7 + a2 * b3;
499  xmm8 = xmm8 + a2 * b4;
500  }
501  store( &(~C)(i ,j ), xmm1 );
502  store( &(~C)(i ,j+IT::size ), xmm2 );
503  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
504  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
505  store( &(~C)(i+1UL,j ), xmm5 );
506  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
507  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
508  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
509  }
// Last row when M is odd.
510  if( i < M ) {
511  IntrinsicType xmm1, xmm2, xmm3, xmm4;
512  for( size_t k=0UL; k<K; ++k ) {
513  const IntrinsicType a1( set( A(i,k) ) );
514  xmm1 = xmm1 + a1 * B.get(k,j );
515  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
516  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
517  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
518  }
519  store( &(~C)(i,j ), xmm1 );
520  store( &(~C)(i,j+IT::size ), xmm2 );
521  store( &(~C)(i,j+IT::size*2UL), xmm3 );
522  store( &(~C)(i,j+IT::size*3UL), xmm4 );
523  }
524  }
// Blocks of 2 vectors: two rows of C per pass (xmm1-2 for row i, xmm3-4 for row i+1).
525  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
526  size_t i( 0UL );
527  for( ; (i+2UL) <= M; i+=2UL ) {
528  IntrinsicType xmm1, xmm2, xmm3, xmm4;
529  for( size_t k=0UL; k<K; ++k ) {
530  const IntrinsicType a1( set( A(i ,k) ) );
531  const IntrinsicType a2( set( A(i+1UL,k) ) );
532  const IntrinsicType b1( B.get(k,j ) );
533  const IntrinsicType b2( B.get(k,j+IT::size) );
534  xmm1 = xmm1 + a1 * b1;
535  xmm2 = xmm2 + a1 * b2;
536  xmm3 = xmm3 + a2 * b1;
537  xmm4 = xmm4 + a2 * b2;
538  }
539  store( &(~C)(i ,j ), xmm1 );
540  store( &(~C)(i ,j+IT::size), xmm2 );
541  store( &(~C)(i+1UL,j ), xmm3 );
542  store( &(~C)(i+1UL,j+IT::size), xmm4 );
543  }
// Last row when M is odd.
544  if( i < M ) {
545  IntrinsicType xmm1, xmm2;
546  for( size_t k=0UL; k<K; ++k ) {
547  const IntrinsicType a1( set( A(i,k) ) );
548  xmm1 = xmm1 + a1 * B.get(k,j );
549  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
550  }
551  store( &(~C)(i,j ), xmm1 );
552  store( &(~C)(i,j+IT::size), xmm2 );
553  }
554  }
// Remaining columns: handled as one single vector starting at j, two rows per pass.
555  if( j < N ) {
556  size_t i( 0UL );
557  for( ; (i+2UL) <= M; i+=2UL ) {
558  IntrinsicType xmm1, xmm2;
559  for( size_t k=0UL; k<K; ++k ) {
560  const IntrinsicType b1( B.get(k,j) );
561  xmm1 = xmm1 + set( A(i ,k) ) * b1;
562  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
563  }
564  store( &(~C)(i ,j), xmm1 );
565  store( &(~C)(i+1UL,j), xmm2 );
566  }
// Last row when M is odd.
567  if( i < M ) {
568  IntrinsicType xmm1;
569  for( size_t k=0UL; k<K; ++k ) {
570  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
571  }
572  store( &(~C)(i,j), xmm1 );
573  }
574  }
575  }
577  //**********************************************************************************************
578 
579  //**Vectorized default assignment to column-major dense matrices********************************
593  template< typename MT3 // Type of the left-hand side target matrix
594  , typename MT4 // Type of the left-hand side matrix operand
595  , typename MT5 > // Type of the right-hand side matrix operand
596  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
597  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
598  {
599  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
600  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
601 
602  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
603  const typename MT4::OppositeType tmp( A );
604  assign( ~C, tmp * B );
605  }
606  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
607  const typename MT5::OppositeType tmp( B );
608  assign( ~C, A * tmp );
609  }
610  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
611  const typename MT4::OppositeType tmp( A );
612  assign( ~C, tmp * B );
613  }
614  else {
615  const typename MT5::OppositeType tmp( B );
616  assign( ~C, A * tmp );
617  }
618  }
620  //**********************************************************************************************
621 
622  //**BLAS-based assignment to dense matrices (default)*******************************************
635  template< typename MT3 // Type of the left-hand side target matrix
636  , typename MT4 // Type of the left-hand side matrix operand
637  , typename MT5 > // Type of the right-hand side matrix operand
638  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
639  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
640  {
641  selectDefaultAssignKernel( C, A, B );
642  }
644  //**********************************************************************************************
645 
646  //**BLAS-based assignment to dense matrices (single precision)**********************************
647 #if BLAZE_BLAS_MODE
648 
661  template< typename MT3 // Type of the left-hand side target matrix
662  , typename MT4 // Type of the left-hand side matrix operand
663  , typename MT5 > // Type of the right-hand side matrix operand
664  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
665  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
666  {
667  using boost::numeric_cast;
668 
669  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
670  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
671  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
672 
673  const int M ( numeric_cast<int>( A.rows() ) );
674  const int N ( numeric_cast<int>( B.columns() ) );
675  const int K ( numeric_cast<int>( A.columns() ) );
676  const int lda( numeric_cast<int>( A.spacing() ) );
677  const int ldb( numeric_cast<int>( B.spacing() ) );
678  const int ldc( numeric_cast<int>( C.spacing() ) );
679 
680  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
681  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
682  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
683  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
684  }
686 #endif
687  //**********************************************************************************************
688 
689  //**BLAS-based assignment to dense matrices (double precision)**********************************
690 #if BLAZE_BLAS_MODE
691 
704  template< typename MT3 // Type of the left-hand side target matrix
705  , typename MT4 // Type of the left-hand side matrix operand
706  , typename MT5 > // Type of the right-hand side matrix operand
707  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
708  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
709  {
710  using boost::numeric_cast;
711 
712  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
713  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
714  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
715 
716  const int M ( numeric_cast<int>( A.rows() ) );
717  const int N ( numeric_cast<int>( B.columns() ) );
718  const int K ( numeric_cast<int>( A.columns() ) );
719  const int lda( numeric_cast<int>( A.spacing() ) );
720  const int ldb( numeric_cast<int>( B.spacing() ) );
721  const int ldc( numeric_cast<int>( C.spacing() ) );
722 
723  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
724  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
725  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
726  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
727  }
729 #endif
730  //**********************************************************************************************
731 
732  //**BLAS-based assignment to dense matrices (single precision complex)**************************
733 #if BLAZE_BLAS_MODE
734 
747  template< typename MT3 // Type of the left-hand side target matrix
748  , typename MT4 // Type of the left-hand side matrix operand
749  , typename MT5 > // Type of the right-hand side matrix operand
750  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
751  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
752  {
753  using boost::numeric_cast;
754 
755  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
756  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
757  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
758  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
759  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
760  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
761 
762  const int M ( numeric_cast<int>( A.rows() ) );
763  const int N ( numeric_cast<int>( B.columns() ) );
764  const int K ( numeric_cast<int>( A.columns() ) );
765  const int lda( numeric_cast<int>( A.spacing() ) );
766  const int ldb( numeric_cast<int>( B.spacing() ) );
767  const int ldc( numeric_cast<int>( C.spacing() ) );
768  const complex<float> alpha( 1.0F, 0.0F );
769  const complex<float> beta ( 0.0F, 0.0F );
770 
771  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
772  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
773  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
774  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
775  }
777 #endif
778  //**********************************************************************************************
779 
780  //**BLAS-based assignment to dense matrices (double precision complex)**************************
781 #if BLAZE_BLAS_MODE
782 
795  template< typename MT3 // Type of the left-hand side target matrix
796  , typename MT4 // Type of the left-hand side matrix operand
797  , typename MT5 > // Type of the right-hand side matrix operand
798  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
799  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
800  {
801  using boost::numeric_cast;
802 
803  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
804  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
805  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
806  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
807  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
808  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
809 
810  const int M ( numeric_cast<int>( A.rows() ) );
811  const int N ( numeric_cast<int>( B.columns() ) );
812  const int K ( numeric_cast<int>( A.columns() ) );
813  const int lda( numeric_cast<int>( A.spacing() ) );
814  const int ldb( numeric_cast<int>( B.spacing() ) );
815  const int ldc( numeric_cast<int>( C.spacing() ) );
816  const complex<double> alpha( 1.0, 0.0 );
817  const complex<double> beta ( 0.0, 0.0 );
818 
819  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
820  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
821  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
822  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
823  }
825 #endif
826  //**********************************************************************************************
827 
828  //**Assignment to sparse matrices***************************************************************
840  template< typename MT // Type of the target sparse matrix
841  , bool SO > // Storage order of the target sparse matrix
842  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
843  {
845 
846  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
847 
853  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
854 
855  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
856  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
857 
858  const TmpType tmp( rhs );
859  assign( ~lhs, tmp );
860  }
862  //**********************************************************************************************
863 
864  //**Addition assignment to dense matrices*******************************************************
877  template< typename MT3 // Type of the target dense matrix
878  , bool SO > // Storage order of the target dense matrix
879  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
880  {
882 
883  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
884  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
885 
886  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
887  return;
888  }
889 
890  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
891  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
892 
893  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
894  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
895  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
896  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
897  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
898  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
899 
900  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
901  DMatDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
902  else
903  DMatDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
904  }
906  //**********************************************************************************************
907 
908  //**Default addition assignment to dense matrices***********************************************
922  template< typename MT3 // Type of the left-hand side target matrix
923  , typename MT4 // Type of the left-hand side matrix operand
924  , typename MT5 > // Type of the right-hand side matrix operand
925  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
926  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
927  {
928  const size_t M( A.rows() );
929  const size_t N( B.columns() );
930  const size_t K( A.columns() );
931 
932  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
933  const size_t end( N & size_t(-2) );
934 
935  for( size_t i=0UL; i<M; ++i ) {
936  for( size_t k=0UL; k<K; ++k ) {
937  for( size_t j=0UL; j<end; j+=2UL ) {
938  C(i,j ) += A(i,k) * B(k,j );
939  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
940  }
941  if( end < N ) {
942  C(i,end) += A(i,k) * B(k,end);
943  }
944  }
945  }
946  }
948  //**********************************************************************************************
949 
950  //**Vectorized default addition assignment to row-major dense matrices**************************
964  template< typename MT3 // Type of the left-hand side target matrix
965  , typename MT4 // Type of the left-hand side matrix operand
966  , typename MT5 > // Type of the right-hand side matrix operand
967  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
968  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
969  {
970  typedef IntrinsicTrait<ElementType> IT;
971 
972  const size_t M( A.rows() );
973  const size_t N( B.spacing() );
974  const size_t K( A.columns() );
975 
976  size_t j( 0UL );
977 
978  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
979  for( size_t i=0UL; i<M; ++i ) {
980  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
981  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
982  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
983  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
984  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
985  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
986  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
987  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
988  for( size_t k=0UL; k<K; ++k ) {
989  const IntrinsicType a1( set( A(i,k) ) );
990  xmm1 = xmm1 + a1 * B.get(k,j );
991  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
992  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
993  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
994  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
995  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
996  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
997  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
998  }
999  store( &(~C)(i,j ), xmm1 );
1000  store( &(~C)(i,j+IT::size ), xmm2 );
1001  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1002  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1003  store( &(~C)(i,j+IT::size*4UL), xmm5 );
1004  store( &(~C)(i,j+IT::size*5UL), xmm6 );
1005  store( &(~C)(i,j+IT::size*6UL), xmm7 );
1006  store( &(~C)(i,j+IT::size*7UL), xmm8 );
1007  }
1008  }
1009  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1010  size_t i( 0UL );
1011  for( ; (i+2UL) <= M; i+=2UL ) {
1012  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1013  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
1014  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
1015  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
1016  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
1017  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
1018  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1019  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1020  for( size_t k=0UL; k<K; ++k ) {
1021  const IntrinsicType a1( set( A(i ,k) ) );
1022  const IntrinsicType a2( set( A(i+1UL,k) ) );
1023  const IntrinsicType b1( B.get(k,j ) );
1024  const IntrinsicType b2( B.get(k,j+IT::size ) );
1025  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1026  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1027  xmm1 = xmm1 + a1 * b1;
1028  xmm2 = xmm2 + a1 * b2;
1029  xmm3 = xmm3 + a1 * b3;
1030  xmm4 = xmm4 + a1 * b4;
1031  xmm5 = xmm5 + a2 * b1;
1032  xmm6 = xmm6 + a2 * b2;
1033  xmm7 = xmm7 + a2 * b3;
1034  xmm8 = xmm8 + a2 * b4;
1035  }
1036  store( &(~C)(i ,j ), xmm1 );
1037  store( &(~C)(i ,j+IT::size ), xmm2 );
1038  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1039  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1040  store( &(~C)(i+1UL,j ), xmm5 );
1041  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1042  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1043  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1044  }
1045  if( i < M ) {
1046  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1047  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1048  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1049  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1050  for( size_t k=0UL; k<K; ++k ) {
1051  const IntrinsicType a1( set( A(i,k) ) );
1052  xmm1 = xmm1 + a1 * B.get(k,j );
1053  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
1054  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
1055  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
1056  }
1057  store( &(~C)(i,j ), xmm1 );
1058  store( &(~C)(i,j+IT::size ), xmm2 );
1059  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1060  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1061  }
1062  }
1063  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1064  size_t i( 0UL );
1065  for( ; (i+2UL) <= M; i+=2UL ) {
1066  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1067  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1068  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1069  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1070  for( size_t k=0UL; k<K; ++k ) {
1071  const IntrinsicType a1( set( A(i ,k) ) );
1072  const IntrinsicType a2( set( A(i+1UL,k) ) );
1073  const IntrinsicType b1( B.get(k,j ) );
1074  const IntrinsicType b2( B.get(k,j+IT::size) );
1075  xmm1 = xmm1 + a1 * b1;
1076  xmm2 = xmm2 + a1 * b2;
1077  xmm3 = xmm3 + a2 * b1;
1078  xmm4 = xmm4 + a2 * b2;
1079  }
1080  store( &(~C)(i ,j ), xmm1 );
1081  store( &(~C)(i ,j+IT::size), xmm2 );
1082  store( &(~C)(i+1UL,j ), xmm3 );
1083  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1084  }
1085  if( i < M ) {
1086  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1087  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1088  for( size_t k=0UL; k<K; ++k ) {
1089  const IntrinsicType a1( set( A(i,k) ) );
1090  xmm1 = xmm1 + a1 * B.get(k,j );
1091  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
1092  }
1093  store( &(~C)(i,j ), xmm1 );
1094  store( &(~C)(i,j+IT::size), xmm2 );
1095  }
1096  }
1097  if( j < N ) {
1098  size_t i( 0UL );
1099  for( ; (i+2UL) <= M; i+=2UL ) {
1100  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1101  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1102  for( size_t k=0UL; k<K; ++k ) {
1103  const IntrinsicType b1( B.get(k,j) );
1104  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1105  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1106  }
1107  store( &(~C)(i ,j), xmm1 );
1108  store( &(~C)(i+1UL,j), xmm2 );
1109  }
1110  if( i < M ) {
1111  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1112  for( size_t k=0UL; k<K; ++k ) {
1113  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
1114  }
1115  store( &(~C)(i,j), xmm1 );
1116  }
1117  }
1118  }
1120  //**********************************************************************************************
1121 
1122  //**Vectorized default addition assignment to column-major dense matrices***********************
1136  template< typename MT3 // Type of the left-hand side target matrix
1137  , typename MT4 // Type of the left-hand side matrix operand
1138  , typename MT5 > // Type of the right-hand side matrix operand
1139  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1140  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1141  {
1142  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1143  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1144 
1145  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1146  const typename MT4::OppositeType tmp( A );
1147  addAssign( ~C, tmp * B );
1148  }
1149  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1150  const typename MT5::OppositeType tmp( B );
1151  addAssign( ~C, A * tmp );
1152  }
1153  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1154  const typename MT4::OppositeType tmp( A );
1155  addAssign( ~C, tmp * B );
1156  }
1157  else {
1158  const typename MT5::OppositeType tmp( B );
1159  addAssign( ~C, A * tmp );
1160  }
1161  }
1163  //**********************************************************************************************
1164 
1165  //**BLAS-based addition assignment to dense matrices (default)**********************************
1179  template< typename MT3 // Type of the left-hand side target matrix
1180  , typename MT4 // Type of the left-hand side matrix operand
1181  , typename MT5 > // Type of the right-hand side matrix operand
1182  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1183  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1184  {
1185  selectDefaultAddAssignKernel( C, A, B );
1186  }
1188  //**********************************************************************************************
1189 
1190  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1191 #if BLAZE_BLAS_MODE
1192 
1205  template< typename MT3 // Type of the left-hand side target matrix
1206  , typename MT4 // Type of the left-hand side matrix operand
1207  , typename MT5 > // Type of the right-hand side matrix operand
1208  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1209  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1210  {
1211  using boost::numeric_cast;
1212 
1213  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1214  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1215  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1216 
1217  const int M ( numeric_cast<int>( A.rows() ) );
1218  const int N ( numeric_cast<int>( B.columns() ) );
1219  const int K ( numeric_cast<int>( A.columns() ) );
1220  const int lda( numeric_cast<int>( A.spacing() ) );
1221  const int ldb( numeric_cast<int>( B.spacing() ) );
1222  const int ldc( numeric_cast<int>( C.spacing() ) );
1223 
1224  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1225  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1226  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1227  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1228  }
1230 #endif
1231  //**********************************************************************************************
1232 
1233  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1234 #if BLAZE_BLAS_MODE
1235 
1248  template< typename MT3 // Type of the left-hand side target matrix
1249  , typename MT4 // Type of the left-hand side matrix operand
1250  , typename MT5 > // Type of the right-hand side matrix operand
1251  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1252  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1253  {
1254  using boost::numeric_cast;
1255 
1256  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1257  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1258  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1259 
1260  const int M ( numeric_cast<int>( A.rows() ) );
1261  const int N ( numeric_cast<int>( B.columns() ) );
1262  const int K ( numeric_cast<int>( A.columns() ) );
1263  const int lda( numeric_cast<int>( A.spacing() ) );
1264  const int ldb( numeric_cast<int>( B.spacing() ) );
1265  const int ldc( numeric_cast<int>( C.spacing() ) );
1266 
1267  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1268  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1269  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1270  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1271  }
1273 #endif
1274  //**********************************************************************************************
1275 
1276  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1277 #if BLAZE_BLAS_MODE
1278 
1291  template< typename MT3 // Type of the left-hand side target matrix
1292  , typename MT4 // Type of the left-hand side matrix operand
1293  , typename MT5 > // Type of the right-hand side matrix operand
1294  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1295  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1296  {
1297  using boost::numeric_cast;
1298 
1299  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1300  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1301  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1302  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1303  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1304  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1305 
1306  const int M ( numeric_cast<int>( A.rows() ) );
1307  const int N ( numeric_cast<int>( B.columns() ) );
1308  const int K ( numeric_cast<int>( A.columns() ) );
1309  const int lda( numeric_cast<int>( A.spacing() ) );
1310  const int ldb( numeric_cast<int>( B.spacing() ) );
1311  const int ldc( numeric_cast<int>( C.spacing() ) );
1312  const complex<float> alpha( 1.0F, 0.0F );
1313  const complex<float> beta ( 1.0F, 0.0F );
1314 
1315  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1316  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1317  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1318  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1319  }
1321 #endif
1322  //**********************************************************************************************
1323 
1324  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1325 #if BLAZE_BLAS_MODE
1326 
1339  template< typename MT3 // Type of the left-hand side target matrix
1340  , typename MT4 // Type of the left-hand side matrix operand
1341  , typename MT5 > // Type of the right-hand side matrix operand
1342  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1343  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1344  {
1345  using boost::numeric_cast;
1346 
1347  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1348  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1349  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1350  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1351  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1352  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1353 
1354  const int M ( numeric_cast<int>( A.rows() ) );
1355  const int N ( numeric_cast<int>( B.columns() ) );
1356  const int K ( numeric_cast<int>( A.columns() ) );
1357  const int lda( numeric_cast<int>( A.spacing() ) );
1358  const int ldb( numeric_cast<int>( B.spacing() ) );
1359  const int ldc( numeric_cast<int>( C.spacing() ) );
1360  const complex<double> alpha( 1.0, 0.0 );
1361  const complex<double> beta ( 1.0, 0.0 );
1362 
1363  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1364  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1365  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1366  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1367  }
1369 #endif
1370  //**********************************************************************************************
1371 
1372  //**Addition assignment to sparse matrices******************************************************
1373  // No special implementation for the addition assignment to sparse matrices.
1374  //**********************************************************************************************
1375 
1376  //**Subtraction assignment to dense matrices****************************************************
1389  template< typename MT3 // Type of the target dense matrix
1390  , bool SO > // Storage order of the target dense matrix
1391  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
1392  {
1394 
1395  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1396  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1397 
1398  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1399  return;
1400  }
1401 
1402  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1403  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1404 
1405  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1406  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1407  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1408  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1409  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1410  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1411 
1412  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
1413  DMatDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1414  else
1415  DMatDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1416  }
1418  //**********************************************************************************************
1419 
1420  //**Default subtraction assignment to dense matrices********************************************
1434  template< typename MT3 // Type of the left-hand side target matrix
1435  , typename MT4 // Type of the left-hand side matrix operand
1436  , typename MT5 > // Type of the right-hand side matrix operand
1437  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1438  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1439  {
1440  const size_t M( A.rows() );
1441  const size_t N( B.columns() );
1442  const size_t K( A.columns() );
1443 
1444  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1445  const size_t end( N & size_t(-2) );
1446 
1447  for( size_t i=0UL; i<M; ++i ) {
1448  for( size_t k=0UL; k<K; ++k ) {
1449  for( size_t j=0UL; j<end; j+=2UL ) {
1450  C(i,j ) -= A(i,k) * B(k,j );
1451  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1452  }
1453  if( end < N ) {
1454  C(i,end) -= A(i,k) * B(k,end);
1455  }
1456  }
1457  }
1458  }
1460  //**********************************************************************************************
1461 
1462  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment to a row-major dense matrix (C -= A*B).
//
// Parameters:
//   C  target matrix; it is read via load() and written back via store()
//   A  left-hand side operand, accessed element-wise as A(i,k)
//   B  right-hand side operand, accessed packet-wise as B.get(k,j)
//
// The columns are processed in blocks of decreasing width (8, 4, 2 and finally 1 packet of
// IT::size elements). For every block the affected part of C is loaded into local
// IntrinsicType accumulators, updated over the complete inner dimension K and stored back.
// NOTE(review): load(), store(), set() and B.get() are defined outside this block; set() is
// presumably a broadcast of a scalar to all packet lanes - confirm against the intrinsics layer.
1476  template< typename MT3 // Type of the left-hand side target matrix
1477  , typename MT4 // Type of the left-hand side matrix operand
1478  , typename MT5 > // Type of the right-hand side matrix operand
1479  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1480  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1481  {
1482  typedef IntrinsicTrait<ElementType> IT;
1483 
1484  const size_t M( A.rows() );
// The column bound is B.spacing(), not B.columns().
// NOTE(review): presumably the spacing includes row padding, so that only complete packets
// are touched; this also assumes that C provides at least the same spacing - confirm.
1485  const size_t N( B.spacing() );
1486  const size_t K( A.columns() );
1487 
1488  size_t j( 0UL );
1489 
// Block width 8 packets: one row of C at a time, eight accumulators
1490  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
1491  for( size_t i=0UL; i<M; ++i ) {
1492  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1493  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1494  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1495  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1496  IntrinsicType xmm5( load( &(~C)(i,j+IT::size*4UL) ) );
1497  IntrinsicType xmm6( load( &(~C)(i,j+IT::size*5UL) ) );
1498  IntrinsicType xmm7( load( &(~C)(i,j+IT::size*6UL) ) );
1499  IntrinsicType xmm8( load( &(~C)(i,j+IT::size*7UL) ) );
1500  for( size_t k=0UL; k<K; ++k ) {
1501  const IntrinsicType a1( set( A(i,k) ) );
1502  xmm1 = xmm1 - a1 * B.get(k,j );
1503  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1504  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1505  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1506  xmm5 = xmm5 - a1 * B.get(k,j+IT::size*4UL);
1507  xmm6 = xmm6 - a1 * B.get(k,j+IT::size*5UL);
1508  xmm7 = xmm7 - a1 * B.get(k,j+IT::size*6UL);
1509  xmm8 = xmm8 - a1 * B.get(k,j+IT::size*7UL);
1510  }
1511  store( &(~C)(i,j ), xmm1 );
1512  store( &(~C)(i,j+IT::size ), xmm2 );
1513  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1514  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1515  store( &(~C)(i,j+IT::size*4UL), xmm5 );
1516  store( &(~C)(i,j+IT::size*5UL), xmm6 );
1517  store( &(~C)(i,j+IT::size*6UL), xmm7 );
1518  store( &(~C)(i,j+IT::size*7UL), xmm8 );
1519  }
1520  }
// Block width 4 packets: two rows of C at a time (xmm1-4 hold row i, xmm5-8 hold row i+1),
// so every packet loaded from B is used for both rows
1521  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
1522  size_t i( 0UL );
1523  for( ; (i+2UL) <= M; i+=2UL ) {
1524  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1525  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size ) ) );
1526  IntrinsicType xmm3( load( &(~C)(i ,j+IT::size*2UL) ) );
1527  IntrinsicType xmm4( load( &(~C)(i ,j+IT::size*3UL) ) );
1528  IntrinsicType xmm5( load( &(~C)(i+1UL,j ) ) );
1529  IntrinsicType xmm6( load( &(~C)(i+1UL,j+IT::size ) ) );
1530  IntrinsicType xmm7( load( &(~C)(i+1UL,j+IT::size*2UL) ) );
1531  IntrinsicType xmm8( load( &(~C)(i+1UL,j+IT::size*3UL) ) );
1532  for( size_t k=0UL; k<K; ++k ) {
1533  const IntrinsicType a1( set( A(i ,k) ) );
1534  const IntrinsicType a2( set( A(i+1UL,k) ) );
1535  const IntrinsicType b1( B.get(k,j ) );
1536  const IntrinsicType b2( B.get(k,j+IT::size ) );
1537  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
1538  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
1539  xmm1 = xmm1 - a1 * b1;
1540  xmm2 = xmm2 - a1 * b2;
1541  xmm3 = xmm3 - a1 * b3;
1542  xmm4 = xmm4 - a1 * b4;
1543  xmm5 = xmm5 - a2 * b1;
1544  xmm6 = xmm6 - a2 * b2;
1545  xmm7 = xmm7 - a2 * b3;
1546  xmm8 = xmm8 - a2 * b4;
1547  }
1548  store( &(~C)(i ,j ), xmm1 );
1549  store( &(~C)(i ,j+IT::size ), xmm2 );
1550  store( &(~C)(i ,j+IT::size*2UL), xmm3 );
1551  store( &(~C)(i ,j+IT::size*3UL), xmm4 );
1552  store( &(~C)(i+1UL,j ), xmm5 );
1553  store( &(~C)(i+1UL,j+IT::size ), xmm6 );
1554  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 );
1555  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 );
1556  }
// Remaining single row in case M is odd
1557  if( i < M ) {
1558  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1559  IntrinsicType xmm2( load( &(~C)(i,j+IT::size ) ) );
1560  IntrinsicType xmm3( load( &(~C)(i,j+IT::size*2UL) ) );
1561  IntrinsicType xmm4( load( &(~C)(i,j+IT::size*3UL) ) );
1562  for( size_t k=0UL; k<K; ++k ) {
1563  const IntrinsicType a1( set( A(i,k) ) );
1564  xmm1 = xmm1 - a1 * B.get(k,j );
1565  xmm2 = xmm2 - a1 * B.get(k,j+IT::size );
1566  xmm3 = xmm3 - a1 * B.get(k,j+IT::size*2UL);
1567  xmm4 = xmm4 - a1 * B.get(k,j+IT::size*3UL);
1568  }
1569  store( &(~C)(i,j ), xmm1 );
1570  store( &(~C)(i,j+IT::size ), xmm2 );
1571  store( &(~C)(i,j+IT::size*2UL), xmm3 );
1572  store( &(~C)(i,j+IT::size*3UL), xmm4 );
1573  }
1574  }
// Block width 2 packets: two rows of C at a time (xmm1-2 hold row i, xmm3-4 hold row i+1)
1575  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
1576  size_t i( 0UL );
1577  for( ; (i+2UL) <= M; i+=2UL ) {
1578  IntrinsicType xmm1( load( &(~C)(i ,j ) ) );
1579  IntrinsicType xmm2( load( &(~C)(i ,j+IT::size) ) );
1580  IntrinsicType xmm3( load( &(~C)(i+1UL,j ) ) );
1581  IntrinsicType xmm4( load( &(~C)(i+1UL,j+IT::size) ) );
1582  for( size_t k=0UL; k<K; ++k ) {
1583  const IntrinsicType a1( set( A(i ,k) ) );
1584  const IntrinsicType a2( set( A(i+1UL,k) ) );
1585  const IntrinsicType b1( B.get(k,j ) );
1586  const IntrinsicType b2( B.get(k,j+IT::size) );
1587  xmm1 = xmm1 - a1 * b1;
1588  xmm2 = xmm2 - a1 * b2;
1589  xmm3 = xmm3 - a2 * b1;
1590  xmm4 = xmm4 - a2 * b2;
1591  }
1592  store( &(~C)(i ,j ), xmm1 );
1593  store( &(~C)(i ,j+IT::size), xmm2 );
1594  store( &(~C)(i+1UL,j ), xmm3 );
1595  store( &(~C)(i+1UL,j+IT::size), xmm4 );
1596  }
// Remaining single row in case M is odd
1597  if( i < M ) {
1598  IntrinsicType xmm1( load( &(~C)(i,j ) ) );
1599  IntrinsicType xmm2( load( &(~C)(i,j+IT::size) ) );
1600  for( size_t k=0UL; k<K; ++k ) {
1601  const IntrinsicType a1( set( A(i,k) ) );
1602  xmm1 = xmm1 - a1 * B.get(k,j );
1603  xmm2 = xmm2 - a1 * B.get(k,j+IT::size);
1604  }
1605  store( &(~C)(i,j ), xmm1 );
1606  store( &(~C)(i,j+IT::size), xmm2 );
1607  }
1608  }
// Final block of a single packet starting at column j, again two rows at a time.
// NOTE(review): exactly one packet is processed here, which covers all remaining columns
// only if N is a multiple of IT::size - confirm that spacing() guarantees this.
1609  if( j < N ) {
1610  size_t i( 0UL );
1611  for( ; (i+2UL) <= M; i+=2UL ) {
1612  IntrinsicType xmm1( load( &(~C)(i ,j) ) );
1613  IntrinsicType xmm2( load( &(~C)(i+1UL,j) ) );
1614  for( size_t k=0UL; k<K; ++k ) {
1615  const IntrinsicType b1( B.get(k,j) );
1616  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1617  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1618  }
1619  store( &(~C)(i ,j), xmm1 );
1620  store( &(~C)(i+1UL,j), xmm2 );
1621  }
// Remaining single row in case M is odd
1622  if( i < M ) {
1623  IntrinsicType xmm1( load( &(~C)(i,j) ) );
1624  for( size_t k=0UL; k<K; ++k ) {
1625  xmm1 = xmm1 - set( A(i,k) ) * B.get(k,j);
1626  }
1627  store( &(~C)(i,j), xmm1 );
1628  }
1629  }
1630  }
1632  //**********************************************************************************************
1633 
1634  //**Vectorized default subtraction assignment to column-major dense matrices********************
1648  template< typename MT3 // Type of the left-hand side target matrix
1649  , typename MT4 // Type of the left-hand side matrix operand
1650  , typename MT5 > // Type of the right-hand side matrix operand
1651  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1652  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1653  {
1654  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
1655  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
1656 
1657  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1658  const typename MT4::OppositeType tmp( A );
1659  subAssign( ~C, tmp * B );
1660  }
1661  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1662  const typename MT5::OppositeType tmp( B );
1663  subAssign( ~C, A * tmp );
1664  }
1665  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1666  const typename MT4::OppositeType tmp( A );
1667  subAssign( ~C, tmp * B );
1668  }
1669  else {
1670  const typename MT5::OppositeType tmp( B );
1671  subAssign( ~C, A * tmp );
1672  }
1673  }
1675  //**********************************************************************************************
1676 
1677  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1691  template< typename MT3 // Type of the left-hand side target matrix
1692  , typename MT4 // Type of the left-hand side matrix operand
1693  , typename MT5 > // Type of the right-hand side matrix operand
1694  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1695  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1696  {
1697  selectDefaultSubAssignKernel( C, A, B );
1698  }
1700  //**********************************************************************************************
1701 
1702  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1703 #if BLAZE_BLAS_MODE
1704 
1717  template< typename MT3 // Type of the left-hand side target matrix
1718  , typename MT4 // Type of the left-hand side matrix operand
1719  , typename MT5 > // Type of the right-hand side matrix operand
1720  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1721  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1722  {
1723  using boost::numeric_cast;
1724 
1725  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
1726  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
1727  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
1728 
1729  const int M ( numeric_cast<int>( A.rows() ) );
1730  const int N ( numeric_cast<int>( B.columns() ) );
1731  const int K ( numeric_cast<int>( A.columns() ) );
1732  const int lda( numeric_cast<int>( A.spacing() ) );
1733  const int ldb( numeric_cast<int>( B.spacing() ) );
1734  const int ldc( numeric_cast<int>( C.spacing() ) );
1735 
1736  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1737  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1738  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1739  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1740  }
1742 #endif
1743  //**********************************************************************************************
1744 
1745  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1746 #if BLAZE_BLAS_MODE
1747 
1760  template< typename MT3 // Type of the left-hand side target matrix
1761  , typename MT4 // Type of the left-hand side matrix operand
1762  , typename MT5 > // Type of the right-hand side matrix operand
1763  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1764  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1765  {
1766  using boost::numeric_cast;
1767 
1768  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
1769  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
1770  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
1771 
1772  const int M ( numeric_cast<int>( A.rows() ) );
1773  const int N ( numeric_cast<int>( B.columns() ) );
1774  const int K ( numeric_cast<int>( A.columns() ) );
1775  const int lda( numeric_cast<int>( A.spacing() ) );
1776  const int ldb( numeric_cast<int>( B.spacing() ) );
1777  const int ldc( numeric_cast<int>( C.spacing() ) );
1778 
1779  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1780  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1781  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1782  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1783  }
1785 #endif
1786  //**********************************************************************************************
1787 
1788  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1789 #if BLAZE_BLAS_MODE
1790 
1803  template< typename MT3 // Type of the left-hand side target matrix
1804  , typename MT4 // Type of the left-hand side matrix operand
1805  , typename MT5 > // Type of the right-hand side matrix operand
1806  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1807  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1808  {
1809  using boost::numeric_cast;
1810 
1811  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1812  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1813  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1814  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1815  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1816  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1817 
1818  const int M ( numeric_cast<int>( A.rows() ) );
1819  const int N ( numeric_cast<int>( B.columns() ) );
1820  const int K ( numeric_cast<int>( A.columns() ) );
1821  const int lda( numeric_cast<int>( A.spacing() ) );
1822  const int ldb( numeric_cast<int>( B.spacing() ) );
1823  const int ldc( numeric_cast<int>( C.spacing() ) );
1824  const complex<float> alpha( -1.0F, 0.0F );
1825  const complex<float> beta ( 1.0F, 0.0F );
1826 
1827  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1828  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1829  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1830  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1831  }
1833 #endif
1834  //**********************************************************************************************
1835 
1836  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1837 #if BLAZE_BLAS_MODE
1838 
1851  template< typename MT3 // Type of the left-hand side target matrix
1852  , typename MT4 // Type of the left-hand side matrix operand
1853  , typename MT5 > // Type of the right-hand side matrix operand
1854  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1855  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1856  {
1857  using boost::numeric_cast;
1858 
1859  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
1860  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
1861  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
1862  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1863  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1864  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1865 
1866  const int M ( numeric_cast<int>( A.rows() ) );
1867  const int N ( numeric_cast<int>( B.columns() ) );
1868  const int K ( numeric_cast<int>( A.columns() ) );
1869  const int lda( numeric_cast<int>( A.spacing() ) );
1870  const int ldb( numeric_cast<int>( B.spacing() ) );
1871  const int ldc( numeric_cast<int>( C.spacing() ) );
1872  const complex<double> alpha( -1.0, 0.0 );
1873  const complex<double> beta ( 1.0, 0.0 );
1874 
1875  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1876  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1877  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1878  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1879  }
1881 #endif
1882  //**********************************************************************************************
1883 
1884  //**Subtraction assignment to sparse matrices***************************************************
1885  // No special implementation for the subtraction assignment to sparse matrices.
1886  //**********************************************************************************************
1887 
1888  //**Multiplication assignment to dense matrices*************************************************
1889  // No special implementation for the multiplication assignment to dense matrices.
1890  //**********************************************************************************************
1891 
1892  //**Multiplication assignment to sparse matrices************************************************
1893  // No special implementation for the multiplication assignment to sparse matrices.
1894  //**********************************************************************************************
1895 
1896  //**Compile time checks*************************************************************************
1903  //**********************************************************************************************
1904 };
1905 //*************************************************************************************************
1906 
1907 
1908 
1909 
1910 //=================================================================================================
1911 //
1912 // DMATSCALARMULTEXPR SPECIALIZATION
1913 //
1914 //=================================================================================================
1915 
1916 //*************************************************************************************************
1924 template< typename MT1 // Type of the left-hand side dense matrix
1925  , typename MT2 // Type of the right-hand side dense matrix
1926  , typename ST > // Type of the right-hand side scalar value
1927 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
1928  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
1929  , private MatScalarMultExpr
1930  , private Computation
1931 {
1932  private:
1933  //**Type definitions****************************************************************************
1934  typedef DMatDMatMultExpr<MT1,MT2> MMM;
1935  typedef typename MMM::ResultType RES;
1936  typedef typename MT1::ResultType RT1;
1937  typedef typename MT2::ResultType RT2;
1938  typedef typename MT1::CompositeType CT1;
1939  typedef typename MT2::CompositeType CT2;
1940  //**********************************************************************************************
1941 
1942  //**********************************************************************************************
1944 
1947  template< typename T1, typename T2, typename T3, typename T4 >
1948  struct UseSinglePrecisionKernel {
1949  enum { value = IsFloat<typename T1::ElementType>::value &&
1950  IsFloat<typename T2::ElementType>::value &&
1951  IsFloat<typename T3::ElementType>::value &&
1952  !IsComplex<T4>::value };
1953  };
1954  //**********************************************************************************************
1955 
1956  //**********************************************************************************************
1958 
1961  template< typename T1, typename T2, typename T3, typename T4 >
1962  struct UseDoublePrecisionKernel {
1963  enum { value = IsDouble<typename T1::ElementType>::value &&
1964  IsDouble<typename T2::ElementType>::value &&
1965  IsDouble<typename T3::ElementType>::value &&
1966  !IsComplex<T4>::value };
1967  };
1968  //**********************************************************************************************
1969 
1970  //**********************************************************************************************
1972 
1975  template< typename T1, typename T2, typename T3 >
1976  struct UseSinglePrecisionComplexKernel {
1977  typedef complex<float> Type;
1978  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1979  IsSame<typename T2::ElementType,Type>::value &&
1980  IsSame<typename T3::ElementType,Type>::value };
1981  };
1982  //**********************************************************************************************
1983 
1984  //**********************************************************************************************
1986 
1989  template< typename T1, typename T2, typename T3 >
1990  struct UseDoublePrecisionComplexKernel {
1991  typedef complex<double> Type;
1992  enum { value = IsSame<typename T1::ElementType,Type>::value &&
1993  IsSame<typename T2::ElementType,Type>::value &&
1994  IsSame<typename T3::ElementType,Type>::value };
1995  };
1996  //**********************************************************************************************
1997 
1998  //**********************************************************************************************
2000 
2002  template< typename T1, typename T2, typename T3, typename T4 >
2003  struct UseDefaultKernel {
2004  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2005  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2006  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2007  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2008  };
2009  //**********************************************************************************************
2010 
2011  //**********************************************************************************************
2013 
2015  template< typename T1, typename T2, typename T3, typename T4 >
2016  struct UseVectorizedDefaultKernel {
2017  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2018  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2019  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2020  IsSame<typename T1::ElementType,T4>::value &&
2021  IntrinsicTrait<typename T1::ElementType>::addition &&
2022  IntrinsicTrait<typename T1::ElementType>::multiplication };
2023  };
2024  //**********************************************************************************************
2025 
2026  public:
2027  //**Type definitions****************************************************************************
2028  typedef DMatScalarMultExpr<MMM,ST,false> This;
2029  typedef typename MultTrait<RES,ST>::Type ResultType;
2030  typedef typename ResultType::OppositeType OppositeType;
2031  typedef typename ResultType::TransposeType TransposeType;
2032  typedef typename ResultType::ElementType ElementType;
2033  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2034  typedef const ElementType ReturnType;
2035  typedef const ResultType CompositeType;
2036 
2038  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2039 
2041  typedef ST RightOperand;
2042 
2044  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2045 
2047  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2048  //**********************************************************************************************
2049 
2050  //**Compilation flags***************************************************************************
//! Compilation flag: this expression always reports itself as not vectorizable.
enum { vectorizable = 0 };
2053  //**********************************************************************************************
2054 
2055  //**Constructor*********************************************************************************
2061  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2062  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2063  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2064  {}
2065  //**********************************************************************************************
2066 
2067  //**Access operator*****************************************************************************
2074  inline ReturnType operator()( size_t i, size_t j ) const {
2075  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2076  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2077  return matrix_(i,j) * scalar_;
2078  }
2079  //**********************************************************************************************
2080 
2081  //**Rows function*******************************************************************************
2086  inline size_t rows() const {
2087  return matrix_.rows();
2088  }
2089  //**********************************************************************************************
2090 
2091  //**Columns function****************************************************************************
2096  inline size_t columns() const {
2097  return matrix_.columns();
2098  }
2099  //**********************************************************************************************
2100 
2101  //**Left operand access*************************************************************************
2106  inline LeftOperand leftOperand() const {
2107  return matrix_;
2108  }
2109  //**********************************************************************************************
2110 
2111  //**Right operand access************************************************************************
2116  inline RightOperand rightOperand() const {
2117  return scalar_;
2118  }
2119  //**********************************************************************************************
2120 
2121  //**********************************************************************************************
2127  template< typename T >
2128  inline bool canAlias( const T* alias ) const {
2129  return matrix_.canAlias( alias );
2130  }
2131  //**********************************************************************************************
2132 
2133  //**********************************************************************************************
2139  template< typename T >
2140  inline bool isAliased( const T* alias ) const {
2141  return matrix_.isAliased( alias );
2142  }
2143  //**********************************************************************************************
2144 
2145  private:
2146  //**Member variables****************************************************************************
2147  LeftOperand matrix_;
2148  RightOperand scalar_;
2149  //**********************************************************************************************
2150 
2151  //**Assignment to dense matrices****************************************************************
2160  template< typename MT3 // Type of the target dense matrix
2161  , bool SO > // Storage order of the target dense matrix
2162  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2163  {
2165 
2166  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2167  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2168 
2169  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2170  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2171 
2172  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2173  return;
2174  }
2175  else if( left.columns() == 0UL ) {
2176  reset( ~lhs );
2177  return;
2178  }
2179 
2180  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2181  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2182 
2183  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2184  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2185  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2186  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2187  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2188  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2189 
2190  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2191  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2192  else
2193  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2194  }
2195  //**********************************************************************************************
2196 
2197  //**Default assignment to dense matrices********************************************************
2211  template< typename MT3 // Type of the left-hand side target matrix
2212  , typename MT4 // Type of the left-hand side matrix operand
2213  , typename MT5 // Type of the right-hand side matrix operand
2214  , typename ST2 > // Type of the scalar value
2215  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2216  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2217  {
2218  const size_t M( A.rows() );
2219  const size_t N( B.columns() );
2220  const size_t K( A.columns() );
2221 
2222  for( size_t i=0UL; i<M; ++i ) {
2223  for( size_t j=0UL; j<N; ++j ) {
2224  C(i,j) = A(i,0UL) * B(0UL,j);
2225  }
2226  for( size_t k=1UL; k<K; ++k ) {
2227  for( size_t j=0UL; j<N; ++j ) {
2228  C(i,j) += A(i,k) * B(k,j);
2229  }
2230  }
2231  for( size_t j=0UL; j<N; ++j ) {
2232  C(i,j) *= scalar;
2233  }
2234  }
2235  }
2236  //**********************************************************************************************
2237 
2238  //**Vectorized default assignment to row-major dense matrices***********************************
2252  template< typename MT3 // Type of the left-hand side target matrix
2253  , typename MT4 // Type of the left-hand side matrix operand
2254  , typename MT5 // Type of the right-hand side matrix operand
2255  , typename ST2 > // Type of the scalar value
2256  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2257  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2258  {
2259  typedef IntrinsicTrait<ElementType> IT;
2260 
2261  const size_t M( A.rows() );
2262  const size_t N( B.spacing() );
2263  const size_t K( A.columns() );
2264 
2265  const IntrinsicType factor( set( scalar ) );
2266 
2267  size_t j( 0UL );
2268 
2269  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2270  for( size_t i=0UL; i<M; ++i ) {
2271  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2272  for( size_t k=0UL; k<K; ++k ) {
2273  const IntrinsicType a1( set( A(i,k) ) );
2274  xmm1 = xmm1 + a1 * B.get(k,j );
2275  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2276  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2277  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2278  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2279  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2280  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2281  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2282  }
2283  store( &(~C)(i,j ), xmm1 * factor );
2284  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2285  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2286  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2287  store( &(~C)(i,j+IT::size*4UL), xmm5 * factor );
2288  store( &(~C)(i,j+IT::size*5UL), xmm6 * factor );
2289  store( &(~C)(i,j+IT::size*6UL), xmm7 * factor );
2290  store( &(~C)(i,j+IT::size*7UL), xmm8 * factor );
2291  }
2292  }
2293  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2294  size_t i( 0UL );
2295  for( ; (i+2UL) <= M; i+=2UL ) {
2296  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2297  for( size_t k=0UL; k<K; ++k ) {
2298  const IntrinsicType a1( set( A(i ,k) ) );
2299  const IntrinsicType a2( set( A(i+1UL,k) ) );
2300  const IntrinsicType b1( B.get(k,j ) );
2301  const IntrinsicType b2( B.get(k,j+IT::size ) );
2302  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2303  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2304  xmm1 = xmm1 + a1 * b1;
2305  xmm2 = xmm2 + a1 * b2;
2306  xmm3 = xmm3 + a1 * b3;
2307  xmm4 = xmm4 + a1 * b4;
2308  xmm5 = xmm5 + a2 * b1;
2309  xmm6 = xmm6 + a2 * b2;
2310  xmm7 = xmm7 + a2 * b3;
2311  xmm8 = xmm8 + a2 * b4;
2312  }
2313  store( &(~C)(i ,j ), xmm1 * factor );
2314  store( &(~C)(i ,j+IT::size ), xmm2 * factor );
2315  store( &(~C)(i ,j+IT::size*2UL), xmm3 * factor );
2316  store( &(~C)(i ,j+IT::size*3UL), xmm4 * factor );
2317  store( &(~C)(i+1UL,j ), xmm5 * factor );
2318  store( &(~C)(i+1UL,j+IT::size ), xmm6 * factor );
2319  store( &(~C)(i+1UL,j+IT::size*2UL), xmm7 * factor );
2320  store( &(~C)(i+1UL,j+IT::size*3UL), xmm8 * factor );
2321  }
2322  if( i < M ) {
2323  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2324  for( size_t k=0UL; k<K; ++k ) {
2325  const IntrinsicType a1( set( A(i,k) ) );
2326  xmm1 = xmm1 + a1 * B.get(k,j );
2327  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2328  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2329  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2330  }
2331  store( &(~C)(i,j ), xmm1 * factor );
2332  store( &(~C)(i,j+IT::size ), xmm2 * factor );
2333  store( &(~C)(i,j+IT::size*2UL), xmm3 * factor );
2334  store( &(~C)(i,j+IT::size*3UL), xmm4 * factor );
2335  }
2336  }
2337  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2338  size_t i( 0UL );
2339  for( ; (i+2UL) <= M; i+=2UL ) {
2340  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2341  for( size_t k=0UL; k<K; ++k ) {
2342  const IntrinsicType a1( set( A(i ,k) ) );
2343  const IntrinsicType a2( set( A(i+1UL,k) ) );
2344  const IntrinsicType b1( B.get(k,j ) );
2345  const IntrinsicType b2( B.get(k,j+IT::size) );
2346  xmm1 = xmm1 + a1 * b1;
2347  xmm2 = xmm2 + a1 * b2;
2348  xmm3 = xmm3 + a2 * b1;
2349  xmm4 = xmm4 + a2 * b2;
2350  }
2351  store( &(~C)(i ,j ), xmm1 * factor );
2352  store( &(~C)(i ,j+IT::size), xmm2 * factor );
2353  store( &(~C)(i+1UL,j ), xmm3 * factor );
2354  store( &(~C)(i+1UL,j+IT::size), xmm4 * factor );
2355  }
2356  if( i < M ) {
2357  IntrinsicType xmm1, xmm2;
2358  for( size_t k=0UL; k<K; ++k ) {
2359  const IntrinsicType a1( set( A(i,k) ) );
2360  xmm1 = xmm1 + a1 * B.get(k,j );
2361  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2362  }
2363  store( &(~C)(i,j ), xmm1 * factor );
2364  store( &(~C)(i,j+IT::size), xmm2 * factor );
2365  }
2366  }
2367  if( j < N ) {
2368  size_t i( 0UL );
2369  for( ; (i+2UL) <= M; i+=2UL ) {
2370  IntrinsicType xmm1, xmm2;
2371  for( size_t k=0UL; k<K; ++k ) {
2372  const IntrinsicType b1( B.get(k,j) );
2373  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2374  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2375  }
2376  store( &(~C)(i ,j), xmm1 * factor );
2377  store( &(~C)(i+1UL,j), xmm2 * factor );
2378  }
2379  if( i < M ) {
2380  IntrinsicType xmm1;
2381  for( size_t k=0UL; k<K; ++k ) {
2382  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2383  }
2384  store( &(~C)(i,j), xmm1 * factor );
2385  }
2386  }
2387  }
2388  //**********************************************************************************************
2389 
2390  //**Vectorized default assignment to column-major dense matrices********************************
2404  template< typename MT3 // Type of the left-hand side target matrix
2405  , typename MT4 // Type of the left-hand side matrix operand
2406  , typename MT5 // Type of the right-hand side matrix operand
2407  , typename ST2 > // Type of the scalar value
2408  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2409  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2410  {
2411  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2412  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2413 
2414  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2415  const typename MT4::OppositeType tmp( A );
2416  assign( ~C, tmp * B * scalar );
2417  }
2418  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2419  const typename MT5::OppositeType tmp( B );
2420  assign( ~C, A * tmp * scalar );
2421  }
2422  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2423  const typename MT4::OppositeType tmp( A );
2424  assign( ~C, tmp * B * scalar );
2425  }
2426  else {
2427  const typename MT5::OppositeType tmp( B );
2428  assign( ~C, A * tmp * scalar );
2429  }
2430  }
2431  //**********************************************************************************************
2432 
2433  //**BLAS-based assignment to dense matrices (default)*******************************************
2447  template< typename MT3 // Type of the left-hand side target matrix
2448  , typename MT4 // Type of the left-hand side matrix operand
2449  , typename MT5 // Type of the right-hand side matrix operand
2450  , typename ST2 > // Type of the scalar value
2451  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2452  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2453  {
2454  selectDefaultAssignKernel( C, A, B, scalar );
2455  }
2456  //**********************************************************************************************
2457 
2458  //**BLAS-based assignment to dense matrices (single precision)**********************************
2459 #if BLAZE_BLAS_MODE
2460 
2473  template< typename MT3 // Type of the left-hand side target matrix
2474  , typename MT4 // Type of the left-hand side matrix operand
2475  , typename MT5 // Type of the right-hand side matrix operand
2476  , typename ST2 > // Type of the scalar value
2477  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2478  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2479  {
2480  using boost::numeric_cast;
2481 
2482  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2483  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2484  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2485 
2486  const int M ( numeric_cast<int>( A.rows() ) );
2487  const int N ( numeric_cast<int>( B.columns() ) );
2488  const int K ( numeric_cast<int>( A.columns() ) );
2489  const int lda( numeric_cast<int>( A.spacing() ) );
2490  const int ldb( numeric_cast<int>( B.spacing() ) );
2491  const int ldc( numeric_cast<int>( C.spacing() ) );
2492 
2493  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2494  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2495  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2496  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2497  }
2498 #endif
2499  //**********************************************************************************************
2500 
2501  //**BLAS-based assignment to dense matrices (double precision)**********************************
2502 #if BLAZE_BLAS_MODE
2503 
2516  template< typename MT3 // Type of the left-hand side target matrix
2517  , typename MT4 // Type of the left-hand side matrix operand
2518  , typename MT5 // Type of the right-hand side matrix operand
2519  , typename ST2 > // Type of the scalar value
2520  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2521  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2522  {
2523  using boost::numeric_cast;
2524 
2525  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
2526  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
2527  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
2528 
2529  const int M ( numeric_cast<int>( A.rows() ) );
2530  const int N ( numeric_cast<int>( B.columns() ) );
2531  const int K ( numeric_cast<int>( A.columns() ) );
2532  const int lda( numeric_cast<int>( A.spacing() ) );
2533  const int ldb( numeric_cast<int>( B.spacing() ) );
2534  const int ldc( numeric_cast<int>( C.spacing() ) );
2535 
2536  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2537  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2538  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2539  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2540  }
2541 #endif
2542  //**********************************************************************************************
2543 
2544  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2545 #if BLAZE_BLAS_MODE
2546 
2559  template< typename MT3 // Type of the left-hand side target matrix
2560  , typename MT4 // Type of the left-hand side matrix operand
2561  , typename MT5 // Type of the right-hand side matrix operand
2562  , typename ST2 > // Type of the scalar value
2563  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2564  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2565  {
2566  using boost::numeric_cast;
2567 
2568  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2569  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2570  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2571  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2572  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2573  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2574 
2575  const int M ( numeric_cast<int>( A.rows() ) );
2576  const int N ( numeric_cast<int>( B.columns() ) );
2577  const int K ( numeric_cast<int>( A.columns() ) );
2578  const int lda( numeric_cast<int>( A.spacing() ) );
2579  const int ldb( numeric_cast<int>( B.spacing() ) );
2580  const int ldc( numeric_cast<int>( C.spacing() ) );
2581  const complex<float> alpha( scalar );
2582  const complex<float> beta ( 0.0F, 0.0F );
2583 
2584  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2585  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2586  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2587  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2588  }
2589 #endif
2590  //**********************************************************************************************
2591 
2592  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2593 #if BLAZE_BLAS_MODE
2594 
2607  template< typename MT3 // Type of the left-hand side target matrix
2608  , typename MT4 // Type of the left-hand side matrix operand
2609  , typename MT5 // Type of the right-hand side matrix operand
2610  , typename ST2 > // Type of the scalar
2611  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2612  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2613  {
2614  using boost::numeric_cast;
2615 
2616  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
2617  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
2618  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
2619  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2620  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2621  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2622 
2623  const int M ( numeric_cast<int>( A.rows() ) );
2624  const int N ( numeric_cast<int>( B.columns() ) );
2625  const int K ( numeric_cast<int>( A.columns() ) );
2626  const int lda( numeric_cast<int>( A.spacing() ) );
2627  const int ldb( numeric_cast<int>( B.spacing() ) );
2628  const int ldc( numeric_cast<int>( C.spacing() ) );
2629  const complex<double> alpha( scalar );
2630  const complex<double> beta ( 0.0, 0.0 );
2631 
2632  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2633  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2634  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2635  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2636  }
2637 #endif
2638  //**********************************************************************************************
2639 
2640  //**Assignment to sparse matrices***************************************************************
2651  template< typename MT // Type of the target sparse matrix
2652  , bool SO > // Storage order of the target sparse matrix
2653  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2654  {
2656 
2657  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2658 
2664  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( typename TmpType::CompositeType );
2665 
2666  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2667  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2668 
2669  const TmpType tmp( rhs );
2670  assign( ~lhs, tmp );
2671  }
2672  //**********************************************************************************************
2673 
2674  //**Addition assignment to dense matrices*******************************************************
2686  template< typename MT3 // Type of the target dense matrix
2687  , bool SO > // Storage order of the target dense matrix
2688  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2689  {
2691 
2692  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2693  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2694 
2695  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2696  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2697 
2698  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2699  return;
2700  }
2701 
2702  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2703  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2704 
2705  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2706  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2707  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2708  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2709  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2710  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2711 
2712  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
2713  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2714  else
2715  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2716  }
2717  //**********************************************************************************************
2718 
2719  //**Default addition assignment to dense matrices***********************************************
2733  template< typename MT3 // Type of the left-hand side target matrix
2734  , typename MT4 // Type of the left-hand side matrix operand
2735  , typename MT5 // Type of the right-hand side matrix operand
2736  , typename ST2 > // Type of the scalar value
2737  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2738  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2739  {
2740  const ResultType tmp( A * B * scalar );
2741  addAssign( C, tmp );
2742  }
2743  //**********************************************************************************************
2744 
2745  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default kernel for the addition assignment C += scalar * ( A * B ) to a row-major
// dense matrix. Enabled (via EnableIf) only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
//   C      : target matrix; updated through intrinsic load()/store() on &(~C)(i,j)
//   A      : left-hand side operand; read element-wise via A(i,k) and broadcast with set()
//   B      : right-hand side operand; read as intrinsic vectors via B.get(k,j)
//   scalar : scaling factor; broadcast once into 'factor'
//
// The column index j advances in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. The 4-, 2- and 1-vector blocks process two rows of A per iteration and finish with a
// single-row remainder step.
2759  template< typename MT3 // Type of the left-hand side target matrix
2760  , typename MT4 // Type of the left-hand side matrix operand
2761  , typename MT5 // Type of the right-hand side matrix operand
2762  , typename ST2 > // Type of the scalar value
2763  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2764  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2765  {
2766  typedef IntrinsicTrait<ElementType> IT;
2767 
2768  const size_t M( A.rows() );
      // NOTE(review): the column bound is B.spacing(), not B.columns(); presumably the padded row
      // length so that only whole intrinsic vectors are processed. This appears to assume that C
      // offers at least the same spacing - confirm against the matrix types.
2769  const size_t N( B.spacing() );
2770  const size_t K( A.columns() );
2771 
2772  const IntrinsicType factor( set( scalar ) );
2773 
2774  size_t j( 0UL );
2775 
      // Column blocks of 8 intrinsic vectors, one row of A at a time.
      // NOTE(review): the xmm accumulators are default-constructed and immediately accumulated
      // into; this assumes IntrinsicType default-initializes to zero - TODO confirm.
2776  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
2777  for( size_t i=0UL; i<M; ++i ) {
2778  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2779  for( size_t k=0UL; k<K; ++k ) {
2780  const IntrinsicType a1( set( A(i,k) ) );
2781  xmm1 = xmm1 + a1 * B.get(k,j );
2782  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2783  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2784  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2785  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
2786  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
2787  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
2788  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
2789  }
      // Add the scaled accumulators to the current content of C.
2790  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2791  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2792  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2793  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2794  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) + xmm5 * factor );
2795  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) + xmm6 * factor );
2796  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) + xmm7 * factor );
2797  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) + xmm8 * factor );
2798  }
2799  }
      // Column blocks of 4 intrinsic vectors, two rows of A at a time (xmm1-4: row i, xmm5-8: row i+1).
2800  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
2801  size_t i( 0UL );
2802  for( ; (i+2UL) <= M; i+=2UL ) {
2803  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2804  for( size_t k=0UL; k<K; ++k ) {
2805  const IntrinsicType a1( set( A(i ,k) ) );
2806  const IntrinsicType a2( set( A(i+1UL,k) ) );
2807  const IntrinsicType b1( B.get(k,j ) );
2808  const IntrinsicType b2( B.get(k,j+IT::size ) );
2809  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
2810  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
2811  xmm1 = xmm1 + a1 * b1;
2812  xmm2 = xmm2 + a1 * b2;
2813  xmm3 = xmm3 + a1 * b3;
2814  xmm4 = xmm4 + a1 * b4;
2815  xmm5 = xmm5 + a2 * b1;
2816  xmm6 = xmm6 + a2 * b2;
2817  xmm7 = xmm7 + a2 * b3;
2818  xmm8 = xmm8 + a2 * b4;
2819  }
2820  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2821  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) + xmm2 * factor );
2822  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) + xmm3 * factor );
2823  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) + xmm4 * factor );
2824  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm5 * factor );
2825  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) + xmm6 * factor );
2826  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) + xmm7 * factor );
2827  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) + xmm8 * factor );
2828  }
      // Remaining single row in case M is odd.
2829  if( i < M ) {
2830  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2831  for( size_t k=0UL; k<K; ++k ) {
2832  const IntrinsicType a1( set( A(i,k) ) );
2833  xmm1 = xmm1 + a1 * B.get(k,j );
2834  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
2835  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
2836  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
2837  }
2838  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2839  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) + xmm2 * factor );
2840  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) + xmm3 * factor );
2841  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) + xmm4 * factor );
2842  }
2843  }
      // Column blocks of 2 intrinsic vectors, two rows of A at a time (xmm1-2: row i, xmm3-4: row i+1).
2844  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
2845  size_t i( 0UL );
2846  for( ; (i+2UL) <= M; i+=2UL ) {
2847  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2848  for( size_t k=0UL; k<K; ++k ) {
2849  const IntrinsicType a1( set( A(i ,k) ) );
2850  const IntrinsicType a2( set( A(i+1UL,k) ) );
2851  const IntrinsicType b1( B.get(k,j ) );
2852  const IntrinsicType b2( B.get(k,j+IT::size) );
2853  xmm1 = xmm1 + a1 * b1;
2854  xmm2 = xmm2 + a1 * b2;
2855  xmm3 = xmm3 + a2 * b1;
2856  xmm4 = xmm4 + a2 * b2;
2857  }
2858  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) + xmm1 * factor );
2859  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) + xmm2 * factor );
2860  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) + xmm3 * factor );
2861  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) + xmm4 * factor );
2862  }
      // Remaining single row in case M is odd.
2863  if( i < M ) {
2864  IntrinsicType xmm1, xmm2;
2865  for( size_t k=0UL; k<K; ++k ) {
2866  const IntrinsicType a1( set( A(i,k) ) );
2867  xmm1 = xmm1 + a1 * B.get(k,j );
2868  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
2869  }
2870  store( &(~C)(i,j ), load( &(~C)(i,j ) ) + xmm1 * factor );
2871  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) + xmm2 * factor );
2872  }
2873  }
      // Final column block of a single intrinsic vector, again two rows at a time plus remainder.
2874  if( j < N ) {
2875  size_t i( 0UL );
2876  for( ; (i+2UL) <= M; i+=2UL ) {
2877  IntrinsicType xmm1, xmm2;
2878  for( size_t k=0UL; k<K; ++k ) {
2879  const IntrinsicType b1( B.get(k,j) );
2880  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2881  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2882  }
2883  store( &(~C)(i ,j), load( &(~C)(i ,j) ) + xmm1 * factor );
2884  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) + xmm2 * factor );
2885  }
2886  if( i < M ) {
2887  IntrinsicType xmm1;
2888  for( size_t k=0UL; k<K; ++k ) {
2889  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
2890  }
2891  store( &(~C)(i,j), load( &(~C)(i,j) ) + xmm1 * factor );
2892  }
2893  }
2894  }
2895  //**********************************************************************************************
2896 
2897  //**Vectorized default addition assignment to column-major dense matrices***********************
2911  template< typename MT3 // Type of the left-hand side target matrix
2912  , typename MT4 // Type of the left-hand side matrix operand
2913  , typename MT5 // Type of the right-hand side matrix operand
2914  , typename ST2 > // Type of the scalar value
2915  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2916  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2917  {
2918  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
2919  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
2920 
2921  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2922  const typename MT4::OppositeType tmp( A );
2923  addAssign( ~C, tmp * B * scalar );
2924  }
2925  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2926  const typename MT5::OppositeType tmp( B );
2927  addAssign( ~C, A * tmp * scalar );
2928  }
2929  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2930  const typename MT4::OppositeType tmp( A );
2931  addAssign( ~C, tmp * B * scalar );
2932  }
2933  else {
2934  const typename MT5::OppositeType tmp( B );
2935  addAssign( ~C, A * tmp * scalar );
2936  }
2937  }
2938  //**********************************************************************************************
2939 
2940  //**BLAS-based addition assignment to dense matrices (default)**********************************
2954  template< typename MT3 // Type of the left-hand side target matrix
2955  , typename MT4 // Type of the left-hand side matrix operand
2956  , typename MT5 // Type of the right-hand side matrix operand
2957  , typename ST2 > // Type of the scalar value
2958  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2959  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2960  {
2961  selectDefaultAddAssignKernel( C, A, B, scalar );
2962  }
2963  //**********************************************************************************************
2964 
2965  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2966 #if BLAZE_BLAS_MODE
2967 
2980  template< typename MT3 // Type of the left-hand side target matrix
2981  , typename MT4 // Type of the left-hand side matrix operand
2982  , typename MT5 // Type of the right-hand side matrix operand
2983  , typename ST2 > // Type of the scalar value
2984  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2985  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2986  {
2987  using boost::numeric_cast;
2988 
2989  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
2990  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
2991  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
2992 
2993  const int M ( numeric_cast<int>( A.rows() ) );
2994  const int N ( numeric_cast<int>( B.columns() ) );
2995  const int K ( numeric_cast<int>( A.columns() ) );
2996  const int lda( numeric_cast<int>( A.spacing() ) );
2997  const int ldb( numeric_cast<int>( B.spacing() ) );
2998  const int ldc( numeric_cast<int>( C.spacing() ) );
2999 
3000  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3001  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3002  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3003  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3004  }
3005 #endif
3006  //**********************************************************************************************
3007 
3008  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3009 #if BLAZE_BLAS_MODE
3010 
3023  template< typename MT3 // Type of the left-hand side target matrix
3024  , typename MT4 // Type of the left-hand side matrix operand
3025  , typename MT5 // Type of the right-hand side matrix operand
3026  , typename ST2 > // Type of the scalar value
3027  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3028  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3029  {
3030  using boost::numeric_cast;
3031 
3032  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3033  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3034  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3035 
3036  const int M ( numeric_cast<int>( A.rows() ) );
3037  const int N ( numeric_cast<int>( B.columns() ) );
3038  const int K ( numeric_cast<int>( A.columns() ) );
3039  const int lda( numeric_cast<int>( A.spacing() ) );
3040  const int ldb( numeric_cast<int>( B.spacing() ) );
3041  const int ldc( numeric_cast<int>( C.spacing() ) );
3042 
3043  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3044  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3045  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3046  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3047  }
3048 #endif
3049  //**********************************************************************************************
3050 
3051  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3052 #if BLAZE_BLAS_MODE
3053 
3066  template< typename MT3 // Type of the left-hand side target matrix
3067  , typename MT4 // Type of the left-hand side matrix operand
3068  , typename MT5 // Type of the right-hand side matrix operand
3069  , typename ST2 > // Type of the scalar value
3070  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3071  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3072  {
3073  using boost::numeric_cast;
3074 
3075  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3076  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3077  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3078  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3079  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3080  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3081 
3082  const int M ( numeric_cast<int>( A.rows() ) );
3083  const int N ( numeric_cast<int>( B.columns() ) );
3084  const int K ( numeric_cast<int>( A.columns() ) );
3085  const int lda( numeric_cast<int>( A.spacing() ) );
3086  const int ldb( numeric_cast<int>( B.spacing() ) );
3087  const int ldc( numeric_cast<int>( C.spacing() ) );
3088  const complex<float> alpha( scalar );
3089  const complex<float> beta ( 1.0F, 0.0F );
3090 
3091  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3092  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3093  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3094  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3095  }
3096 #endif
3097  //**********************************************************************************************
3098 
3099  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3100 #if BLAZE_BLAS_MODE
3101 
3114  template< typename MT3 // Type of the left-hand side target matrix
3115  , typename MT4 // Type of the left-hand side matrix operand
3116  , typename MT5 // Type of the right-hand side matrix operand
3117  , typename ST2 > // Type of the scalar value
3118  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3119  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3120  {
3121  using boost::numeric_cast;
3122 
3123  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3124  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3125  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3126  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3127  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3128  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3129 
3130  const int M ( numeric_cast<int>( A.rows() ) );
3131  const int N ( numeric_cast<int>( B.columns() ) );
3132  const int K ( numeric_cast<int>( A.columns() ) );
3133  const int lda( numeric_cast<int>( A.spacing() ) );
3134  const int ldb( numeric_cast<int>( B.spacing() ) );
3135  const int ldc( numeric_cast<int>( C.spacing() ) );
3136  const complex<double> alpha( scalar );
3137  const complex<double> beta ( 1.0, 0.0 );
3138 
3139  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3140  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3141  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3142  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3143  }
3144 #endif
3145  //**********************************************************************************************
3146 
3147  //**Addition assignment to sparse matrices******************************************************
3148  // No special implementation for the addition assignment to sparse matrices.
3149  //**********************************************************************************************
3150 
3151  //**Subtraction assignment to dense matrices****************************************************
3163  template< typename MT3 // Type of the target dense matrix
3164  , bool SO > // Storage order of the target dense matrix
3165  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3166  {
3168 
3169  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3170  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3171 
3172  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3173  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3174 
3175  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3176  return;
3177  }
3178 
3179  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3180  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3181 
3182  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3183  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3184  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3185  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3186  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3187  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3188 
3189  if( (~lhs).rows() * (~lhs).columns() < DMATDMATMULT_THRESHOLD )
3190  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3191  else
3192  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3193  }
3194  //**********************************************************************************************
3195 
3196  //**Default subtraction assignment to dense matrices********************************************
3210  template< typename MT3 // Type of the left-hand side target matrix
3211  , typename MT4 // Type of the left-hand side matrix operand
3212  , typename MT5 // Type of the right-hand side matrix operand
3213  , typename ST2 > // Type of the scalar value
3214  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3215  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3216  {
3217  const ResultType tmp( A * B * scalar );
3218  subAssign( C, tmp );
3219  }
3220  //**********************************************************************************************
3221 
3222  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default kernel for the subtraction assignment C -= scalar * ( A * B ) to a row-major
// dense matrix. Enabled (via EnableIf) only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
//   C      : target matrix; updated through intrinsic load()/store() on &(~C)(i,j)
//   A      : left-hand side operand; read element-wise via A(i,k) and broadcast with set()
//   B      : right-hand side operand; read as intrinsic vectors via B.get(k,j)
//   scalar : scaling factor; broadcast once into 'factor'
//
// The column index j advances in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size
// elements. The 4-, 2- and 1-vector blocks process two rows of A per iteration and finish with a
// single-row remainder step.
3236  template< typename MT3 // Type of the left-hand side target matrix
3237  , typename MT4 // Type of the left-hand side matrix operand
3238  , typename MT5 // Type of the right-hand side matrix operand
3239  , typename ST2 > // Type of the scalar value
3240  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3241  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3242  {
3243  typedef IntrinsicTrait<ElementType> IT;
3244 
3245  const size_t M( A.rows() );
      // NOTE(review): the column bound is B.spacing(), not B.columns(); presumably the padded row
      // length so that only whole intrinsic vectors are processed. This appears to assume that C
      // offers at least the same spacing - confirm against the matrix types.
3246  const size_t N( B.spacing() );
3247  const size_t K( A.columns() );
3248 
3249  const IntrinsicType factor( set( scalar ) );
3250 
3251  size_t j( 0UL );
3252 
      // Column blocks of 8 intrinsic vectors, one row of A at a time.
      // NOTE(review): the xmm accumulators are default-constructed and immediately accumulated
      // into; this assumes IntrinsicType default-initializes to zero - TODO confirm.
3253  for( ; (j+IT::size*8UL) <= N; j+=IT::size*8UL ) {
3254  for( size_t i=0UL; i<M; ++i ) {
3255  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3256  for( size_t k=0UL; k<K; ++k ) {
3257  const IntrinsicType a1( set( A(i,k) ) );
3258  xmm1 = xmm1 + a1 * B.get(k,j );
3259  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3260  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3261  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3262  xmm5 = xmm5 + a1 * B.get(k,j+IT::size*4UL);
3263  xmm6 = xmm6 + a1 * B.get(k,j+IT::size*5UL);
3264  xmm7 = xmm7 + a1 * B.get(k,j+IT::size*6UL);
3265  xmm8 = xmm8 + a1 * B.get(k,j+IT::size*7UL);
3266  }
      // Subtract the scaled accumulators from the current content of C.
3267  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3268  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3269  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3270  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3271  store( &(~C)(i,j+IT::size*4UL), load( &(~C)(i,j+IT::size*4UL) ) - xmm5 * factor );
3272  store( &(~C)(i,j+IT::size*5UL), load( &(~C)(i,j+IT::size*5UL) ) - xmm6 * factor );
3273  store( &(~C)(i,j+IT::size*6UL), load( &(~C)(i,j+IT::size*6UL) ) - xmm7 * factor );
3274  store( &(~C)(i,j+IT::size*7UL), load( &(~C)(i,j+IT::size*7UL) ) - xmm8 * factor );
3275  }
3276  }
      // Column blocks of 4 intrinsic vectors, two rows of A at a time (xmm1-4: row i, xmm5-8: row i+1).
3277  for( ; (j+IT::size*4UL) <= N; j+=IT::size*4UL ) {
3278  size_t i( 0UL );
3279  for( ; (i+2UL) <= M; i+=2UL ) {
3280  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3281  for( size_t k=0UL; k<K; ++k ) {
3282  const IntrinsicType a1( set( A(i ,k) ) );
3283  const IntrinsicType a2( set( A(i+1UL,k) ) );
3284  const IntrinsicType b1( B.get(k,j ) );
3285  const IntrinsicType b2( B.get(k,j+IT::size ) );
3286  const IntrinsicType b3( B.get(k,j+IT::size*2UL) );
3287  const IntrinsicType b4( B.get(k,j+IT::size*3UL) );
3288  xmm1 = xmm1 + a1 * b1;
3289  xmm2 = xmm2 + a1 * b2;
3290  xmm3 = xmm3 + a1 * b3;
3291  xmm4 = xmm4 + a1 * b4;
3292  xmm5 = xmm5 + a2 * b1;
3293  xmm6 = xmm6 + a2 * b2;
3294  xmm7 = xmm7 + a2 * b3;
3295  xmm8 = xmm8 + a2 * b4;
3296  }
3297  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3298  store( &(~C)(i ,j+IT::size ), load( &(~C)(i ,j+IT::size ) ) - xmm2 * factor );
3299  store( &(~C)(i ,j+IT::size*2UL), load( &(~C)(i ,j+IT::size*2UL) ) - xmm3 * factor );
3300  store( &(~C)(i ,j+IT::size*3UL), load( &(~C)(i ,j+IT::size*3UL) ) - xmm4 * factor );
3301  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm5 * factor );
3302  store( &(~C)(i+1UL,j+IT::size ), load( &(~C)(i+1UL,j+IT::size ) ) - xmm6 * factor );
3303  store( &(~C)(i+1UL,j+IT::size*2UL), load( &(~C)(i+1UL,j+IT::size*2UL) ) - xmm7 * factor );
3304  store( &(~C)(i+1UL,j+IT::size*3UL), load( &(~C)(i+1UL,j+IT::size*3UL) ) - xmm8 * factor );
3305  }
      // Remaining single row in case M is odd.
3306  if( i < M ) {
3307  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3308  for( size_t k=0UL; k<K; ++k ) {
3309  const IntrinsicType a1( set( A(i,k) ) );
3310  xmm1 = xmm1 + a1 * B.get(k,j );
3311  xmm2 = xmm2 + a1 * B.get(k,j+IT::size );
3312  xmm3 = xmm3 + a1 * B.get(k,j+IT::size*2UL);
3313  xmm4 = xmm4 + a1 * B.get(k,j+IT::size*3UL);
3314  }
3315  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3316  store( &(~C)(i,j+IT::size ), load( &(~C)(i,j+IT::size ) ) - xmm2 * factor );
3317  store( &(~C)(i,j+IT::size*2UL), load( &(~C)(i,j+IT::size*2UL) ) - xmm3 * factor );
3318  store( &(~C)(i,j+IT::size*3UL), load( &(~C)(i,j+IT::size*3UL) ) - xmm4 * factor );
3319  }
3320  }
      // Column blocks of 2 intrinsic vectors, two rows of A at a time (xmm1-2: row i, xmm3-4: row i+1).
3321  for( ; (j+IT::size*2UL) <= N; j+=IT::size*2UL ) {
3322  size_t i( 0UL );
3323  for( ; (i+2UL) <= M; i+=2UL ) {
3324  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3325  for( size_t k=0UL; k<K; ++k ) {
3326  const IntrinsicType a1( set( A(i ,k) ) );
3327  const IntrinsicType a2( set( A(i+1UL,k) ) );
3328  const IntrinsicType b1( B.get(k,j ) );
3329  const IntrinsicType b2( B.get(k,j+IT::size) );
3330  xmm1 = xmm1 + a1 * b1;
3331  xmm2 = xmm2 + a1 * b2;
3332  xmm3 = xmm3 + a2 * b1;
3333  xmm4 = xmm4 + a2 * b2;
3334  }
3335  store( &(~C)(i ,j ), load( &(~C)(i ,j ) ) - xmm1 * factor );
3336  store( &(~C)(i ,j+IT::size), load( &(~C)(i ,j+IT::size) ) - xmm2 * factor );
3337  store( &(~C)(i+1UL,j ), load( &(~C)(i+1UL,j ) ) - xmm3 * factor );
3338  store( &(~C)(i+1UL,j+IT::size), load( &(~C)(i+1UL,j+IT::size) ) - xmm4 * factor );
3339  }
      // Remaining single row in case M is odd.
3340  if( i < M ) {
3341  IntrinsicType xmm1, xmm2;
3342  for( size_t k=0UL; k<K; ++k ) {
3343  const IntrinsicType a1( set( A(i,k) ) );
3344  xmm1 = xmm1 + a1 * B.get(k,j );
3345  xmm2 = xmm2 + a1 * B.get(k,j+IT::size);
3346  }
3347  store( &(~C)(i,j ), load( &(~C)(i,j ) ) - xmm1 * factor );
3348  store( &(~C)(i,j+IT::size), load( &(~C)(i,j+IT::size) ) - xmm2 * factor );
3349  }
3350  }
      // Final column block of a single intrinsic vector, again two rows at a time plus remainder.
3351  if( j < N ) {
3352  size_t i( 0UL );
3353  for( ; (i+2UL) <= M; i+=2UL ) {
3354  IntrinsicType xmm1, xmm2;
3355  for( size_t k=0UL; k<K; ++k ) {
3356  const IntrinsicType b1( B.get(k,j) );
3357  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3358  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3359  }
3360  store( &(~C)(i ,j), load( &(~C)(i ,j) ) - xmm1 * factor );
3361  store( &(~C)(i+1UL,j), load( &(~C)(i+1UL,j) ) - xmm2 * factor );
3362  }
3363  if( i < M ) {
3364  IntrinsicType xmm1;
3365  for( size_t k=0UL; k<K; ++k ) {
3366  xmm1 = xmm1 + set( A(i,k) ) * B.get(k,j);
3367  }
3368  store( &(~C)(i,j), load( &(~C)(i,j) ) - xmm1 * factor );
3369  }
3370  }
3371  }
3372  //**********************************************************************************************
3373 
3374  //**Vectorized default subtraction assignment to column-major dense matrices********************
3388  template< typename MT3 // Type of the left-hand side target matrix
3389  , typename MT4 // Type of the left-hand side matrix operand
3390  , typename MT5 // Type of the right-hand side matrix operand
3391  , typename ST2 > // Type of the scalar value
3392  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3393  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3394  {
3395  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT4::OppositeType );
3396  BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE( typename MT5::OppositeType );
3397 
3398  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3399  const typename MT4::OppositeType tmp( A );
3400  subAssign( ~C, tmp * B * scalar );
3401  }
3402  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3403  const typename MT5::OppositeType tmp( B );
3404  subAssign( ~C, A * tmp * scalar );
3405  }
3406  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3407  const typename MT4::OppositeType tmp( A );
3408  subAssign( ~C, tmp * B * scalar );
3409  }
3410  else {
3411  const typename MT5::OppositeType tmp( B );
3412  subAssign( ~C, A * tmp * scalar );
3413  }
3414  }
3415  //**********************************************************************************************
3416 
3417  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3431  template< typename MT3 // Type of the left-hand side target matrix
3432  , typename MT4 // Type of the left-hand side matrix operand
3433  , typename MT5 // Type of the right-hand side matrix operand
3434  , typename ST2 > // Type of the scalar value
3435  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3436  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3437  {
3438  selectDefaultSubAssignKernel( C, A, B, scalar );
3439  }
3440  //**********************************************************************************************
3441 
3442  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3443 #if BLAZE_BLAS_MODE
3444 
3457  template< typename MT3 // Type of the left-hand side target matrix
3458  , typename MT4 // Type of the left-hand side matrix operand
3459  , typename MT5 // Type of the right-hand side matrix operand
3460  , typename ST2 > // Type of the scalar value
3461  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3462  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3463  {
3464  using boost::numeric_cast;
3465 
3466  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType );
3467  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType );
3468  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType );
3469 
3470  const int M ( numeric_cast<int>( A.rows() ) );
3471  const int N ( numeric_cast<int>( B.columns() ) );
3472  const int K ( numeric_cast<int>( A.columns() ) );
3473  const int lda( numeric_cast<int>( A.spacing() ) );
3474  const int ldb( numeric_cast<int>( B.spacing() ) );
3475  const int ldc( numeric_cast<int>( C.spacing() ) );
3476 
3477  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3478  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3479  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3480  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3481  }
3482 #endif
3483  //**********************************************************************************************
3484 
3485  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3486 #if BLAZE_BLAS_MODE
3487 
3500  template< typename MT3 // Type of the left-hand side target matrix
3501  , typename MT4 // Type of the left-hand side matrix operand
3502  , typename MT5 // Type of the right-hand side matrix operand
3503  , typename ST2 > // Type of the scalar value
3504  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3505  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3506  {
3507  using boost::numeric_cast;
3508 
3509  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType );
3510  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType );
3511  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType );
3512 
3513  const int M ( numeric_cast<int>( A.rows() ) );
3514  const int N ( numeric_cast<int>( B.columns() ) );
3515  const int K ( numeric_cast<int>( A.columns() ) );
3516  const int lda( numeric_cast<int>( A.spacing() ) );
3517  const int ldb( numeric_cast<int>( B.spacing() ) );
3518  const int ldc( numeric_cast<int>( C.spacing() ) );
3519 
3520  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3521  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3522  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3523  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3524  }
3525 #endif
3526  //**********************************************************************************************
3527 
3528  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3529 #if BLAZE_BLAS_MODE
3530 
3543  template< typename MT3 // Type of the left-hand side target matrix
3544  , typename MT4 // Type of the left-hand side matrix operand
3545  , typename MT5 // Type of the right-hand side matrix operand
3546  , typename ST2 > // Type of the scalar value
3547  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3548  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3549  {
3550  using boost::numeric_cast;
3551 
3552  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3553  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3554  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3555  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3556  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3557  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3558 
3559  const int M ( numeric_cast<int>( A.rows() ) );
3560  const int N ( numeric_cast<int>( B.columns() ) );
3561  const int K ( numeric_cast<int>( A.columns() ) );
3562  const int lda( numeric_cast<int>( A.spacing() ) );
3563  const int ldb( numeric_cast<int>( B.spacing() ) );
3564  const int ldc( numeric_cast<int>( C.spacing() ) );
3565  const complex<float> alpha( -scalar );
3566  const complex<float> beta ( 1.0F, 0.0F );
3567 
3568  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3569  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3570  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3571  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3572  }
3573 #endif
3574  //**********************************************************************************************
3575 
3576  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3577 #if BLAZE_BLAS_MODE
3578 
3591  template< typename MT3 // Type of the left-hand side target matrix
3592  , typename MT4 // Type of the left-hand side matrix operand
3593  , typename MT5 // Type of the right-hand side matrix operand
3594  , typename ST2 > // Type of the scalar value
3595  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3596  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3597  {
3598  using boost::numeric_cast;
3599 
3600  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT3::ElementType );
3601  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT4::ElementType );
3602  BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE( typename MT5::ElementType );
3603  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3604  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3605  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3606 
3607  const int M ( numeric_cast<int>( A.rows() ) );
3608  const int N ( numeric_cast<int>( B.columns() ) );
3609  const int K ( numeric_cast<int>( A.columns() ) );
3610  const int lda( numeric_cast<int>( A.spacing() ) );
3611  const int ldb( numeric_cast<int>( B.spacing() ) );
3612  const int ldc( numeric_cast<int>( C.spacing() ) );
3613  const complex<double> alpha( -scalar );
3614  const complex<double> beta ( 1.0, 0.0 );
3615 
3616  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3617  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3618  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3619  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3620  }
3621 #endif
3622  //**********************************************************************************************
3623 
3624  //**Subtraction assignment to sparse matrices***************************************************
3625  // No special implementation for the subtraction assignment to sparse matrices.
3626  //**********************************************************************************************
3627 
3628  //**Multiplication assignment to dense matrices*************************************************
3629  // No special implementation for the multiplication assignment to dense matrices.
3630  //**********************************************************************************************
3631 
3632  //**Multiplication assignment to sparse matrices************************************************
3633  // No special implementation for the multiplication assignment to sparse matrices.
3634  //**********************************************************************************************
3635 
3636  //**Compile time checks*************************************************************************
3645  //**********************************************************************************************
3646 };
3648 //*************************************************************************************************
3649 
3650 
3651 
3652 
3653 //=================================================================================================
3654 //
3655 // GLOBAL BINARY ARITHMETIC OPERATORS
3656 //
3657 //=================================================================================================
3658 
3659 //*************************************************************************************************
3685 template< typename T1 // Type of the left-hand side dense matrix
3686  , typename T2 > // Type of the right-hand side dense matrix
3687 inline const DMatDMatMultExpr<T1,T2>
3689 {
3691 
3692  if( (~lhs).columns() != (~rhs).rows() )
3693  throw std::invalid_argument( "Matrix sizes do not match" );
3694 
3695  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3696 }
3697 //*************************************************************************************************
3698 
3699 
3700 
3701 
3702 //=================================================================================================
3703 //
3704 // EXPRESSION TRAIT SPECIALIZATIONS
3705 //
3706 //=================================================================================================
3707 
3708 //*************************************************************************************************
3710 template< typename MT1, typename MT2, typename VT >
3711 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3712 {
3713  public:
3714  //**********************************************************************************************
3715  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3716  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3717  IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
3718  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
3719  , INVALID_TYPE >::Type Type;
3720  //**********************************************************************************************
3721 };
3723 //*************************************************************************************************
3724 
3725 
3726 //*************************************************************************************************
3728 template< typename MT1, typename MT2, typename VT >
3729 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
3730 {
3731  public:
3732  //**********************************************************************************************
3733  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3734  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
3735  IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
3736  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
3737  , INVALID_TYPE >::Type Type;
3738  //**********************************************************************************************
3739 };
3741 //*************************************************************************************************
3742 
3743 
3744 //*************************************************************************************************
3746 template< typename VT, typename MT1, typename MT2 >
3747 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3748 {
3749  public:
3750  //**********************************************************************************************
3751  typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
3752  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3753  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3754  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3755  , INVALID_TYPE >::Type Type;
3756  //**********************************************************************************************
3757 };
3759 //*************************************************************************************************
3760 
3761 
3762 //*************************************************************************************************
3764 template< typename VT, typename MT1, typename MT2 >
3765 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
3766 {
3767  public:
3768  //**********************************************************************************************
3769  typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
3770  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
3771  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
3772  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3773  , INVALID_TYPE >::Type Type;
3774  //**********************************************************************************************
3775 };
3777 //*************************************************************************************************
3778 
3779 
3780 //*************************************************************************************************
3782 template< typename MT1, typename MT2 >
3783 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
3784 {
3785  public:
3786  //**********************************************************************************************
3787  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
3788  //**********************************************************************************************
3789 };
3791 //*************************************************************************************************
3792 
3793 
3794 //*************************************************************************************************
3796 template< typename MT1, typename MT2 >
3797 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
3798 {
3799  public:
3800  //**********************************************************************************************
3801  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
3802  //**********************************************************************************************
3803 };
3805 //*************************************************************************************************
3806 
3807 } // namespace blaze
3808 
3809 #endif