All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
75 #include <blaze/system/BLAS.h>
77 #include <blaze/util/Assert.h>
78 #include <blaze/util/Complex.h>
84 #include <blaze/util/EnableIf.h>
85 #include <blaze/util/InvalidType.h>
87 #include <blaze/util/SelectType.h>
88 #include <blaze/util/Types.h>
94 
95 
96 namespace blaze {
97 
98 //=================================================================================================
99 //
100 // CLASS TDMATTDMATMULTEXPR
101 //
102 //=================================================================================================
103 
104 //*************************************************************************************************
111 template< typename MT1 // Type of the left-hand side dense matrix
112  , typename MT2 > // Type of the right-hand side dense matrix
113 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
114  , private MatMatMultExpr
115  , private Computation
116 {
117  private:
118  //**Type definitions****************************************************************************
// Private shorthands for the nested types exported by the operand types MT1 and MT2.
119  typedef typename MT1::ResultType RT1; // MT1's nested ResultType
120  typedef typename MT2::ResultType RT2; // MT2's nested ResultType
121  typedef typename RT1::ElementType ET1; // ElementType nested in RT1
122  typedef typename RT2::ElementType ET2; // ElementType nested in RT2
123  typedef typename MT1::CompositeType CT1; // MT1's nested CompositeType
124  typedef typename MT2::CompositeType CT2; // MT2's nested CompositeType
125  //**********************************************************************************************
126 
127  //**********************************************************************************************
130  //**********************************************************************************************
131 
132  //**********************************************************************************************
// Compile-time flag for the right-hand side operand type: nonzero when
// IsComputation<MT2>::value or RequiresEvaluation<MT2>::value is set.
// It is consumed by UseSMPAssignKernel and by the smpAssignable flag.
134  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
135  //**********************************************************************************************
136 
137  //**********************************************************************************************
139 
// Selector used as the EnableIf/DisableIf condition of selectAssignKernel()
// and selectAddAssignKernel(). Its value does not depend on T1, T2 or T3: it
// is nonzero whenever evaluateLeft or evaluateRight is set.
// NOTE(review): evaluateLeft is declared on a line not present in this listing.
142  template< typename T1, typename T2, typename T3 >
143  struct UseSMPAssignKernel {
144  enum { value = evaluateLeft || evaluateRight };
145  };
147  //**********************************************************************************************
148 
149  //**********************************************************************************************
151 
// Selector for the cblas_sgemm based kernels: nonzero only if IsFloat holds
// for the ElementType of all three matrix types T1, T2 and T3.
154  template< typename T1, typename T2, typename T3 >
155  struct UseSinglePrecisionKernel {
156  enum { value = IsFloat<typename T1::ElementType>::value &&
157  IsFloat<typename T2::ElementType>::value &&
158  IsFloat<typename T3::ElementType>::value };
159  };
161  //**********************************************************************************************
162 
163  //**********************************************************************************************
165 
// Selector for the cblas_dgemm based kernels: nonzero only if IsDouble holds
// for the ElementType of all three matrix types T1, T2 and T3.
168  template< typename T1, typename T2, typename T3 >
169  struct UseDoublePrecisionKernel {
170  enum { value = IsDouble<typename T1::ElementType>::value &&
171  IsDouble<typename T2::ElementType>::value &&
172  IsDouble<typename T3::ElementType>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
// Selector for the cblas_cgemm based kernels: nonzero only if the ElementType
// of T1, T2 and T3 is exactly complex<float> (checked with IsSame).
183  template< typename T1, typename T2, typename T3 >
184  struct UseSinglePrecisionComplexKernel {
185  typedef complex<float> Type;
186  enum { value = IsSame<typename T1::ElementType,Type>::value &&
187  IsSame<typename T2::ElementType,Type>::value &&
188  IsSame<typename T3::ElementType,Type>::value };
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
// Selector for the cblas_zgemm based kernels: nonzero only if the ElementType
// of T1, T2 and T3 is exactly complex<double> (checked with IsSame).
199  template< typename T1, typename T2, typename T3 >
200  struct UseDoublePrecisionComplexKernel {
201  typedef complex<double> Type;
202  enum { value = IsSame<typename T1::ElementType,Type>::value &&
203  IsSame<typename T2::ElementType,Type>::value &&
204  IsSame<typename T3::ElementType,Type>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
// Selector for the non-BLAS fallback of the selectBlas...Kernel() overloads:
// nonzero when BLAZE_BLAS_MODE is zero, or when none of the four precision
// specific selectors (float, double, complex<float>, complex<double>) matches.
214  template< typename T1, typename T2, typename T3 >
215  struct UseDefaultKernel {
216  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
217  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
218  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
219  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
220  };
222  //**********************************************************************************************
223 
224  //**********************************************************************************************
226 
// Selector for the vectorized default kernels. It is nonzero only if
//  - all three matrix types report themselves as vectorizable,
//  - all three share the same ElementType, and
//  - IntrinsicTrait of that element type advertises addition, subtraction
//    and multiplication.
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234  IntrinsicTrait<typename T1::ElementType>::addition &&
235  IntrinsicTrait<typename T1::ElementType>::subtraction &&
236  IntrinsicTrait<typename T1::ElementType>::multiplication };
237  };
239  //**********************************************************************************************
240 
241  public:
242  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are declared on listing lines that
// are not part of this excerpt.
249  typedef const ElementType ReturnType; // type returned by operator()
250  typedef const ResultType CompositeType;
251 
// Type used to store the left operand: const MT1 or const MT1&, chosen by
// IsExpression<MT1> (presumably the by-value form is picked for expression
// operands - confirm against SelectType).
253  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
254 
// Same selection for the right operand, based on IsExpression<MT2>.
256  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
257 
260 
263  //**********************************************************************************************
264 
265  //**Compilation flags***************************************************************************
// Compilation switch for vectorized evaluation.
// NOTE(review): the remainder of this enumerator (listing lines 268-270) is
// missing from this excerpt, so the full condition is not visible here.
267  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
271 
// Compilation switch for SMP assignment: set only when neither operand is
// flagged by evaluateLeft / evaluateRight.
273  enum { smpAssignable = !evaluateLeft && !evaluateRight };
274  //**********************************************************************************************
275 
276  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
//   lhs  left-hand side dense matrix operand
//   rhs  right-hand side dense matrix operand
// The operands must be conformable (lhs.columns() == rhs.rows()); this is only
// verified through BLAZE_INTERNAL_ASSERT.
282  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
283  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
284  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
285  {
286  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
287  }
288  //**********************************************************************************************
289 
290  //**Access operator*****************************************************************************
297  inline ReturnType operator()( size_t i, size_t j ) const {
298  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
299  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
300 
301  ElementType tmp;
302 
303  if( lhs_.columns() != 0UL ) {
304  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
305  tmp = lhs_(i,0UL) * rhs_(0UL,j);
306  for( size_t k=1UL; k<end; k+=2UL ) {
307  tmp += lhs_(i,k ) * rhs_(k ,j);
308  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
309  }
310  if( end < lhs_.columns() ) {
311  tmp += lhs_(i,end) * rhs_(end,j);
312  }
313  }
314  else {
315  reset( tmp );
316  }
317 
318  return tmp;
319  }
320  //**********************************************************************************************
321 
322  //**Rows function*******************************************************************************
// Returns the number of rows of the product, i.e. the row count of the left operand.
327  inline size_t rows() const {
328  return lhs_.rows();
329  }
330  //**********************************************************************************************
331 
332  //**Columns function****************************************************************************
// Returns the number of columns of the product, i.e. the column count of the right operand.
337  inline size_t columns() const {
338  return rhs_.columns();
339  }
340  //**********************************************************************************************
341 
342  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (as LeftOperand).
347  inline LeftOperand leftOperand() const {
348  return lhs_;
349  }
350  //**********************************************************************************************
351 
352  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (as RightOperand).
357  inline RightOperand rightOperand() const {
358  return rhs_;
359  }
360  //**********************************************************************************************
361 
362  //**********************************************************************************************
368  template< typename T >
369  inline bool canAlias( const T* alias ) const {
370  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
371  }
372  //**********************************************************************************************
373 
374  //**********************************************************************************************
380  template< typename T >
381  inline bool isAliased( const T* alias ) const {
382  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
383  }
384  //**********************************************************************************************
385 
386  //**********************************************************************************************
391  inline bool isAligned() const {
392  return lhs_.isAligned() && rhs_.isAligned();
393  }
394  //**********************************************************************************************
395 
396  //**********************************************************************************************
// Reports whether the expression can be used in an SMP assignment.
// The visible part of the condition holds when BLAZE_BLAS_IS_PARALLEL is zero
// or when the number of result elements is below TDMATTDMATMULT_THRESHOLD.
// NOTE(review): listing line 404 (the operand following the trailing '&&') is
// missing from this excerpt, so the complete condition cannot be read here.
401  inline bool canSMPAssign() const {
402  return ( !BLAZE_BLAS_IS_PARALLEL ||
403  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
405  }
406  //**********************************************************************************************
407 
408  private:
409  //**Member variables****************************************************************************
412  //**********************************************************************************************
413 
414  //**Assignment to dense matrices****************************************************************
// Assignment of the matrix product to a dense matrix (lhs = A * B).
//   lhs  target dense matrix; must already have the dimensions of the product
//   rhs  multiplication expression to be assigned
// NOTE(review): LT and RT are declared on lines not present in this listing;
// listing line 428 is elided as well.
424  template< typename MT // Type of the target dense matrix
425  , bool SO > // Storage order of the target dense matrix
426  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
427  {
429 
430  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
431  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
432 
// An empty target needs no work; an empty inner dimension means every product
// element is an empty sum, so the target is reset instead.
433  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
434  return;
435  }
436  else if( rhs.lhs_.columns() == 0UL ) {
437  reset( ~lhs );
438  return;
439  }
440 
441  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
442  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
443 
444  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
445  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
446  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
447  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
448  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
449  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
450 
// Dispatch to the SMP or the serial kernel selection
451  TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
452  }
454  //**********************************************************************************************
455 
456  //**Assignment to dense matrices (kernel selection)*********************************************
467  template< typename MT3 // Type of the left-hand side target matrix
468  , typename MT4 // Type of the left-hand side matrix operand
469  , typename MT5 > // Type of the right-hand side matrix operand
470  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
471  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
472  {
473  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
474  TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
475  else
476  TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
477  }
479  //**********************************************************************************************
480 
481  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = A * B; this overload participates
// only when UseSMPAssignKernel<MT3,MT4,MT5> is set. It forwards the product of
// the two operands to smpAssign().
492  template< typename MT3 // Type of the left-hand side target matrix
493  , typename MT4 // Type of the left-hand side matrix operand
494  , typename MT5 > // Type of the right-hand side matrix operand
495  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
496  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
497  {
498  smpAssign( C, A * B );
499  }
501  //**********************************************************************************************
502 
503  //**Default assignment to dense matrices********************************************************
517  template< typename MT3 // Type of the left-hand side target matrix
518  , typename MT4 // Type of the left-hand side matrix operand
519  , typename MT5 > // Type of the right-hand side matrix operand
520  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
521  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
522  {
523  const size_t M( A.rows() );
524  const size_t N( B.columns() );
525  const size_t K( A.columns() );
526 
527  for( size_t i=0UL; i<M; ++i ) {
528  for( size_t j=0UL; j<N; ++j ) {
529  C(i,j) = A(i,0UL) * B(0UL,j);
530  }
531  for( size_t k=1UL; k<K; ++k ) {
532  for( size_t j=0UL; j<N; ++j ) {
533  C(i,j) += A(i,k) * B(k,j);
534  }
535  }
536  }
537  }
539  //**********************************************************************************************
540 
541  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for the assignment to a row-major target
// (DenseMatrix<MT3,false>). No product is computed here directly: one operand
// is converted to its OppositeType and the resulting mixed product is
// forwarded to smpAssign().
// NOTE(review): listing lines 561-562 are elided from this excerpt.
// NOTE(review): the add-assign counterpart of this kernel forwards to
// addAssign() rather than an smp* function - confirm that this difference is
// intended.
555  template< typename MT3 // Type of the left-hand side target matrix
556  , typename MT4 // Type of the left-hand side matrix operand
557  , typename MT5 > // Type of the right-hand side matrix operand
558  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
559  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
560  {
563 
// Choice of the operand to convert:
//  - only A resizable           -> convert B
//  - only B resizable           -> convert A
//  - otherwise, B not larger    -> convert B
//  - otherwise                  -> convert A
564  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
565  const typename MT5::OppositeType tmp( B );
566  smpAssign( ~C, A * tmp );
567  }
568  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
569  const typename MT4::OppositeType tmp( A );
570  smpAssign( ~C, tmp * B );
571  }
572  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
573  const typename MT5::OppositeType tmp( B );
574  smpAssign( ~C, A * tmp );
575  }
576  else {
577  const typename MT4::OppositeType tmp( A );
578  smpAssign( ~C, tmp * B );
579  }
580  }
582  //**********************************************************************************************
583 
584  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for the assignment to a column-major target
// (DenseMatrix<MT3,true>), C = A * B.
// The rows of A/C are processed in blocks of 8, 4, 2 and finally 1 intrinsic
// vector(s) of IT::size elements. For each block, a column j of C is built by
// accumulating A.load(i..,k) * set( B(k,j) ) over the inner index k and is
// then written back with store().
// NOTE(review): the xmm accumulators are used without explicit initialization;
// this assumes IntrinsicType default-constructs to zero - TODO confirm.
// NOTE(review): the loop bounds only require the *start* of the last vector in
// a block to be below M, so loads/stores may reach past row M-1 when M is not a
// multiple of IT::size; presumably the operands are padded - confirm.
598  template< typename MT3 // Type of the left-hand side target matrix
599  , typename MT4 // Type of the left-hand side matrix operand
600  , typename MT5 > // Type of the right-hand side matrix operand
601  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
602  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
603  {
604  typedef IntrinsicTrait<ElementType> IT;
605 
606  const size_t M( A.rows() );
607  const size_t N( B.columns() );
608  const size_t K( A.columns() );
609 
// Row index shared by all blocking stages below
610  size_t i( 0UL );
611 
// Stage 1: blocks of 8 vectors of rows, one column of C at a time
612  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
613  for( size_t j=0UL; j<N; ++j ) {
614  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
615  for( size_t k=0UL; k<K; ++k ) {
616  const IntrinsicType b1( set( B(k,j) ) );
617  xmm1 = xmm1 + A.load(i ,k) * b1;
618  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
619  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
620  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
621  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
622  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
623  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
624  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
625  }
626  (~C).store( i , j, xmm1 );
627  (~C).store( i+IT::size , j, xmm2 );
628  (~C).store( i+IT::size*2UL, j, xmm3 );
629  (~C).store( i+IT::size*3UL, j, xmm4 );
630  (~C).store( i+IT::size*4UL, j, xmm5 );
631  (~C).store( i+IT::size*5UL, j, xmm6 );
632  (~C).store( i+IT::size*6UL, j, xmm7 );
633  (~C).store( i+IT::size*7UL, j, xmm8 );
634  }
635  }
// Stage 2: blocks of 4 vectors of rows, two columns of C at a time
// (xmm1-4 accumulate column j, xmm5-8 accumulate column j+1)
636  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
637  size_t j( 0UL );
638  for( ; (j+2UL) <= N; j+=2UL ) {
639  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
640  for( size_t k=0UL; k<K; ++k ) {
641  const IntrinsicType a1( A.load(i ,k) );
642  const IntrinsicType a2( A.load(i+IT::size ,k) );
643  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
644  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
645  const IntrinsicType b1( set( B(k,j ) ) );
646  const IntrinsicType b2( set( B(k,j+1UL) ) );
647  xmm1 = xmm1 + a1 * b1;
648  xmm2 = xmm2 + a2 * b1;
649  xmm3 = xmm3 + a3 * b1;
650  xmm4 = xmm4 + a4 * b1;
651  xmm5 = xmm5 + a1 * b2;
652  xmm6 = xmm6 + a2 * b2;
653  xmm7 = xmm7 + a3 * b2;
654  xmm8 = xmm8 + a4 * b2;
655  }
656  (~C).store( i , j , xmm1 );
657  (~C).store( i+IT::size , j , xmm2 );
658  (~C).store( i+IT::size*2UL, j , xmm3 );
659  (~C).store( i+IT::size*3UL, j , xmm4 );
660  (~C).store( i , j+1UL, xmm5 );
661  (~C).store( i+IT::size , j+1UL, xmm6 );
662  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
663  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
664  }
// Last column for an odd number of columns
665  if( j < N ) {
666  IntrinsicType xmm1, xmm2, xmm3, xmm4;
667  for( size_t k=0UL; k<K; ++k ) {
668  const IntrinsicType b1( set( B(k,j) ) );
669  xmm1 = xmm1 + A.load(i ,k) * b1;
670  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
671  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
672  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
673  }
674  (~C).store( i , j, xmm1 );
675  (~C).store( i+IT::size , j, xmm2 );
676  (~C).store( i+IT::size*2UL, j, xmm3 );
677  (~C).store( i+IT::size*3UL, j, xmm4 );
678  }
679  }
// Stage 3: blocks of 2 vectors of rows, two columns of C at a time
680  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
681  size_t j( 0UL );
682  for( ; (j+2UL) <= N; j+=2UL ) {
683  IntrinsicType xmm1, xmm2, xmm3, xmm4;
684  for( size_t k=0UL; k<K; ++k ) {
685  const IntrinsicType a1( A.load(i ,k) );
686  const IntrinsicType a2( A.load(i+IT::size,k) );
687  const IntrinsicType b1( set( B(k,j ) ) );
688  const IntrinsicType b2( set( B(k,j+1UL) ) );
689  xmm1 = xmm1 + a1 * b1;
690  xmm2 = xmm2 + a2 * b1;
691  xmm3 = xmm3 + a1 * b2;
692  xmm4 = xmm4 + a2 * b2;
693  }
694  (~C).store( i , j , xmm1 );
695  (~C).store( i+IT::size, j , xmm2 );
696  (~C).store( i , j+1UL, xmm3 );
697  (~C).store( i+IT::size, j+1UL, xmm4 );
698  }
// Last column for an odd number of columns
699  if( j < N ) {
700  IntrinsicType xmm1, xmm2;
701  for( size_t k=0UL; k<K; ++k ) {
702  const IntrinsicType b1( set( B(k,j) ) );
703  xmm1 = xmm1 + A.load(i ,k) * b1;
704  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
705  }
706  (~C).store( i , j, xmm1 );
707  (~C).store( i+IT::size, j, xmm2 );
708  }
709  }
// Stage 4: a single remaining vector of rows, two columns of C at a time
710  if( i < M ) {
711  size_t j( 0UL );
712  for( ; (j+2UL) <= N; j+=2UL ) {
713  IntrinsicType xmm1, xmm2;
714  for( size_t k=0UL; k<K; ++k ) {
715  const IntrinsicType a1( A.load(i,k) );
716  xmm1 = xmm1 + a1 * set( B(k,j ) );
717  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
718  }
719  (~C).store( i, j , xmm1 );
720  (~C).store( i, j+1UL, xmm2 );
721  }
// Last column for an odd number of columns
722  if( j < N ) {
723  IntrinsicType xmm1;
724  for( size_t k=0UL; k<K; ++k ) {
725  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
726  }
727  (~C).store( i, j, xmm1 );
728  }
729  }
730  }
732  //**********************************************************************************************
733 
734  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback of the BLAS kernel selection for the assignment C = A * B: taken
// when UseDefaultKernel<MT3,MT4,MT5> is set (no matching BLAS kernel or
// BLAZE_BLAS_MODE disabled) and simply forwards to selectDefaultAssignKernel().
748  template< typename MT3 // Type of the left-hand side target matrix
749  , typename MT4 // Type of the left-hand side matrix operand
750  , typename MT5 > // Type of the right-hand side matrix operand
751  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
752  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
753  {
754  selectDefaultAssignKernel( C, A, B );
755  }
757  //**********************************************************************************************
758 
759  //**BLAS-based assignment to dense matrices (single precision)**********************************
760 #if BLAZE_BLAS_MODE
761 
// BLAS kernel for the assignment C = A * B with single precision operands
// (selected by UseSinglePrecisionKernel); implemented through cblas_sgemm.
// NOTE(review): listing lines 782-784 are elided from this excerpt.
774  template< typename MT3 // Type of the left-hand side target matrix
775  , typename MT4 // Type of the left-hand side matrix operand
776  , typename MT5 > // Type of the right-hand side matrix operand
777  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
778  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
779  {
780  using boost::numeric_cast;
781 
785 
// The CBLAS interface takes int arguments; all sizes and spacings (used as
// leading dimensions) are converted via numeric_cast.
786  const int M ( numeric_cast<int>( A.rows() ) );
787  const int N ( numeric_cast<int>( B.columns() ) );
788  const int K ( numeric_cast<int>( A.columns() ) );
789  const int lda( numeric_cast<int>( A.spacing() ) );
790  const int ldb( numeric_cast<int>( B.spacing() ) );
791  const int ldc( numeric_cast<int>( C.spacing() ) );
792 
// Layout and transposition flags follow the storage order of the target type
// MT3: a row-major target passes CblasRowMajor with both operands transposed,
// any other target passes CblasColMajor without transposition.
// The scaling factors passed are alpha = 1.0F and beta = 0.0F.
793  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
794  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
795  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
796  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
797  }
799 #endif
800  //**********************************************************************************************
801 
802  //**BLAS-based assignment to dense matrices (double precision)**********************************
803 #if BLAZE_BLAS_MODE
804 
// BLAS kernel for the assignment C = A * B with double precision operands
// (selected by UseDoublePrecisionKernel); implemented through cblas_dgemm.
// NOTE(review): listing lines 825-827 are elided from this excerpt.
817  template< typename MT3 // Type of the left-hand side target matrix
818  , typename MT4 // Type of the left-hand side matrix operand
819  , typename MT5 > // Type of the right-hand side matrix operand
820  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
821  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
822  {
823  using boost::numeric_cast;
824 
828 
// The CBLAS interface takes int arguments; all sizes and spacings (used as
// leading dimensions) are converted via numeric_cast.
829  const int M ( numeric_cast<int>( A.rows() ) );
830  const int N ( numeric_cast<int>( B.columns() ) );
831  const int K ( numeric_cast<int>( A.columns() ) );
832  const int lda( numeric_cast<int>( A.spacing() ) );
833  const int ldb( numeric_cast<int>( B.spacing() ) );
834  const int ldc( numeric_cast<int>( C.spacing() ) );
835 
// Layout and transposition flags follow the storage order of the target type
// MT3: a row-major target passes CblasRowMajor with both operands transposed,
// any other target passes CblasColMajor without transposition.
// The scaling factors passed are alpha = 1.0 and beta = 0.0.
836  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
837  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
838  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
839  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
840  }
842 #endif
843  //**********************************************************************************************
844 
845  //**BLAS-based assignment to dense matrices (single precision complex)**************************
846 #if BLAZE_BLAS_MODE
847 
// BLAS kernel for the assignment C = A * B with complex<float> operands
// (selected by UseSinglePrecisionComplexKernel); implemented through cblas_cgemm.
// NOTE(review): listing lines 868-870 are elided from this excerpt.
860  template< typename MT3 // Type of the left-hand side target matrix
861  , typename MT4 // Type of the left-hand side matrix operand
862  , typename MT5 > // Type of the right-hand side matrix operand
863  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
864  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
865  {
866  using boost::numeric_cast;
867 
// Compile-time checks on the value_type of each complex element type
871  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
872  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
873  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
874 
// The CBLAS interface takes int arguments; all sizes and spacings (used as
// leading dimensions) are converted via numeric_cast.
875  const int M ( numeric_cast<int>( A.rows() ) );
876  const int N ( numeric_cast<int>( B.columns() ) );
877  const int K ( numeric_cast<int>( A.columns() ) );
878  const int lda( numeric_cast<int>( A.spacing() ) );
879  const int ldb( numeric_cast<int>( B.spacing() ) );
880  const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scaling factors are handed to cblas_cgemm by address: alpha = 1, beta = 0
881  complex<float> alpha( 1.0F, 0.0F );
882  complex<float> beta ( 0.0F, 0.0F );
883 
// Layout and transposition flags follow the storage order of the target type
// MT3: a row-major target passes CblasRowMajor with both operands transposed,
// any other target passes CblasColMajor without transposition.
884  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
885  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
886  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
887  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
888  }
890 #endif
891  //**********************************************************************************************
892 
893  //**BLAS-based assignment to dense matrices (double precision complex)**************************
894 #if BLAZE_BLAS_MODE
895 
// BLAS kernel for the assignment C = A * B with complex<double> operands
// (selected by UseDoublePrecisionComplexKernel); implemented through cblas_zgemm.
// NOTE(review): listing lines 916-918 are elided from this excerpt.
908  template< typename MT3 // Type of the left-hand side target matrix
909  , typename MT4 // Type of the left-hand side matrix operand
910  , typename MT5 > // Type of the right-hand side matrix operand
911  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
912  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
913  {
914  using boost::numeric_cast;
915 
// Compile-time checks on the value_type of each complex element type
919  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
920  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
921  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
922 
// The CBLAS interface takes int arguments; all sizes and spacings (used as
// leading dimensions) are converted via numeric_cast.
923  const int M ( numeric_cast<int>( A.rows() ) );
924  const int N ( numeric_cast<int>( B.columns() ) );
925  const int K ( numeric_cast<int>( A.columns() ) );
926  const int lda( numeric_cast<int>( A.spacing() ) );
927  const int ldb( numeric_cast<int>( B.spacing() ) );
928  const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scaling factors are handed to cblas_zgemm by address: alpha = 1, beta = 0
929  complex<double> alpha( 1.0, 0.0 );
930  complex<double> beta ( 0.0, 0.0 );
931 
// Layout and transposition flags follow the storage order of the target type
// MT3: a row-major target passes CblasRowMajor with both operands transposed,
// any other target passes CblasColMajor without transposition.
932  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
933  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
934  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
935  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
936  }
938 #endif
939  //**********************************************************************************************
940 
941  //**Assignment to sparse matrices***************************************************************
// Assignment of the matrix product to a sparse matrix.
//   lhs  target sparse matrix; must already have the dimensions of the product
//   rhs  multiplication expression to be assigned
// The product is first evaluated into a temporary of type TmpType (ResultType
// or OppositeType, selected by the target's storage order SO) and the
// temporary is then handed to smpAssign().
// NOTE(review): listing lines 958 and 962-967 are elided from this excerpt;
// ResultType and OppositeType are declared out of view.
954  template< typename MT // Type of the target sparse matrix
955  , bool SO > // Storage order of the target sparse matrix
956  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
957  {
959 
960  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
961 
968 
969  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
970  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
971 
972  const TmpType tmp( rhs );
973  smpAssign( ~lhs, tmp );
974  }
976  //**********************************************************************************************
977 
978  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the matrix product to a dense matrix (lhs += A * B).
//   lhs  target dense matrix; must already have the dimensions of the product
//   rhs  multiplication expression to be added
// NOTE(review): LT and RT are declared on lines not present in this listing;
// listing line 995 is elided as well.
991  template< typename MT // Type of the target dense matrix
992  , bool SO > // Storage order of the target dense matrix
993  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
994  {
996 
997  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
998  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
999 
// Nothing to add for an empty target or an empty inner dimension
1000  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1001  return;
1002  }
1003 
1004  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1005  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1006 
1007  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1008  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1009  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1010  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1011  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1012  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1013 
// Dispatch to the SMP or the serial kernel selection
1014  TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1015  }
1017  //**********************************************************************************************
1018 
1019  //**Addition assignment to dense matrices (kernel selection)************************************
1030  template< typename MT3 // Type of the left-hand side target matrix
1031  , typename MT4 // Type of the left-hand side matrix operand
1032  , typename MT5 > // Type of the right-hand side matrix operand
1033  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1034  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1035  {
1036  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1037  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1038  else
1039  TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1040  }
1042  //**********************************************************************************************
1043 
1044  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment C += A * B; this overload
// participates only when UseSMPAssignKernel<MT3,MT4,MT5> is set. It forwards
// the product of the two operands to smpAddAssign().
1055  template< typename MT3 // Type of the left-hand side target matrix
1056  , typename MT4 // Type of the left-hand side matrix operand
1057  , typename MT5 > // Type of the right-hand side matrix operand
1058  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1059  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1060  {
1061  smpAddAssign( C, A * B );
1062  }
1064  //**********************************************************************************************
1065 
1066  //**Default addition assignment to dense matrices***********************************************
1080  template< typename MT3 // Type of the left-hand side target matrix
1081  , typename MT4 // Type of the left-hand side matrix operand
1082  , typename MT5 > // Type of the right-hand side matrix operand
1083  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1085  {
1086  const size_t M( A.rows() );
1087  const size_t N( B.columns() );
1088  const size_t K( A.columns() );
1089 
1090  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1091  const size_t end( N & size_t(-2) );
1092 
1093  for( size_t i=0UL; i<M; ++i ) {
1094  for( size_t k=0UL; k<K; ++k ) {
1095  for( size_t j=0UL; j<end; j+=2UL ) {
1096  C(i,j ) += A(i,k) * B(k,j );
1097  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1098  }
1099  if( end < N ) {
1100  C(i,end) += A(i,k) * B(k,end);
1101  }
1102  }
1103  }
1104  }
1106  //**********************************************************************************************
1107 
1108  //**Vectorized default addition assignment to row-major dense matrices**************************
1122  template< typename MT3 // Type of the left-hand side target matrix
1123  , typename MT4 // Type of the left-hand side matrix operand
1124  , typename MT5 > // Type of the right-hand side matrix operand
1125  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1126  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1127  {
1130 
1131  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1132  const typename MT5::OppositeType tmp( B );
1133  addAssign( ~C, A * tmp );
1134  }
1135  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1136  const typename MT4::OppositeType tmp( A );
1137  addAssign( ~C, tmp * B );
1138  }
1139  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1140  const typename MT5::OppositeType tmp( B );
1141  addAssign( ~C, A * tmp );
1142  }
1143  else {
1144  const typename MT4::OppositeType tmp( A );
1145  addAssign( ~C, tmp * B );
1146  }
1147  }
1149  //**********************************************************************************************
1150 
1151  //**Vectorized default addition assignment to column-major dense matrices***********************
   /*!\brief Vectorized default addition assignment of the matrix product to a target of type
   //        DenseMatrix<MT3,true> (\f$ C+=A*B \f$).
   //
   // \param C The target left-hand side dense matrix.
   // \param A The left-hand side multiplication operand.
   // \param B The right-hand side multiplication operand.
   // \return void
   //
   // This overload is controlled by EnableIf on UseVectorizedDefaultKernel. The rows of the
   // target are traversed in blocks of IT::size*8, IT::size*4, IT::size*2 and finally IT::size
   // rows. For every block the current values of \a C are loaded, the products of the loaded
   // values of \a A and the elements of \a B (passed through set()) are accumulated over all
   // \a K inner indices, and the result is stored back to \a C.
   //
   // NOTE(review): the last block of every tier may start less than a full block before \a M;
   // this presumably relies on load()/store() operating on storage padded to a multiple of
   // IT::size - confirm against the dense matrix implementation.
   */
1165  template< typename MT3 // Type of the left-hand side target matrix
1166  , typename MT4 // Type of the left-hand side matrix operand
1167  , typename MT5 > // Type of the right-hand side matrix operand
1168  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1169  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1170  {
1171  typedef IntrinsicTrait<ElementType> IT;
1172 
1173  const size_t M( A.rows() );
1174  const size_t N( B.columns() );
1175  const size_t K( A.columns() );
1176 
1177  size_t i( 0UL ); // Row index, shared by all tiers below so that each tier continues where the previous one stopped
1178 
   // Tier 1: blocks of IT::size*8 rows, one target column (eight accumulators) per iteration
1179  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1180  for( size_t j=0UL; j<N; ++j ) {
1181  IntrinsicType xmm1( (~C).load(i ,j) );
1182  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1183  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1184  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1185  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1186  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1187  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1188  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1189  for( size_t k=0UL; k<K; ++k ) {
1190  const IntrinsicType b1( set( B(k,j) ) ); // presumably broadcasts the scalar B(k,j) - confirm against set()
1191  xmm1 = xmm1 + A.load(i ,k) * b1;
1192  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1193  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1194  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1195  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1196  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1197  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1198  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1199  }
1200  (~C).store( i , j, xmm1 );
1201  (~C).store( i+IT::size , j, xmm2 );
1202  (~C).store( i+IT::size*2UL, j, xmm3 );
1203  (~C).store( i+IT::size*3UL, j, xmm4 );
1204  (~C).store( i+IT::size*4UL, j, xmm5 );
1205  (~C).store( i+IT::size*5UL, j, xmm6 );
1206  (~C).store( i+IT::size*6UL, j, xmm7 );
1207  (~C).store( i+IT::size*7UL, j, xmm8 );
1208  }
1209  }
   // Tier 2: blocks of IT::size*4 rows, two target columns (eight accumulators) per iteration
1210  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1211  size_t j( 0UL );
1212  for( ; (j+2UL) <= N; j+=2UL ) {
1213  IntrinsicType xmm1( (~C).load(i ,j ) );
1214  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1215  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1216  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1217  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1218  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1219  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1220  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1221  for( size_t k=0UL; k<K; ++k ) {
1222  const IntrinsicType a1( A.load(i ,k) );
1223  const IntrinsicType a2( A.load(i+IT::size ,k) );
1224  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1225  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1226  const IntrinsicType b1( set( B(k,j ) ) );
1227  const IntrinsicType b2( set( B(k,j+1UL) ) );
1228  xmm1 = xmm1 + a1 * b1;
1229  xmm2 = xmm2 + a2 * b1;
1230  xmm3 = xmm3 + a3 * b1;
1231  xmm4 = xmm4 + a4 * b1;
1232  xmm5 = xmm5 + a1 * b2;
1233  xmm6 = xmm6 + a2 * b2;
1234  xmm7 = xmm7 + a3 * b2;
1235  xmm8 = xmm8 + a4 * b2;
1236  }
1237  (~C).store( i , j , xmm1 );
1238  (~C).store( i+IT::size , j , xmm2 );
1239  (~C).store( i+IT::size*2UL, j , xmm3 );
1240  (~C).store( i+IT::size*3UL, j , xmm4 );
1241  (~C).store( i , j+1UL, xmm5 );
1242  (~C).store( i+IT::size , j+1UL, xmm6 );
1243  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1244  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1245  }
1246  if( j < N ) { // Odd number of columns: process the remaining single column
1247  IntrinsicType xmm1( (~C).load(i ,j) );
1248  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1249  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1250  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1251  for( size_t k=0UL; k<K; ++k ) {
1252  const IntrinsicType b1( set( B(k,j) ) );
1253  xmm1 = xmm1 + A.load(i ,k) * b1;
1254  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1255  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1256  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1257  }
1258  (~C).store( i , j, xmm1 );
1259  (~C).store( i+IT::size , j, xmm2 );
1260  (~C).store( i+IT::size*2UL, j, xmm3 );
1261  (~C).store( i+IT::size*3UL, j, xmm4 );
1262  }
1263  }
   // Tier 3: blocks of IT::size*2 rows, two target columns (four accumulators) per iteration
1264  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1265  size_t j( 0UL );
1266  for( ; (j+2UL) <= N; j+=2UL ) {
1267  IntrinsicType xmm1( (~C).load(i ,j ) );
1268  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1269  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1270  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1271  for( size_t k=0UL; k<K; ++k ) {
1272  const IntrinsicType a1( A.load(i ,k) );
1273  const IntrinsicType a2( A.load(i+IT::size,k) );
1274  const IntrinsicType b1( set( B(k,j ) ) );
1275  const IntrinsicType b2( set( B(k,j+1UL) ) );
1276  xmm1 = xmm1 + a1 * b1;
1277  xmm2 = xmm2 + a2 * b1;
1278  xmm3 = xmm3 + a1 * b2;
1279  xmm4 = xmm4 + a2 * b2;
1280  }
1281  (~C).store( i , j , xmm1 );
1282  (~C).store( i+IT::size, j , xmm2 );
1283  (~C).store( i , j+1UL, xmm3 );
1284  (~C).store( i+IT::size, j+1UL, xmm4 );
1285  }
1286  if( j < N ) { // Odd number of columns: process the remaining single column
1287  IntrinsicType xmm1( (~C).load(i ,j) );
1288  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1289  for( size_t k=0UL; k<K; ++k ) {
1290  const IntrinsicType b1( set( B(k,j) ) );
1291  xmm1 = xmm1 + A.load(i ,k) * b1;
1292  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1293  }
1294  (~C).store( i , j, xmm1 );
1295  (~C).store( i+IT::size, j, xmm2 );
1296  }
1297  }
   // Tier 4: the previous loop stops once i+IT::size >= M, so at most one block of IT::size rows is left
1298  if( i < M ) {
1299  size_t j( 0UL );
1300  for( ; (j+2UL) <= N; j+=2UL ) {
1301  IntrinsicType xmm1( (~C).load(i,j ) );
1302  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1303  for( size_t k=0UL; k<K; ++k ) {
1304  const IntrinsicType a1( A.load(i,k) );
1305  xmm1 = xmm1 + a1 * set( B(k,j ) );
1306  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1307  }
1308  (~C).store( i, j , xmm1 );
1309  (~C).store( i, j+1UL, xmm2 );
1310  }
1311  if( j < N ) { // Odd number of columns: process the remaining single column
1312  IntrinsicType xmm1( (~C).load(i,j) );
1313  for( size_t k=0UL; k<K; ++k ) {
1314  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1315  }
1316  (~C).store( i, j, xmm1 );
1317  }
1318  }
1319  }
1321  //**********************************************************************************************
1322 
1323  //**BLAS-based addition assignment to dense matrices (default)**********************************
1337  template< typename MT3 // Type of the left-hand side target matrix
1338  , typename MT4 // Type of the left-hand side matrix operand
1339  , typename MT5 > // Type of the right-hand side matrix operand
1340  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1341  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1342  {
1343  selectDefaultAddAssignKernel( C, A, B );
1344  }
1346  //**********************************************************************************************
1347 
1348  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1349 #if BLAZE_BLAS_MODE
1350 
1363  template< typename MT3 // Type of the left-hand side target matrix
1364  , typename MT4 // Type of the left-hand side matrix operand
1365  , typename MT5 > // Type of the right-hand side matrix operand
1366  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1367  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1368  {
1369  using boost::numeric_cast;
1370 
1374 
1375  const int M ( numeric_cast<int>( A.rows() ) );
1376  const int N ( numeric_cast<int>( B.columns() ) );
1377  const int K ( numeric_cast<int>( A.columns() ) );
1378  const int lda( numeric_cast<int>( A.spacing() ) );
1379  const int ldb( numeric_cast<int>( B.spacing() ) );
1380  const int ldc( numeric_cast<int>( C.spacing() ) );
1381 
1382  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1383  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1384  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1385  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1386  }
1388 #endif
1389  //**********************************************************************************************
1390 
1391  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1392 #if BLAZE_BLAS_MODE
1393 
1406  template< typename MT3 // Type of the left-hand side target matrix
1407  , typename MT4 // Type of the left-hand side matrix operand
1408  , typename MT5 > // Type of the right-hand side matrix operand
1409  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1410  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1411  {
1412  using boost::numeric_cast;
1413 
1417 
1418  const int M ( numeric_cast<int>( A.rows() ) );
1419  const int N ( numeric_cast<int>( B.columns() ) );
1420  const int K ( numeric_cast<int>( A.columns() ) );
1421  const int lda( numeric_cast<int>( A.spacing() ) );
1422  const int ldb( numeric_cast<int>( B.spacing() ) );
1423  const int ldc( numeric_cast<int>( C.spacing() ) );
1424 
1425  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1426  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1427  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1428  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1429  }
1431 #endif
1432  //**********************************************************************************************
1433 
1434  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1435 #if BLAZE_BLAS_MODE
1436 
1449  template< typename MT3 // Type of the left-hand side target matrix
1450  , typename MT4 // Type of the left-hand side matrix operand
1451  , typename MT5 > // Type of the right-hand side matrix operand
1452  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1453  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1454  {
1455  using boost::numeric_cast;
1456 
1460  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1461  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1462  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1463 
1464  const int M ( numeric_cast<int>( A.rows() ) );
1465  const int N ( numeric_cast<int>( B.columns() ) );
1466  const int K ( numeric_cast<int>( A.columns() ) );
1467  const int lda( numeric_cast<int>( A.spacing() ) );
1468  const int ldb( numeric_cast<int>( B.spacing() ) );
1469  const int ldc( numeric_cast<int>( C.spacing() ) );
1470  const complex<float> alpha( 1.0F, 0.0F );
1471  const complex<float> beta ( 1.0F, 0.0F );
1472 
1473  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1474  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1475  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1476  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1477  }
1479 #endif
1480  //**********************************************************************************************
1481 
1482  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1483 #if BLAZE_BLAS_MODE
1484 
1497  template< typename MT3 // Type of the left-hand side target matrix
1498  , typename MT4 // Type of the left-hand side matrix operand
1499  , typename MT5 > // Type of the right-hand side matrix operand
1500  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1501  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1502  {
1503  using boost::numeric_cast;
1504 
1508  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1509  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1510  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1511 
1512  const int M ( numeric_cast<int>( A.rows() ) );
1513  const int N ( numeric_cast<int>( B.columns() ) );
1514  const int K ( numeric_cast<int>( A.columns() ) );
1515  const int lda( numeric_cast<int>( A.spacing() ) );
1516  const int ldb( numeric_cast<int>( B.spacing() ) );
1517  const int ldc( numeric_cast<int>( C.spacing() ) );
1518  const complex<double> alpha( 1.0, 0.0 );
1519  const complex<double> beta ( 1.0, 0.0 );
1520 
1521  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1522  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1523  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1524  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1525  }
1527 #endif
1528  //**********************************************************************************************
1529 
1530  //**Addition assignment to sparse matrices******************************************************
1531  // No special implementation for the addition assignment to sparse matrices.
1532  //**********************************************************************************************
1533 
1534  //**Subtraction assignment to dense matrices****************************************************
1547  template< typename MT // Type of the target dense matrix
1548  , bool SO > // Storage order of the target dense matrix
1549  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1550  {
1552 
1553  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1554  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1555 
1556  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1557  return;
1558  }
1559 
1560  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1561  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1562 
1563  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1564  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1565  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1566  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1567  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1568  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1569 
1570  TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1571  }
1573  //**********************************************************************************************
1574 
1575  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1586  template< typename MT3 // Type of the left-hand side target matrix
1587  , typename MT4 // Type of the left-hand side matrix operand
1588  , typename MT5 > // Type of the right-hand side matrix operand
1589  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1590  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1591  {
1592  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1593  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1594  else
1595  TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1596  }
1598  //**********************************************************************************************
1599 
1600  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1611  template< typename MT3 // Type of the left-hand side target matrix
1612  , typename MT4 // Type of the left-hand side matrix operand
1613  , typename MT5 > // Type of the right-hand side matrix operand
1614  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1615  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1616  {
1617  smpSubAssign( C, A * B );
1618  }
1620  //**********************************************************************************************
1621 
1622  //**Default subtraction assignment to dense matrices********************************************
1636  template< typename MT3 // Type of the left-hand side target matrix
1637  , typename MT4 // Type of the left-hand side matrix operand
1638  , typename MT5 > // Type of the right-hand side matrix operand
1639  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1640  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1641  {
1642  const size_t M( A.rows() );
1643  const size_t N( B.columns() );
1644  const size_t K( A.columns() );
1645 
1646  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1647  const size_t end( N & size_t(-2) );
1648 
1649  for( size_t i=0UL; i<M; ++i ) {
1650  for( size_t k=0UL; k<K; ++k ) {
1651  for( size_t j=0UL; j<end; j+=2UL ) {
1652  C(i,j ) -= A(i,k) * B(k,j );
1653  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1654  }
1655  if( end < N ) {
1656  C(i,end) -= A(i,k) * B(k,end);
1657  }
1658  }
1659  }
1660  }
1662  //**********************************************************************************************
1663 
1664  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1678  template< typename MT3 // Type of the left-hand side target matrix
1679  , typename MT4 // Type of the left-hand side matrix operand
1680  , typename MT5 > // Type of the right-hand side matrix operand
1681  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1682  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1683  {
1686 
1687  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1688  const typename MT5::OppositeType tmp( B );
1689  subAssign( ~C, A * tmp );
1690  }
1691  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1692  const typename MT4::OppositeType tmp( A );
1693  subAssign( ~C, tmp * B );
1694  }
1695  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1696  const typename MT5::OppositeType tmp( B );
1697  subAssign( ~C, A * tmp );
1698  }
1699  else {
1700  const typename MT4::OppositeType tmp( A );
1701  subAssign( ~C, tmp * B );
1702  }
1703  }
1705  //**********************************************************************************************
1706 
1707  //**Vectorized default subtraction assignment to column-major dense matrices********************
   /*!\brief Vectorized default subtraction assignment of the matrix product to a target of type
   //        DenseMatrix<MT3,true> (\f$ C-=A*B \f$).
   //
   // \param C The target left-hand side dense matrix.
   // \param A The left-hand side multiplication operand.
   // \param B The right-hand side multiplication operand.
   // \return void
   //
   // This overload is controlled by EnableIf on UseVectorizedDefaultKernel. The rows of the
   // target are traversed in blocks of IT::size*8, IT::size*4, IT::size*2 and finally IT::size
   // rows. For every block the current values of \a C are loaded, the products of the loaded
   // values of \a A and the elements of \a B (passed through set()) are subtracted for all
   // \a K inner indices, and the result is stored back to \a C.
   //
   // NOTE(review): the last block of every tier may start less than a full block before \a M;
   // this presumably relies on load()/store() operating on storage padded to a multiple of
   // IT::size - confirm against the dense matrix implementation.
   */
1721  template< typename MT3 // Type of the left-hand side target matrix
1722  , typename MT4 // Type of the left-hand side matrix operand
1723  , typename MT5 > // Type of the right-hand side matrix operand
1724  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1725  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1726  {
1727  typedef IntrinsicTrait<ElementType> IT;
1728 
1729  const size_t M( A.rows() );
1730  const size_t N( B.columns() );
1731  const size_t K( A.columns() );
1732 
1733  size_t i( 0UL ); // Row index, shared by all tiers below so that each tier continues where the previous one stopped
1734 
   // Tier 1: blocks of IT::size*8 rows, one target column (eight accumulators) per iteration
1735  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1736  for( size_t j=0UL; j<N; ++j ) {
1737  IntrinsicType xmm1( (~C).load(i ,j) );
1738  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1739  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1740  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1741  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1742  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1743  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1744  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1745  for( size_t k=0UL; k<K; ++k ) {
1746  const IntrinsicType b1( set( B(k,j) ) ); // presumably broadcasts the scalar B(k,j) - confirm against set()
1747  xmm1 = xmm1 - A.load(i ,k) * b1;
1748  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1749  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1750  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1751  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1752  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1753  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1754  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1755  }
1756  (~C).store( i , j, xmm1 );
1757  (~C).store( i+IT::size , j, xmm2 );
1758  (~C).store( i+IT::size*2UL, j, xmm3 );
1759  (~C).store( i+IT::size*3UL, j, xmm4 );
1760  (~C).store( i+IT::size*4UL, j, xmm5 );
1761  (~C).store( i+IT::size*5UL, j, xmm6 );
1762  (~C).store( i+IT::size*6UL, j, xmm7 );
1763  (~C).store( i+IT::size*7UL, j, xmm8 );
1764  }
1765  }
   // Tier 2: blocks of IT::size*4 rows, two target columns (eight accumulators) per iteration
1766  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1767  size_t j( 0UL );
1768  for( ; (j+2UL) <= N; j+=2UL ) {
1769  IntrinsicType xmm1( (~C).load(i ,j ) );
1770  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1771  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1772  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1773  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1774  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1775  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1776  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1777  for( size_t k=0UL; k<K; ++k ) {
1778  const IntrinsicType a1( A.load(i ,k) );
1779  const IntrinsicType a2( A.load(i+IT::size ,k) );
1780  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1781  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1782  const IntrinsicType b1( set( B(k,j ) ) );
1783  const IntrinsicType b2( set( B(k,j+1UL) ) );
1784  xmm1 = xmm1 - a1 * b1;
1785  xmm2 = xmm2 - a2 * b1;
1786  xmm3 = xmm3 - a3 * b1;
1787  xmm4 = xmm4 - a4 * b1;
1788  xmm5 = xmm5 - a1 * b2;
1789  xmm6 = xmm6 - a2 * b2;
1790  xmm7 = xmm7 - a3 * b2;
1791  xmm8 = xmm8 - a4 * b2;
1792  }
1793  (~C).store( i , j , xmm1 );
1794  (~C).store( i+IT::size , j , xmm2 );
1795  (~C).store( i+IT::size*2UL, j , xmm3 );
1796  (~C).store( i+IT::size*3UL, j , xmm4 );
1797  (~C).store( i , j+1UL, xmm5 );
1798  (~C).store( i+IT::size , j+1UL, xmm6 );
1799  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1800  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1801  }
1802  if( j < N ) { // Odd number of columns: process the remaining single column
1803  IntrinsicType xmm1( (~C).load(i ,j) );
1804  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1805  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1806  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1807  for( size_t k=0UL; k<K; ++k ) {
1808  const IntrinsicType b1( set( B(k,j) ) );
1809  xmm1 = xmm1 - A.load(i ,k) * b1;
1810  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1811  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1812  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1813  }
1814  (~C).store( i , j, xmm1 );
1815  (~C).store( i+IT::size , j, xmm2 );
1816  (~C).store( i+IT::size*2UL, j, xmm3 );
1817  (~C).store( i+IT::size*3UL, j, xmm4 );
1818  }
1819  }
   // Tier 3: blocks of IT::size*2 rows, two target columns (four accumulators) per iteration
1820  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1821  size_t j( 0UL );
1822  for( ; (j+2UL) <= N; j+=2UL ) {
1823  IntrinsicType xmm1( (~C).load(i ,j ) );
1824  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1825  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1826  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1827  for( size_t k=0UL; k<K; ++k ) {
1828  const IntrinsicType a1( A.load(i ,k) );
1829  const IntrinsicType a2( A.load(i+IT::size,k) );
1830  const IntrinsicType b1( set( B(k,j ) ) );
1831  const IntrinsicType b2( set( B(k,j+1UL) ) );
1832  xmm1 = xmm1 - a1 * b1;
1833  xmm2 = xmm2 - a2 * b1;
1834  xmm3 = xmm3 - a1 * b2;
1835  xmm4 = xmm4 - a2 * b2;
1836  }
1837  (~C).store( i , j , xmm1 );
1838  (~C).store( i+IT::size, j , xmm2 );
1839  (~C).store( i , j+1UL, xmm3 );
1840  (~C).store( i+IT::size, j+1UL, xmm4 );
1841  }
1842  if( j < N ) { // Odd number of columns: process the remaining single column
1843  IntrinsicType xmm1( (~C).load(i ,j) );
1844  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1845  for( size_t k=0UL; k<K; ++k ) {
1846  const IntrinsicType b1( set( B(k,j) ) );
1847  xmm1 = xmm1 - A.load(i ,k) * b1;
1848  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1849  }
1850  (~C).store( i , j, xmm1 );
1851  (~C).store( i+IT::size, j, xmm2 );
1852  }
1853  }
   // Tier 4: the previous loop stops once i+IT::size >= M, so at most one block of IT::size rows is left
1854  if( i < M ) {
1855  size_t j( 0UL );
1856  for( ; (j+2UL) <= N; j+=2UL ) {
1857  IntrinsicType xmm1( (~C).load(i,j ) );
1858  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1859  for( size_t k=0UL; k<K; ++k ) {
1860  const IntrinsicType a1( A.load(i,k) );
1861  xmm1 = xmm1 - a1 * set( B(k,j ) );
1862  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1863  }
1864  (~C).store( i, j , xmm1 );
1865  (~C).store( i, j+1UL, xmm2 );
1866  }
1867  if( j < N ) { // Odd number of columns: process the remaining single column
1868  IntrinsicType xmm1( (~C).load(i,j) );
1869  for( size_t k=0UL; k<K; ++k ) {
1870  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
1871  }
1872  (~C).store( i, j, xmm1 );
1873  }
1874  }
1875  }
1877  //**********************************************************************************************
1878 
1879  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1893  template< typename MT3 // Type of the left-hand side target matrix
1894  , typename MT4 // Type of the left-hand side matrix operand
1895  , typename MT5 > // Type of the right-hand side matrix operand
1896  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1897  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1898  {
1899  selectDefaultSubAssignKernel( C, A, B );
1900  }
1902  //**********************************************************************************************
1903 
1904  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1905 #if BLAZE_BLAS_MODE
1906 
1919  template< typename MT3 // Type of the left-hand side target matrix
1920  , typename MT4 // Type of the left-hand side matrix operand
1921  , typename MT5 > // Type of the right-hand side matrix operand
1922  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1923  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1924  {
1925  using boost::numeric_cast;
1926 
1930 
1931  const int M ( numeric_cast<int>( A.rows() ) );
1932  const int N ( numeric_cast<int>( B.columns() ) );
1933  const int K ( numeric_cast<int>( A.columns() ) );
1934  const int lda( numeric_cast<int>( A.spacing() ) );
1935  const int ldb( numeric_cast<int>( B.spacing() ) );
1936  const int ldc( numeric_cast<int>( C.spacing() ) );
1937 
1938  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1939  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1940  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1941  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1942  }
1944 #endif
1945  //**********************************************************************************************
1946 
1947  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1948 #if BLAZE_BLAS_MODE
1949 
1962  template< typename MT3 // Type of the left-hand side target matrix
1963  , typename MT4 // Type of the left-hand side matrix operand
1964  , typename MT5 > // Type of the right-hand side matrix operand
1965  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1966  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1967  {
1968  using boost::numeric_cast;
1969 
1973 
1974  const int M ( numeric_cast<int>( A.rows() ) );
1975  const int N ( numeric_cast<int>( B.columns() ) );
1976  const int K ( numeric_cast<int>( A.columns() ) );
1977  const int lda( numeric_cast<int>( A.spacing() ) );
1978  const int ldb( numeric_cast<int>( B.spacing() ) );
1979  const int ldc( numeric_cast<int>( C.spacing() ) );
1980 
1981  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1982  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1983  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1984  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1985  }
1987 #endif
1988  //**********************************************************************************************
1989 
1990  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1991 #if BLAZE_BLAS_MODE
1992 
2005  template< typename MT3 // Type of the left-hand side target matrix
2006  , typename MT4 // Type of the left-hand side matrix operand
2007  , typename MT5 > // Type of the right-hand side matrix operand
2008  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2009  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2010  {
2011  using boost::numeric_cast;
2012 
2016  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2017  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2018  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2019 
2020  const int M ( numeric_cast<int>( A.rows() ) );
2021  const int N ( numeric_cast<int>( B.columns() ) );
2022  const int K ( numeric_cast<int>( A.columns() ) );
2023  const int lda( numeric_cast<int>( A.spacing() ) );
2024  const int ldb( numeric_cast<int>( B.spacing() ) );
2025  const int ldc( numeric_cast<int>( C.spacing() ) );
2026  const complex<float> alpha( -1.0F, 0.0F );
2027  const complex<float> beta ( 1.0F, 0.0F );
2028 
2029  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2030  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2031  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2032  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2033  }
2035 #endif
2036  //**********************************************************************************************
2037 
2038  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2039 #if BLAZE_BLAS_MODE
2040 
2053  template< typename MT3 // Type of the left-hand side target matrix
2054  , typename MT4 // Type of the left-hand side matrix operand
2055  , typename MT5 > // Type of the right-hand side matrix operand
2056  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2057  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2058  {
2059  using boost::numeric_cast;
2060 
2064  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2065  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2066  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2067 
2068  const int M ( numeric_cast<int>( A.rows() ) );
2069  const int N ( numeric_cast<int>( B.columns() ) );
2070  const int K ( numeric_cast<int>( A.columns() ) );
2071  const int lda( numeric_cast<int>( A.spacing() ) );
2072  const int ldb( numeric_cast<int>( B.spacing() ) );
2073  const int ldc( numeric_cast<int>( C.spacing() ) );
2074  const complex<double> alpha( -1.0, 0.0 );
2075  const complex<double> beta ( 1.0, 0.0 );
2076 
2077  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2078  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2079  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2080  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2081  }
2083 #endif
2084  //**********************************************************************************************
2085 
2086  //**Subtraction assignment to sparse matrices***************************************************
2087  // No special implementation for the subtraction assignment to sparse matrices.
2088  //**********************************************************************************************
2089 
2090  //**Multiplication assignment to dense matrices*************************************************
2091  // No special implementation for the multiplication assignment to dense matrices.
2092  //**********************************************************************************************
2093 
2094  //**Multiplication assignment to sparse matrices************************************************
2095  // No special implementation for the multiplication assignment to sparse matrices.
2096  //**********************************************************************************************
2097 
2098  //**Compile time checks*************************************************************************
2105  //**********************************************************************************************
2106 };
2107 //*************************************************************************************************
2108 
2109 
2110 
2111 
2112 //=================================================================================================
2113 //
2114 // DMATSCALARMULTEXPR SPECIALIZATION
2115 //
2116 //=================================================================================================
2117 
2118 //*************************************************************************************************
2126 template< typename MT1 // Type of the left-hand side dense matrix
2127  , typename MT2 // Type of the right-hand side dense matrix
2128  , typename ST > // Type of the right-hand side scalar value
2129 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
2130  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2131  , private MatScalarMultExpr
2132  , private Computation
2133 {
2134  private:
2135  //**Type definitions****************************************************************************
2136  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
2137  typedef typename MMM::ResultType RES;
2138  typedef typename MT1::ResultType RT1;
2139  typedef typename MT2::ResultType RT2;
2140  typedef typename RT1::ElementType ET1;
2141  typedef typename RT2::ElementType ET2;
2142  typedef typename MT1::CompositeType CT1;
2143  typedef typename MT2::CompositeType CT2;
2144  //**********************************************************************************************
2145 
2146  //**********************************************************************************************
2148  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2149  //**********************************************************************************************
2150 
2151  //**********************************************************************************************
2153  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2154  //**********************************************************************************************
2155 
2156  //**********************************************************************************************
2158 
2160  template< typename T1, typename T2, typename T3, typename T4 >
2161  struct UseSMPAssignKernel {
2162  enum { value = evaluateLeft || evaluateRight };
2163  };
2164  //**********************************************************************************************
2165 
2166  //**********************************************************************************************
2168 
2171  template< typename T1, typename T2, typename T3, typename T4 >
2172  struct UseSinglePrecisionKernel {
2173  enum { value = IsFloat<typename T1::ElementType>::value &&
2174  IsFloat<typename T2::ElementType>::value &&
2175  IsFloat<typename T3::ElementType>::value &&
2176  !IsComplex<T4>::value };
2177  };
2178  //**********************************************************************************************
2179 
2180  //**********************************************************************************************
2182 
2185  template< typename T1, typename T2, typename T3, typename T4 >
2186  struct UseDoublePrecisionKernel {
2187  enum { value = IsDouble<typename T1::ElementType>::value &&
2188  IsDouble<typename T2::ElementType>::value &&
2189  IsDouble<typename T3::ElementType>::value &&
2190  !IsComplex<T4>::value };
2191  };
2192  //**********************************************************************************************
2193 
2194  //**********************************************************************************************
2196 
2199  template< typename T1, typename T2, typename T3 >
2200  struct UseSinglePrecisionComplexKernel {
2201  typedef complex<float> Type;
2202  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2203  IsSame<typename T2::ElementType,Type>::value &&
2204  IsSame<typename T3::ElementType,Type>::value };
2205  };
2206  //**********************************************************************************************
2207 
2208  //**********************************************************************************************
2210 
2213  template< typename T1, typename T2, typename T3 >
2214  struct UseDoublePrecisionComplexKernel {
2215  typedef complex<double> Type;
2216  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2217  IsSame<typename T2::ElementType,Type>::value &&
2218  IsSame<typename T3::ElementType,Type>::value };
2219  };
2220  //**********************************************************************************************
2221 
2222  //**********************************************************************************************
2224 
2226  template< typename T1, typename T2, typename T3, typename T4 >
2227  struct UseDefaultKernel {
2228  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2229  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2230  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2231  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2232  };
2233  //**********************************************************************************************
2234 
2235  //**********************************************************************************************
2237 
2239  template< typename T1, typename T2, typename T3, typename T4 >
2240  struct UseVectorizedDefaultKernel {
2241  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2242  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2243  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2244  IsSame<typename T1::ElementType,T4>::value &&
2245  IntrinsicTrait<typename T1::ElementType>::addition &&
2246  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2247  IntrinsicTrait<typename T1::ElementType>::multiplication };
2248  };
2249  //**********************************************************************************************
2250 
2251  public:
2252  //**Type definitions****************************************************************************
2253  typedef DMatScalarMultExpr<MMM,ST,true> This;
2254  typedef typename MultTrait<RES,ST>::Type ResultType;
2255  typedef typename ResultType::OppositeType OppositeType;
2256  typedef typename ResultType::TransposeType TransposeType;
2257  typedef typename ResultType::ElementType ElementType;
2258  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2259  typedef const ElementType ReturnType;
2260  typedef const ResultType CompositeType;
2261 
2263  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2264 
2266  typedef ST RightOperand;
2267 
2269  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2270 
2272  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2273  //**********************************************************************************************
2274 
2275  //**Compilation flags***************************************************************************
2277  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2278  IsSame<ET1,ET2>::value &&
2279  IsSame<ET1,ST>::value &&
2280  IntrinsicTrait<ET1>::addition &&
2281  IntrinsicTrait<ET1>::multiplication };
2282 
2284  enum { smpAssignable = !evaluateLeft && !evaluateRight };
2285  //**********************************************************************************************
2286 
2287  //**Constructor*********************************************************************************
2293  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2294  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2295  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2296  {}
2297  //**********************************************************************************************
2298 
2299  //**Access operator*****************************************************************************
2306  inline ReturnType operator()( size_t i, size_t j ) const {
2307  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2308  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2309  return matrix_(i,j) * scalar_;
2310  }
2311  //**********************************************************************************************
2312 
2313  //**Rows function*******************************************************************************
2318  inline size_t rows() const {
2319  return matrix_.rows();
2320  }
2321  //**********************************************************************************************
2322 
2323  //**Columns function****************************************************************************
2328  inline size_t columns() const {
2329  return matrix_.columns();
2330  }
2331  //**********************************************************************************************
2332 
2333  //**Left operand access*************************************************************************
2338  inline LeftOperand leftOperand() const {
2339  return matrix_;
2340  }
2341  //**********************************************************************************************
2342 
2343  //**Right operand access************************************************************************
2348  inline RightOperand rightOperand() const {
2349  return scalar_;
2350  }
2351  //**********************************************************************************************
2352 
2353  //**********************************************************************************************
2359  template< typename T >
2360  inline bool canAlias( const T* alias ) const {
2361  return matrix_.canAlias( alias );
2362  }
2363  //**********************************************************************************************
2364 
2365  //**********************************************************************************************
2371  template< typename T >
2372  inline bool isAliased( const T* alias ) const {
2373  return matrix_.isAliased( alias );
2374  }
2375  //**********************************************************************************************
2376 
2377  //**********************************************************************************************
2382  inline bool isAligned() const {
2383  return matrix_.isAligned();
2384  }
2385  //**********************************************************************************************
2386 
2387  //**********************************************************************************************
2392  inline bool canSMPAssign() const {
2393  typename MMM::RightOperand B( matrix_.rightOperand() );
2394  return ( !BLAZE_BLAS_IS_PARALLEL ||
2395  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
2396  ( B.columns() > SMP_TDMATTDMATMULT_THRESHOLD );
2397  }
2398  //**********************************************************************************************
2399 
2400  private:
2401  //**Member variables****************************************************************************
2402  LeftOperand matrix_;
2403  RightOperand scalar_;
2404  //**********************************************************************************************
2405 
2406  //**Assignment to dense matrices****************************************************************
2415  template< typename MT3 // Type of the target dense matrix
2416  , bool SO > // Storage order of the target dense matrix
2417  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2418  {
2420 
2421  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2422  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2423 
2424  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2425  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2426 
2427  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2428  return;
2429  }
2430  else if( left.columns() == 0UL ) {
2431  reset( ~lhs );
2432  return;
2433  }
2434 
2435  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2436  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2437 
2438  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2439  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2440  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2441  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2442  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2443  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2444 
2445  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2446  }
2447  //**********************************************************************************************
2448 
2449  //**Assignment to dense matrices (kernel selection)*********************************************
2460  template< typename MT3 // Type of the left-hand side target matrix
2461  , typename MT4 // Type of the left-hand side matrix operand
2462  , typename MT5 // Type of the right-hand side matrix operand
2463  , typename ST2 > // Type of the scalar value
2464  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2465  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2466  {
2467  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
2468  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2469  else
2470  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2471  }
2472  //**********************************************************************************************
2473 
2474  //**Assignment to dense matrices (kernel selection)*********************************************
2485  template< typename MT3 // Type of the left-hand side target matrix
2486  , typename MT4 // Type of the left-hand side matrix operand
2487  , typename MT5 // Type of the right-hand side matrix operand
2488  , typename ST2 > // Type of the scalar value
2489  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2490  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2491  {
2492  smpAssign( C, A * B * scalar );
2493  }
2494  //**********************************************************************************************
2495 
2496  //**Default assignment to dense matrices********************************************************
2510  template< typename MT3 // Type of the left-hand side target matrix
2511  , typename MT4 // Type of the left-hand side matrix operand
2512  , typename MT5 // Type of the right-hand side matrix operand
2513  , typename ST2 > // Type of the scalar value
2514  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2515  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2516  {
2517  for( size_t i=0UL; i<A.rows(); ++i ) {
2518  for( size_t k=0UL; k<B.columns(); ++k ) {
2519  C(i,k) = A(i,0UL) * B(0UL,k);
2520  }
2521  for( size_t j=1UL; j<A.columns(); ++j ) {
2522  for( size_t k=0UL; k<B.columns(); ++k ) {
2523  C(i,k) += A(i,j) * B(j,k);
2524  }
2525  }
2526  for( size_t k=0UL; k<B.columns(); ++k ) {
2527  C(i,k) *= scalar;
2528  }
2529  }
2530  }
2531  //**********************************************************************************************
2532 
2533  //**Vectorized default assignment to row-major dense matrices***********************************
2547  template< typename MT3 // Type of the left-hand side target matrix
2548  , typename MT4 // Type of the left-hand side matrix operand
2549  , typename MT5 // Type of the right-hand side matrix operand
2550  , typename ST2 > // Type of the scalar value
2551  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2552  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2553  {
2556 
2557  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2558  const typename MT5::OppositeType tmp( B );
2559  smpAssign( ~C, A * tmp * scalar );
2560  }
2561  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2562  const typename MT4::OppositeType tmp( A );
2563  smpAssign( ~C, tmp * B * scalar );
2564  }
2565  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2566  const typename MT5::OppositeType tmp( B );
2567  smpAssign( ~C, A * tmp * scalar );
2568  }
2569  else {
2570  const typename MT4::OppositeType tmp( A );
2571  smpAssign( ~C, tmp * B * scalar );
2572  }
2573  }
2574  //**********************************************************************************************
2575 
2576  //**Vectorized default assignment to column-major dense matrices********************************
2590  template< typename MT3 // Type of the left-hand side target matrix
2591  , typename MT4 // Type of the left-hand side matrix operand
2592  , typename MT5 // Type of the right-hand side matrix operand
2593  , typename ST2 > // Type of the scalar value
2594  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2595  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2596  {
2597  typedef IntrinsicTrait<ElementType> IT;
2598 
2599  const size_t M( A.rows() );
2600  const size_t N( B.columns() );
2601  const size_t K( A.columns() );
2602 
2603  const IntrinsicType factor( set( scalar ) );
2604 
2605  size_t i( 0UL );
2606 
2607  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2608  for( size_t j=0UL; j<N; ++j ) {
2609  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2610  for( size_t k=0UL; k<K; ++k ) {
2611  const IntrinsicType b1( set( B(k,j) ) );
2612  xmm1 = xmm1 + A.load(i ,k) * b1;
2613  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2614  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2615  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2616  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2617  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2618  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2619  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2620  }
2621  (~C).store( i , j, xmm1 * factor );
2622  (~C).store( i+IT::size , j, xmm2 * factor );
2623  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2624  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2625  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
2626  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
2627  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
2628  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
2629  }
2630  }
2631  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2632  size_t j( 0UL );
2633  for( ; (j+2UL) <= N; j+=2UL ) {
2634  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2635  for( size_t k=0UL; k<K; ++k ) {
2636  const IntrinsicType a1( A.load(i ,k) );
2637  const IntrinsicType a2( A.load(i+IT::size ,k) );
2638  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2639  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2640  const IntrinsicType b1( set( B(k,j ) ) );
2641  const IntrinsicType b2( set( B(k,j+1UL) ) );
2642  xmm1 = xmm1 + a1 * b1;
2643  xmm2 = xmm2 + a2 * b1;
2644  xmm3 = xmm3 + a3 * b1;
2645  xmm4 = xmm4 + a4 * b1;
2646  xmm5 = xmm5 + a1 * b2;
2647  xmm6 = xmm6 + a2 * b2;
2648  xmm7 = xmm7 + a3 * b2;
2649  xmm8 = xmm8 + a4 * b2;
2650  }
2651  (~C).store( i , j , xmm1 * factor );
2652  (~C).store( i+IT::size , j , xmm2 * factor );
2653  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
2654  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
2655  (~C).store( i , j+1UL, xmm5 * factor );
2656  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
2657  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2658  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
2659  }
2660  if( j < N ) {
2661  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2662  for( size_t k=0UL; k<K; ++k ) {
2663  const IntrinsicType b1( set( B(k,j) ) );
2664  xmm1 = xmm1 + A.load(i ,k) * b1;
2665  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2666  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2667  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2668  }
2669  (~C).store( i , j, xmm1 * factor );
2670  (~C).store( i+IT::size , j, xmm2 * factor );
2671  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2672  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2673  }
2674  }
2675  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2676  size_t j( 0UL );
2677  for( ; (j+2UL) <= N; j+=2UL ) {
2678  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2679  for( size_t k=0UL; k<K; ++k ) {
2680  const IntrinsicType a1( A.load(i ,k) );
2681  const IntrinsicType a2( A.load(i+IT::size,k) );
2682  const IntrinsicType b1( set( B(k,j ) ) );
2683  const IntrinsicType b2( set( B(k,j+1UL) ) );
2684  xmm1 = xmm1 + a1 * b1;
2685  xmm2 = xmm2 + a2 * b1;
2686  xmm3 = xmm3 + a1 * b2;
2687  xmm4 = xmm4 + a2 * b2;
2688  }
2689  (~C).store( i , j , xmm1 * factor );
2690  (~C).store( i+IT::size, j , xmm2 * factor );
2691  (~C).store( i , j+1UL, xmm3 * factor );
2692  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
2693  }
2694  if( j < N ) {
2695  IntrinsicType xmm1, xmm2;
2696  for( size_t k=0UL; k<K; ++k ) {
2697  const IntrinsicType b1( set( B(k,j) ) );
2698  xmm1 = xmm1 + A.load(i ,k) * b1;
2699  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2700  }
2701  (~C).store( i , j, xmm1 * factor );
2702  (~C).store( i+IT::size, j, xmm2 * factor );
2703  }
2704  }
2705  if( i < M ) {
2706  size_t j( 0UL );
2707  for( ; (j+2UL) <= N; j+=2UL ) {
2708  IntrinsicType xmm1, xmm2;
2709  for( size_t k=0UL; k<K; ++k ) {
2710  const IntrinsicType a1( A.load(i,k) );
2711  xmm1 = xmm1 + a1 * set( B(k,j ) );
2712  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2713  }
2714  (~C).store( i, j , xmm1 * factor );
2715  (~C).store( i, j+1UL, xmm2 * factor );
2716  }
2717  if( j < N ) {
2718  IntrinsicType xmm1;
2719  for( size_t k=0UL; k<K; ++k ) {
2720  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
2721  }
2722  (~C).store( i, j, xmm1 * factor );
2723  }
2724  }
2725  }
2726  //**********************************************************************************************
2727 
2728  //**BLAS-based assignment to dense matrices (default)*******************************************
2742  template< typename MT3 // Type of the left-hand side target matrix
2743  , typename MT4 // Type of the left-hand side matrix operand
2744  , typename MT5 // Type of the right-hand side matrix operand
2745  , typename ST2 > // Type of the scalar value
2746  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2747  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2748  {
2749  selectDefaultAssignKernel( C, A, B, scalar );
2750  }
2751  //**********************************************************************************************
2752 
2753  //**BLAS-based assignment to dense matrices (single precision)**********************************
2754 #if BLAZE_BLAS_MODE
2755 
2768  template< typename MT3 // Type of the left-hand side target matrix
2769  , typename MT4 // Type of the left-hand side matrix operand
2770  , typename MT5 // Type of the right-hand side matrix operand
2771  , typename ST2 > // Type of the scalar value
2772  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2773  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2774  {
2775  using boost::numeric_cast;
2776 
2780 
2781  const int M ( numeric_cast<int>( A.rows() ) );
2782  const int N ( numeric_cast<int>( B.columns() ) );
2783  const int K ( numeric_cast<int>( A.columns() ) );
2784  const int lda( numeric_cast<int>( A.spacing() ) );
2785  const int ldb( numeric_cast<int>( B.spacing() ) );
2786  const int ldc( numeric_cast<int>( C.spacing() ) );
2787 
2788  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2789  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2790  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2791  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2792  }
2793 #endif
2794  //**********************************************************************************************
2795 
2796  //**BLAS-based assignment to dense matrices (double precision)**********************************
2797 #if BLAZE_BLAS_MODE
2798 
2811  template< typename MT3 // Type of the left-hand side target matrix
2812  , typename MT4 // Type of the left-hand side matrix operand
2813  , typename MT5 // Type of the right-hand side matrix operand
2814  , typename ST2 > // Type of the scalar value
2815  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2816  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2817  {
2818  using boost::numeric_cast;
2819 
2823 
2824  const int M ( numeric_cast<int>( A.rows() ) );
2825  const int N ( numeric_cast<int>( B.columns() ) );
2826  const int K ( numeric_cast<int>( A.columns() ) );
2827  const int lda( numeric_cast<int>( A.spacing() ) );
2828  const int ldb( numeric_cast<int>( B.spacing() ) );
2829  const int ldc( numeric_cast<int>( C.spacing() ) );
2830 
2831  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2832  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2833  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2834  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2835  }
2836 #endif
2837  //**********************************************************************************************
2838 
2839  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2840 #if BLAZE_BLAS_MODE
2841 
2854  template< typename MT3 // Type of the left-hand side target matrix
2855  , typename MT4 // Type of the left-hand side matrix operand
2856  , typename MT5 // Type of the right-hand side matrix operand
2857  , typename ST2 > // Type of the scalar value
2858  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2859  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2860  {
2861  using boost::numeric_cast;
2862 
2866  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2867  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2868  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2869 
2870  const int M ( numeric_cast<int>( A.rows() ) );
2871  const int N ( numeric_cast<int>( B.columns() ) );
2872  const int K ( numeric_cast<int>( A.columns() ) );
2873  const int lda( numeric_cast<int>( A.spacing() ) );
2874  const int ldb( numeric_cast<int>( B.spacing() ) );
2875  const int ldc( numeric_cast<int>( C.spacing() ) );
2876  const complex<float> alpha( scalar );
2877  const complex<float> beta ( 0.0F, 0.0F );
2878 
2879  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2880  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2881  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2882  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2883  }
2884 #endif
2885  //**********************************************************************************************
2886 
2887  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2888 #if BLAZE_BLAS_MODE
2889 
2902  template< typename MT3 // Type of the left-hand side target matrix
2903  , typename MT4 // Type of the left-hand side matrix operand
2904  , typename MT5 // Type of the right-hand side matrix operand
2905  , typename ST2 > // Type of the scalar value
2906  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2907  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2908  {
2909  using boost::numeric_cast;
2910 
2914  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2915  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2916  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2917 
2918  const int M ( numeric_cast<int>( A.rows() ) );
2919  const int N ( numeric_cast<int>( B.columns() ) );
2920  const int K ( numeric_cast<int>( A.columns() ) );
2921  const int lda( numeric_cast<int>( A.spacing() ) );
2922  const int ldb( numeric_cast<int>( B.spacing() ) );
2923  const int ldc( numeric_cast<int>( C.spacing() ) );
2924  const complex<double> alpha( scalar );
2925  const complex<double> beta ( 0.0, 0.0 );
2926 
2927  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2928  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2929  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2930  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2931  }
2932 #endif
2933  //**********************************************************************************************
2934 
2935  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled matrix product 'rhs' to a sparse matrix 'lhs'.
// The expression is first evaluated into a temporary whose type is picked by
// SelectType from ResultType and OppositeType depending on the storage order SO of the
// target; the temporary is then handed to smpAssign.
2947  template< typename MT // Sparse target matrix type
2948  , bool SO > // Storage order flag of the sparse target
2949  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2950  {
2952 
2953  typedef typename SelectType< SO, ResultType, OppositeType >::Type EvaluatedType;
2954 
2961 
2962  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2963  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2964 
2965  const EvaluatedType evaluated( rhs ); // Evaluation of the complete scaled product
2966  smpAssign( ~lhs, evaluated );
2967  }
2968  //**********************************************************************************************
2969 
2970  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled matrix product 'rhs' to the dense matrix 'lhs'.
// Nothing is done if the target has no rows or no columns, or if the left operand has
// no columns. Otherwise both operands are evaluated (types LT and RT) and the work is
// passed on to selectAddAssignKernel together with the scalar of the expression.
2982  template< typename MT3 // Dense target matrix type
2983  , bool SO > // Storage order flag of the dense target
2984  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2985  {
2987 
2988  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2989  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2990 
2991  typename MMM::LeftOperand lhsOperand( rhs.matrix_.leftOperand() );
2992  typename MMM::RightOperand rhsOperand( rhs.matrix_.rightOperand() );
2993 
2994  const bool emptyProduct( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || lhsOperand.columns() == 0UL );
2995  if( emptyProduct )
2996  return;
2997 
2998  LT A( lhsOperand ); // Evaluated left-hand side operand
2999  RT B( rhsOperand ); // Evaluated right-hand side operand
3000 
3001  BLAZE_INTERNAL_ASSERT( A.rows() == lhsOperand.rows() , "Invalid number of rows" );
3002  BLAZE_INTERNAL_ASSERT( A.columns() == lhsOperand.columns() , "Invalid number of columns" );
3003  BLAZE_INTERNAL_ASSERT( B.rows() == rhsOperand.rows() , "Invalid number of rows" );
3004  BLAZE_INTERNAL_ASSERT( B.columns() == rhsOperand.columns() , "Invalid number of columns" );
3005  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3006  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3007 
3008  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3009  }
3010  //**********************************************************************************************
3011 
3012  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment when UseSMPAssignKernel<MT3,MT4,MT5,ST2>
// does not hold (DisableIf). Targets with fewer than TDMATTDMATMULT_THRESHOLD elements
// use the default kernel, all other targets use the BLAS kernel.
3023  template< typename MT3 // Target matrix type
3024  , typename MT4 // Left operand type
3025  , typename MT5 // Right operand type
3026  , typename ST2 > // Scalar type
3027  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3028  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3029  {
3030  if( C.rows() * C.columns() >= TDMATTDMATMULT_THRESHOLD )
3031  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3032  else
3033  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3034  }
3035  //**********************************************************************************************
3036 
3037  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment when UseSMPAssignKernel<MT3,MT4,MT5,ST2>
// holds (EnableIf). No size-based choice is made here: the scaled product expression
// is rebuilt from the evaluated operands and handed to smpAddAssign as a whole.
3048  template< typename MT3 // Type of the left-hand side target matrix
3049  , typename MT4 // Type of the left-hand side matrix operand
3050  , typename MT5 // Type of the right-hand side matrix operand
3051  , typename ST2 > // Type of the scalar value
3052  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3053  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3054  {
3055  smpAddAssign( C, A * B * scalar ); // A and B are the already evaluated operands
3056  }
3057  //**********************************************************************************************
3058 
3059  //**Default addition assignment to dense matrices***********************************************
// Default addition assignment kernel, used when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf). The scaled
// product is evaluated into a temporary of type ResultType, which is then added to C.
3073  template< typename MT3 // Target matrix type
3074  , typename MT4 // Left operand type
3075  , typename MT5 // Right operand type
3076  , typename ST2 > // Scalar type
3077  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3078  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3079  {
3080  const ResultType scaledProduct( A * B * scalar );
3081  addAssign( C, scaledProduct );
3082  }
3083  //**********************************************************************************************
3084 
3085  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment kernel for row-major targets (EnableIf on
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>). One of the two operands is converted to
// its OppositeType and the resulting scaled product is passed to addAssign.
// Choice of the converted operand:
//   - only A resizable                 -> B is converted
//   - only B resizable                 -> A is converted
//   - otherwise, B not larger than A   -> B is converted, else A
3099  template< typename MT3 // Target matrix type
3100  , typename MT4 // Left operand type
3101  , typename MT5 // Right operand type
3102  , typename ST2 > // Scalar type
3103  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3104  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3105  {
3108 
3109  const bool onlyLeftResizable ( IsResizable<MT4>::value && !IsResizable<MT5>::value );
3110  const bool onlyRightResizable( !IsResizable<MT4>::value && IsResizable<MT5>::value );
3111 
3112  if( onlyLeftResizable ||
3113  ( !onlyRightResizable && B.rows() * B.columns() <= A.rows() * A.columns() ) ) {
3114  const typename MT5::OppositeType oppositeB( B );
3115  addAssign( ~C, A * oppositeB * scalar );
3116  }
3121  else {
3122  const typename MT4::OppositeType oppositeA( A );
3123  addAssign( ~C, oppositeA * B * scalar );
3124  }
3125  }
3126  //**********************************************************************************************
3127 
3128  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment kernel for column-major targets (EnableIf on
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>): C += ( A * B ) * scalar.
// The rows of C are walked in blocks of 8, 4, 2 and finally 1 intrinsic vector(s), each
// vector starting IT::size rows after the previous one. For every block the sums over
// k of A.load(..,k) * B(k,j) are accumulated in IntrinsicType variables, multiplied by
// 'factor' and added to the values loaded from C before being stored back.
// NOTE(review): the accumulators xmm1..xmm8 are only declared before the first
// 'xmm = xmm + ...'; this presumes IntrinsicType default-constructs to zero - confirm.
// NOTE(review): the row loops only test the start of the last vector against M, so the
// last load/store may reach past row M-1; presumably the storage is padded - confirm.
3142  template< typename MT3 // Type of the left-hand side target matrix
3143  , typename MT4 // Type of the left-hand side matrix operand
3144  , typename MT5 // Type of the right-hand side matrix operand
3145  , typename ST2 > // Type of the scalar value
3146  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3147  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3148  {
3149  typedef IntrinsicTrait<ElementType> IT;
3150 
3151  const size_t M( A.rows() ); // Rows of the product
3152  const size_t N( B.columns() ); // Columns of the product
3153  const size_t K( A.columns() ); // Length of the inner summation
3154 
3155  const IntrinsicType factor( set( scalar ) ); // Presumably the scalar broadcast to all lanes
3156 
3157  size_t i( 0UL ); // Row index, shared by all block loops below
3158 
3159  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) { // Row blocks of 8 vectors, one column at a time
3160  for( size_t j=0UL; j<N; ++j ) {
3161  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3162  for( size_t k=0UL; k<K; ++k ) {
3163  const IntrinsicType b1( set( B(k,j) ) );
3164  xmm1 = xmm1 + A.load(i ,k) * b1;
3165  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3166  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3167  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3168  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3169  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3170  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3171  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3172  }
3173  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3174  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3175  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3176  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3177  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
3178  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
3179  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
3180  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
3181  }
3182  }
3183  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) { // Row blocks of 4 vectors, two columns at a time
3184  size_t j( 0UL );
3185  for( ; (j+2UL) <= N; j+=2UL ) {
3186  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3187  for( size_t k=0UL; k<K; ++k ) {
3188  const IntrinsicType a1( A.load(i ,k) );
3189  const IntrinsicType a2( A.load(i+IT::size ,k) );
3190  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3191  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3192  const IntrinsicType b1( set( B(k,j ) ) );
3193  const IntrinsicType b2( set( B(k,j+1UL) ) );
3194  xmm1 = xmm1 + a1 * b1;
3195  xmm2 = xmm2 + a2 * b1;
3196  xmm3 = xmm3 + a3 * b1;
3197  xmm4 = xmm4 + a4 * b1;
3198  xmm5 = xmm5 + a1 * b2;
3199  xmm6 = xmm6 + a2 * b2;
3200  xmm7 = xmm7 + a3 * b2;
3201  xmm8 = xmm8 + a4 * b2;
3202  }
3203  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3204  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
3205  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
3206  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
3207  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
3208  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
3209  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
3210  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
3211  }
3212  if( j < N ) { // Single remaining column when N is odd
3213  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3214  for( size_t k=0UL; k<K; ++k ) {
3215  const IntrinsicType b1( set( B(k,j) ) );
3216  xmm1 = xmm1 + A.load(i ,k) * b1;
3217  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3218  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3219  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3220  }
3221  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3222  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3223  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3224  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3225  }
3226  }
3227  for( ; (i+IT::size) < M; i+=IT::size*2UL ) { // Row blocks of 2 vectors, two columns at a time
3228  size_t j( 0UL );
3229  for( ; (j+2UL) <= N; j+=2UL ) {
3230  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3231  for( size_t k=0UL; k<K; ++k ) {
3232  const IntrinsicType a1( A.load(i ,k) );
3233  const IntrinsicType a2( A.load(i+IT::size,k) );
3234  const IntrinsicType b1( set( B(k,j ) ) );
3235  const IntrinsicType b2( set( B(k,j+1UL) ) );
3236  xmm1 = xmm1 + a1 * b1;
3237  xmm2 = xmm2 + a2 * b1;
3238  xmm3 = xmm3 + a1 * b2;
3239  xmm4 = xmm4 + a2 * b2;
3240  }
3241  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3242  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
3243  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
3244  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
3245  }
3246  if( j < N ) { // Single remaining column when N is odd
3247  IntrinsicType xmm1, xmm2;
3248  for( size_t k=0UL; k<K; ++k ) {
3249  const IntrinsicType b1( set( B(k,j) ) );
3250  xmm1 = xmm1 + A.load(i ,k) * b1;
3251  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3252  }
3253  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3254  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
3255  }
3256  }
3257  if( i < M ) { // Last single vector of rows, two columns at a time
3258  size_t j( 0UL );
3259  for( ; (j+2UL) <= N; j+=2UL ) {
3260  IntrinsicType xmm1, xmm2;
3261  for( size_t k=0UL; k<K; ++k ) {
3262  const IntrinsicType a1( A.load(i,k) );
3263  xmm1 = xmm1 + a1 * set( B(k,j ) );
3264  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3265  }
3266  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3267  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
3268  }
3269  if( j < N ) { // Single remaining column when N is odd
3270  IntrinsicType xmm1;
3271  for( size_t k=0UL; k<K; ++k ) {
3272  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3273  }
3274  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3275  }
3276  }
3277  }
3278  //**********************************************************************************************
3279 
3280  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to the
// default addition assignment kernel.
3294  template< typename MT3 // Target matrix type
3295  , typename MT4 // Left operand type
3296  , typename MT5 // Right operand type
3297  , typename ST2 > // Scalar type
3298  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3299  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3300  {
3301  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3302  }
3303  //**********************************************************************************************
3304 
3305  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3306 #if BLAZE_BLAS_MODE
3307 
// Single precision BLAS kernel for the addition assignment of a scaled matrix product.
// Selected through EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. Calls
// cblas_sgemm with alpha = scalar and beta = 1, so the product is accumulated onto C.
3320  template< typename MT3 // Target matrix type
3321  , typename MT4 // Left operand type
3322  , typename MT5 // Right operand type
3323  , typename ST2 > // Scalar type
3324  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3325  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3326  {
3328 
3332  // The CBLAS call takes int arguments; every size is converted via numeric_cast.
3333  const int m( boost::numeric_cast<int>( A.rows() ) );
3334  const int n( boost::numeric_cast<int>( B.columns() ) );
3335  const int k( boost::numeric_cast<int>( A.columns() ) );
3336  const int strideA( boost::numeric_cast<int>( A.spacing() ) );
3337  const int strideB( boost::numeric_cast<int>( B.spacing() ) );
3338  const int strideC( boost::numeric_cast<int>( C.spacing() ) );
3339  const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value ); // Row-major target: both operands are flagged as transposed
3340  cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
3341  rowMajorTarget ? CblasTrans : CblasNoTrans,
3342  rowMajorTarget ? CblasTrans : CblasNoTrans,
3343  m, n, k, scalar, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
3344  }
3345 #endif
3346  //**********************************************************************************************
3347 
3348  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3349 #if BLAZE_BLAS_MODE
3350 
// Double precision BLAS kernel for the addition assignment of a scaled matrix product.
// Selected through EnableIf when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. Calls
// cblas_dgemm with alpha = scalar and beta = 1, so the product is accumulated onto C.
3363  template< typename MT3 // Target matrix type
3364  , typename MT4 // Left operand type
3365  , typename MT5 // Right operand type
3366  , typename ST2 > // Scalar type
3367  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3368  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3369  {
3371 
3375  // The CBLAS call takes int arguments; every size is converted via numeric_cast.
3376  const int m( boost::numeric_cast<int>( A.rows() ) );
3377  const int n( boost::numeric_cast<int>( B.columns() ) );
3378  const int k( boost::numeric_cast<int>( A.columns() ) );
3379  const int strideA( boost::numeric_cast<int>( A.spacing() ) );
3380  const int strideB( boost::numeric_cast<int>( B.spacing() ) );
3381  const int strideC( boost::numeric_cast<int>( C.spacing() ) );
3382  const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value ); // Row-major target: both operands are flagged as transposed
3383  cblas_dgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
3384  rowMajorTarget ? CblasTrans : CblasNoTrans,
3385  rowMajorTarget ? CblasTrans : CblasNoTrans,
3386  m, n, k, scalar, A.data(), strideA, B.data(), strideB, 1.0, C.data(), strideC );
3387  }
3388 #endif
3389  //**********************************************************************************************
3390 
3391  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3392 #if BLAZE_BLAS_MODE
3393 
// Complex single precision BLAS kernel for the addition assignment of a scaled matrix
// product. Selected through EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Calls cblas_cgemm with alpha = scalar (converted to complex<float>) and
// beta = (1,0), so the product is accumulated onto C.
3406  template< typename MT3 // Target matrix type
3407  , typename MT4 // Left operand type
3408  , typename MT5 // Right operand type
3409  , typename ST2 > // Scalar type
3410  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3411  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3412  {
3414 
3418  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3419  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3420  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3421  // The CBLAS call takes int arguments; every size is converted via numeric_cast.
3422  const int m( boost::numeric_cast<int>( A.rows() ) );
3423  const int n( boost::numeric_cast<int>( B.columns() ) );
3424  const int k( boost::numeric_cast<int>( A.columns() ) );
3425  const int strideA( boost::numeric_cast<int>( A.spacing() ) );
3426  const int strideB( boost::numeric_cast<int>( B.spacing() ) );
3427  const int strideC( boost::numeric_cast<int>( C.spacing() ) );
3428  const complex<float> alpha( scalar );
3429  const complex<float> beta ( 1.0F, 0.0F );
3430  const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value ); // Row-major target: both operands are flagged as transposed
3431  cblas_cgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
3432  rowMajorTarget ? CblasTrans : CblasNoTrans,
3433  rowMajorTarget ? CblasTrans : CblasNoTrans,
3434  m, n, k, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
3435  }
3436 #endif
3437  //**********************************************************************************************
3438 
3439  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3440 #if BLAZE_BLAS_MODE
3441 
// Complex double precision BLAS kernel for the addition assignment of a scaled matrix
// product. Selected through EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Calls cblas_zgemm with alpha = scalar (converted to complex<double>) and
// beta = (1,0), so the product is accumulated onto C.
3454  template< typename MT3 // Target matrix type
3455  , typename MT4 // Left operand type
3456  , typename MT5 // Right operand type
3457  , typename ST2 > // Scalar type
3458  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3459  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3460  {
3462 
3466  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3467  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3468  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3469  // The CBLAS call takes int arguments; every size is converted via numeric_cast.
3470  const int m( boost::numeric_cast<int>( A.rows() ) );
3471  const int n( boost::numeric_cast<int>( B.columns() ) );
3472  const int k( boost::numeric_cast<int>( A.columns() ) );
3473  const int strideA( boost::numeric_cast<int>( A.spacing() ) );
3474  const int strideB( boost::numeric_cast<int>( B.spacing() ) );
3475  const int strideC( boost::numeric_cast<int>( C.spacing() ) );
3476  const complex<double> alpha( scalar );
3477  const complex<double> beta ( 1.0, 0.0 );
3478  const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value ); // Row-major target: both operands are flagged as transposed
3479  cblas_zgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
3480  rowMajorTarget ? CblasTrans : CblasNoTrans,
3481  rowMajorTarget ? CblasTrans : CblasNoTrans,
3482  m, n, k, &alpha, A.data(), strideA, B.data(), strideB, &beta, C.data(), strideC );
3483  }
3484 #endif
3485  //**********************************************************************************************
3486 
3487  //**Addition assignment to sparse matrices******************************************************
3488  // No special implementation for the addition assignment to sparse matrices.
3489  //**********************************************************************************************
3490 
3491  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled matrix product 'rhs' to the dense matrix 'lhs'.
// Nothing is done if the target has no rows or no columns, or if the left operand has
// no columns. Otherwise both operands are evaluated (types LT and RT) and the work is
// passed on to selectSubAssignKernel together with the scalar of the expression.
3503  template< typename MT3 // Dense target matrix type
3504  , bool SO > // Storage order flag of the dense target
3505  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3506  {
3508 
3509  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3510  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3511 
3512  typename MMM::LeftOperand lhsOperand( rhs.matrix_.leftOperand() );
3513  typename MMM::RightOperand rhsOperand( rhs.matrix_.rightOperand() );
3514 
3515  const bool emptyProduct( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || lhsOperand.columns() == 0UL );
3516  if( emptyProduct )
3517  return;
3518 
3519  LT A( lhsOperand ); // Evaluated left-hand side operand
3520  RT B( rhsOperand ); // Evaluated right-hand side operand
3521 
3522  BLAZE_INTERNAL_ASSERT( A.rows() == lhsOperand.rows() , "Invalid number of rows" );
3523  BLAZE_INTERNAL_ASSERT( A.columns() == lhsOperand.columns() , "Invalid number of columns" );
3524  BLAZE_INTERNAL_ASSERT( B.rows() == rhsOperand.rows() , "Invalid number of rows" );
3525  BLAZE_INTERNAL_ASSERT( B.columns() == rhsOperand.columns() , "Invalid number of columns" );
3526  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3527  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3528 
3529  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3530  }
3531  //**********************************************************************************************
3532 
3533  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment when
// UseSMPAssignKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf). Targets with fewer than
// TDMATTDMATMULT_THRESHOLD elements use the default kernel, all other targets use the
// BLAS kernel.
3544  template< typename MT3 // Target matrix type
3545  , typename MT4 // Left operand type
3546  , typename MT5 // Right operand type
3547  , typename ST2 > // Scalar type
3548  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3549  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3550  {
3551  if( C.rows() * C.columns() >= TDMATTDMATMULT_THRESHOLD )
3552  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3553  else
3554  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3555  }
3556  //**********************************************************************************************
3557 
3558  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment when
// UseSMPAssignKernel<MT3,MT4,MT5,ST2> holds (EnableIf). No size-based choice is made
// here: the scaled product expression is rebuilt from the evaluated operands and handed
// to smpSubAssign as a whole.
3569  template< typename MT3 // Type of the left-hand side target matrix
3570  , typename MT4 // Type of the left-hand side matrix operand
3571  , typename MT5 // Type of the right-hand side matrix operand
3572  , typename ST2 > // Type of the scalar value
3573  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3574  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3575  {
3576  smpSubAssign( C, A * B * scalar ); // A and B are the already evaluated operands
3577  }
3578  //**********************************************************************************************
3579 
3580  //**Default subtraction assignment to dense matrices********************************************
// Default subtraction assignment kernel, used when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf). The scaled
// product is evaluated into a temporary of type ResultType, which is then subtracted
// from C.
3594  template< typename MT3 // Target matrix type
3595  , typename MT4 // Left operand type
3596  , typename MT5 // Right operand type
3597  , typename ST2 > // Scalar type
3598  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3599  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3600  {
3601  const ResultType scaledProduct( A * B * scalar );
3602  subAssign( C, scaledProduct );
3603  }
3604  //**********************************************************************************************
3605 
3606  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment kernel for row-major targets (EnableIf on
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>). One of the two operands is converted to
// its OppositeType and the resulting scaled product is passed to subAssign.
// Choice of the converted operand:
//   - only A resizable                 -> B is converted
//   - only B resizable                 -> A is converted
//   - otherwise, B not larger than A   -> B is converted, else A
3620  template< typename MT3 // Target matrix type
3621  , typename MT4 // Left operand type
3622  , typename MT5 // Right operand type
3623  , typename ST2 > // Scalar type
3624  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3625  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3626  {
3629 
3630  const bool onlyLeftResizable ( IsResizable<MT4>::value && !IsResizable<MT5>::value );
3631  const bool onlyRightResizable( !IsResizable<MT4>::value && IsResizable<MT5>::value );
3632 
3633  if( onlyLeftResizable ||
3634  ( !onlyRightResizable && B.rows() * B.columns() <= A.rows() * A.columns() ) ) {
3635  const typename MT5::OppositeType oppositeB( B );
3636  subAssign( ~C, A * oppositeB * scalar );
3637  }
3642  else {
3643  const typename MT4::OppositeType oppositeA( A );
3644  subAssign( ~C, oppositeA * B * scalar );
3645  }
3646  }
3647  //**********************************************************************************************
3648 
3649  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment kernel for column-major targets (EnableIf
// on UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>): C -= ( A * B ) * scalar.
// The rows of C are walked in blocks of 8, 4, 2 and finally 1 intrinsic vector(s), each
// vector starting IT::size rows after the previous one. For every block the sums over
// k of A.load(..,k) * B(k,j) are accumulated in IntrinsicType variables, multiplied by
// 'factor' and subtracted from the values loaded from C before being stored back.
// NOTE(review): the accumulators xmm1..xmm8 are only declared before the first
// 'xmm = xmm + ...'; this presumes IntrinsicType default-constructs to zero - confirm.
// NOTE(review): the row loops only test the start of the last vector against M, so the
// last load/store may reach past row M-1; presumably the storage is padded - confirm.
3663  template< typename MT3 // Type of the left-hand side target matrix
3664  , typename MT4 // Type of the left-hand side matrix operand
3665  , typename MT5 // Type of the right-hand side matrix operand
3666  , typename ST2 > // Type of the scalar value
3667  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3668  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3669  {
3670  typedef IntrinsicTrait<ElementType> IT;
3671 
3672  const size_t M( A.rows() ); // Rows of the product
3673  const size_t N( B.columns() ); // Columns of the product
3674  const size_t K( A.columns() ); // Length of the inner summation
3675 
3676  const IntrinsicType factor( set( scalar ) ); // Presumably the scalar broadcast to all lanes
3677 
3678  size_t i( 0UL ); // Row index, shared by all block loops below
3679 
3680  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) { // Row blocks of 8 vectors, one column at a time
3681  for( size_t j=0UL; j<N; ++j ) {
3682  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3683  for( size_t k=0UL; k<K; ++k ) {
3684  const IntrinsicType b1( set( B(k,j) ) );
3685  xmm1 = xmm1 + A.load(i ,k) * b1;
3686  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3687  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3688  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3689  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3690  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3691  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3692  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3693  }
3694  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3695  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3696  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3697  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3698  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3699  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3700  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3701  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
3702  }
3703  }
3704  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) { // Row blocks of 4 vectors, two columns at a time
3705  size_t j( 0UL );
3706  for( ; (j+2UL) <= N; j+=2UL ) {
3707  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3708  for( size_t k=0UL; k<K; ++k ) {
3709  const IntrinsicType a1( A.load(i ,k) );
3710  const IntrinsicType a2( A.load(i+IT::size ,k) );
3711  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3712  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3713  const IntrinsicType b1( set( B(k,j ) ) );
3714  const IntrinsicType b2( set( B(k,j+1UL) ) );
3715  xmm1 = xmm1 + a1 * b1;
3716  xmm2 = xmm2 + a2 * b1;
3717  xmm3 = xmm3 + a3 * b1;
3718  xmm4 = xmm4 + a4 * b1;
3719  xmm5 = xmm5 + a1 * b2;
3720  xmm6 = xmm6 + a2 * b2;
3721  xmm7 = xmm7 + a3 * b2;
3722  xmm8 = xmm8 + a4 * b2;
3723  }
3724  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3725  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3726  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3727  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3728  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3729  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3730  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3731  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
3732  }
3733  if( j < N ) { // Single remaining column when N is odd
3734  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3735  for( size_t k=0UL; k<K; ++k ) {
3736  const IntrinsicType b1( set( B(k,j) ) );
3737  xmm1 = xmm1 + A.load(i ,k) * b1;
3738  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3739  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3740  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3741  }
3742  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3743  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3744  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3745  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3746  }
3747  }
3748  for( ; (i+IT::size) < M; i+=IT::size*2UL ) { // Row blocks of 2 vectors, two columns at a time
3749  size_t j( 0UL );
3750  for( ; (j+2UL) <= N; j+=2UL ) {
3751  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3752  for( size_t k=0UL; k<K; ++k ) {
3753  const IntrinsicType a1( A.load(i ,k) );
3754  const IntrinsicType a2( A.load(i+IT::size,k) );
3755  const IntrinsicType b1( set( B(k,j ) ) );
3756  const IntrinsicType b2( set( B(k,j+1UL) ) );
3757  xmm1 = xmm1 + a1 * b1;
3758  xmm2 = xmm2 + a2 * b1;
3759  xmm3 = xmm3 + a1 * b2;
3760  xmm4 = xmm4 + a2 * b2;
3761  }
3762  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3763  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3764  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3765  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
3766  }
3767  if( j < N ) { // Single remaining column when N is odd
3768  IntrinsicType xmm1, xmm2;
3769  for( size_t k=0UL; k<K; ++k ) {
3770  const IntrinsicType b1( set( B(k,j) ) );
3771  xmm1 = xmm1 + A.load(i ,k) * b1;
3772  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3773  }
3774  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3775  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
3776  }
3777  }
3778  if( i < M ) { // Last single vector of rows, two columns at a time
3779  size_t j( 0UL );
3780  for( ; (j+2UL) <= N; j+=2UL ) {
3781  IntrinsicType xmm1, xmm2;
3782  for( size_t k=0UL; k<K; ++k ) {
3783  const IntrinsicType a1( A.load(i,k) );
3784  xmm1 = xmm1 + a1 * set( B(k,j ) );
3785  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3786  }
3787  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3788  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
3789  }
3790  if( j < N ) { // Single remaining column when N is odd
3791  IntrinsicType xmm1;
3792  for( size_t k=0UL; k<K; ++k ) {
3793  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3794  }
3795  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3796  }
3797  }
3798  }
3799  //**********************************************************************************************
3800 
3801  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback of the BLAS-based subtraction assignment: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to the
// default subtraction assignment kernel.
3815  template< typename MT3 // Target matrix type
3816  , typename MT4 // Left operand type
3817  , typename MT5 // Right operand type
3818  , typename ST2 > // Scalar type
3819  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3820  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3821  {
3822  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3823  }
3824  //**********************************************************************************************
3825 
3826  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3827 #if BLAZE_BLAS_MODE
3828 
// Single precision BLAS kernel for the subtraction assignment of a scaled matrix
// product. Selected through EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds. Calls cblas_sgemm with alpha = -scalar and beta = 1, so the scaled product is
// subtracted from C.
3841  template< typename MT3 // Target matrix type
3842  , typename MT4 // Left operand type
3843  , typename MT5 // Right operand type
3844  , typename ST2 > // Scalar type
3845  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3846  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3847  {
3849 
3853  // The CBLAS call takes int arguments; every size is converted via numeric_cast.
3854  const int m( boost::numeric_cast<int>( A.rows() ) );
3855  const int n( boost::numeric_cast<int>( B.columns() ) );
3856  const int k( boost::numeric_cast<int>( A.columns() ) );
3857  const int strideA( boost::numeric_cast<int>( A.spacing() ) );
3858  const int strideB( boost::numeric_cast<int>( B.spacing() ) );
3859  const int strideC( boost::numeric_cast<int>( C.spacing() ) );
3860  const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value ); // Row-major target: both operands are flagged as transposed
3861  cblas_sgemm( rowMajorTarget ? CblasRowMajor : CblasColMajor,
3862  rowMajorTarget ? CblasTrans : CblasNoTrans,
3863  rowMajorTarget ? CblasTrans : CblasNoTrans,
3864  m, n, k, -scalar, A.data(), strideA, B.data(), strideB, 1.0F, C.data(), strideC );
3865  }
3866 #endif
3867  //**********************************************************************************************
3868 
3869  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3870 #if BLAZE_BLAS_MODE
3871 
// Double precision BLAS kernel for the subtraction assignment of a scaled matrix product.
//
// This overload is only viable when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds (the
// condition is encoded in the EnableIf return type). The work is delegated to cblas_dgemm
// with alpha = -scalar and beta = 1.0, i.e. the scaled product of A and B is subtracted
// from the current contents of C.
//
// \param C The target left-hand side matrix; updated in place through C.data().
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor; its negation is passed to BLAS as alpha.
3884  template< typename MT3 // Type of the left-hand side target matrix
3885  , typename MT4 // Type of the left-hand side matrix operand
3886  , typename MT5 // Type of the right-hand side matrix operand
3887  , typename ST2 > // Type of the scalar value
3888  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3889  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3890  {
3891  using boost::numeric_cast;
3892 
3896 
// CBLAS takes int dimensions and leading dimensions; the size_t values are converted via
// boost::numeric_cast (NOTE(review): expected to reject values that do not fit into int
// rather than truncate them -- confirm against the Boost documentation).
3897  const int M ( numeric_cast<int>( A.rows() ) );
3898  const int N ( numeric_cast<int>( B.columns() ) );
3899  const int K ( numeric_cast<int>( A.columns() ) );
3900  const int lda( numeric_cast<int>( A.spacing() ) );
3901  const int ldb( numeric_cast<int>( B.spacing() ) );
3902  const int ldc( numeric_cast<int>( C.spacing() ) );
3903 
// The storage order of the target decides the call layout: a row-major C uses CblasRowMajor
// with both operands flagged CblasTrans, any other C uses CblasColMajor with CblasNoTrans.
// NOTE(review): presumably this relies on A and B being stored column-major -- confirm.
3904  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3905  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3906  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3907  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3908  }
3909 #endif
3910  //**********************************************************************************************
3911 
3912  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3913 #if BLAZE_BLAS_MODE
3914 
// Single precision complex BLAS kernel for the subtraction assignment of a scaled product.
//
// This overload is only viable when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the
// condition is encoded in the EnableIf return type; the scalar type ST2 is not part of it).
// The work is delegated to cblas_cgemm with alpha = -scalar and beta = (1,0), i.e. the scaled
// product of A and B is subtracted from the current contents of C.
//
// \param C The target left-hand side matrix; updated in place through C.data().
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor; its negation is converted to complex<float> for BLAS.
3927  template< typename MT3 // Type of the left-hand side target matrix
3928  , typename MT4 // Type of the left-hand side matrix operand
3929  , typename MT5 // Type of the right-hand side matrix operand
3930  , typename ST2 > // Type of the scalar value
3931  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3932  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3933  {
3934  using boost::numeric_cast;
3935 
// Compile time constraints on the value_type nested in each matrix element type
// (NOTE(review): by its name the macro requires exactly 'float' -- confirm in Float.h).
3939  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3940  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3941  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3942 
// CBLAS takes int dimensions and leading dimensions; the size_t values are converted via
// boost::numeric_cast (NOTE(review): expected to reject values that do not fit into int
// rather than truncate them -- confirm against the Boost documentation).
3943  const int M ( numeric_cast<int>( A.rows() ) );
3944  const int N ( numeric_cast<int>( B.columns() ) );
3945  const int K ( numeric_cast<int>( A.columns() ) );
3946  const int lda( numeric_cast<int>( A.spacing() ) );
3947  const int ldb( numeric_cast<int>( B.spacing() ) );
3948  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM interface receives alpha and beta by address, hence the named locals.
3949  const complex<float> alpha( -scalar );
3950  const complex<float> beta ( 1.0F, 0.0F );
3951 
// The storage order of the target decides the call layout: a row-major C uses CblasRowMajor
// with both operands flagged CblasTrans, any other C uses CblasColMajor with CblasNoTrans.
// NOTE(review): presumably this relies on A and B being stored column-major -- confirm.
3952  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3953  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3954  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3955  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3956  }
3957 #endif
3958  //**********************************************************************************************
3959 
3960  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3961 #if BLAZE_BLAS_MODE
3962 
// Double precision complex BLAS kernel for the subtraction assignment of a scaled product.
//
// This overload is only viable when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the
// condition is encoded in the EnableIf return type; the scalar type ST2 is not part of it).
// The work is delegated to cblas_zgemm with alpha = -scalar and beta = (1,0), i.e. the scaled
// product of A and B is subtracted from the current contents of C.
//
// \param C The target left-hand side matrix; updated in place through C.data().
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scaling factor; its negation is converted to complex<double> for BLAS.
3975  template< typename MT3 // Type of the left-hand side target matrix
3976  , typename MT4 // Type of the left-hand side matrix operand
3977  , typename MT5 // Type of the right-hand side matrix operand
3978  , typename ST2 > // Type of the scalar value
3979  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3980  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3981  {
3982  using boost::numeric_cast;
3983 
// Compile time constraints on the value_type nested in each matrix element type
// (NOTE(review): by its name the macro requires exactly 'double' -- confirm in Double.h).
3987  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3988  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3989  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3990 
// CBLAS takes int dimensions and leading dimensions; the size_t values are converted via
// boost::numeric_cast (NOTE(review): expected to reject values that do not fit into int
// rather than truncate them -- confirm against the Boost documentation).
3991  const int M ( numeric_cast<int>( A.rows() ) );
3992  const int N ( numeric_cast<int>( B.columns() ) );
3993  const int K ( numeric_cast<int>( A.columns() ) );
3994  const int lda( numeric_cast<int>( A.spacing() ) );
3995  const int ldb( numeric_cast<int>( B.spacing() ) );
3996  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex GEMM interface receives alpha and beta by address, hence the named locals.
3997  const complex<double> alpha( -scalar );
3998  const complex<double> beta ( 1.0, 0.0 );
3999 
// The storage order of the target decides the call layout: a row-major C uses CblasRowMajor
// with both operands flagged CblasTrans, any other C uses CblasColMajor with CblasNoTrans.
// NOTE(review): presumably this relies on A and B being stored column-major -- confirm.
4000  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4001  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4002  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4003  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4004  }
4005 #endif
4006  //**********************************************************************************************
4007 
4008  //**Subtraction assignment to sparse matrices***************************************************
4009  // No special implementation for the subtraction assignment to sparse matrices.
4010  //**********************************************************************************************
4011 
4012  //**Multiplication assignment to dense matrices*************************************************
4013  // No special implementation for the multiplication assignment to dense matrices.
4014  //**********************************************************************************************
4015 
4016  //**Multiplication assignment to sparse matrices************************************************
4017  // No special implementation for the multiplication assignment to sparse matrices.
4018  //**********************************************************************************************
4019 
4020  //**Compile time checks*************************************************************************
4029  //**********************************************************************************************
4030 };
4032 //*************************************************************************************************
4033 
4034 
4035 
4036 
4037 //=================================================================================================
4038 //
4039 // GLOBAL BINARY ARITHMETIC OPERATORS
4040 //
4041 //=================================================================================================
4042 
4043 //*************************************************************************************************
// Multiplication operator for two column-major dense matrices.
//
// Builds the expression object representing the product of lhs and rhs; nothing is computed
// here beyond the size check.
//
// \param lhs The left-hand side matrix of the multiplication.
// \param rhs The right-hand side matrix of the multiplication.
// \return A TDMatTDMatMultExpr<T1,T2> wrapping the two operands.
// \exception std::invalid_argument The number of columns of lhs differs from the number of
//            rows of rhs ("Matrix sizes do not match").
//
// NOTE(review): the declarator on listing line 4072 was absent from this listing and has been
// restored from the return type, the use of lhs/rhs in the body and the column-major
// DenseMatrix base of TDMatTDMatMultExpr -- confirm against the original header. Listing line
// 4074 is still absent.
4069 template< typename T1 // Type of the left-hand side dense matrix
4070  , typename T2 > // Type of the right-hand side dense matrix
4071 inline const TDMatTDMatMultExpr<T1,T2>
4072  operator*( const DenseMatrix<T1,true>& lhs, const DenseMatrix<T2,true>& rhs )
4073 {
4075 
// The inner dimensions must agree before an expression object may be created.
4076  if( (~lhs).columns() != (~rhs).rows() )
4077  throw std::invalid_argument( "Matrix sizes do not match" );
4078 
4079  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4080 }
4081 //*************************************************************************************************
4082 
4083 
4084 
4085 
4086 //=================================================================================================
4087 //
4088 // EXPRESSION TRAIT SPECIALIZATIONS
4089 //
4090 //=================================================================================================
4091 
4092 //*************************************************************************************************
// Specialization of TDMatDVecMultExprTrait for a TDMatTDMatMultExpr on the left-hand side.
//
// When MT1 and MT2 are column-major dense matrices and VT is a dense column vector, Type is
// the result of nesting the trait as MT1 * ( MT2 * VT ): the inner trait is evaluated for
// MT2 and VT and its result is used as the vector operand for MT1. In every other case Type
// is INVALID_TYPE.
4094 template< typename MT1, typename MT2, typename VT >
4095 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4096 {
4097  public:
4098  //**********************************************************************************************
4099  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4100  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4101  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4102  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4103  , INVALID_TYPE >::Type Type;
4104  //**********************************************************************************************
4105 };
4107 //*************************************************************************************************
4108 
4109 
4110 //*************************************************************************************************
// Specialization of TDMatSVecMultExprTrait for a TDMatTDMatMultExpr on the left-hand side.
//
// When MT1 and MT2 are column-major dense matrices and VT is a sparse column vector, Type is
// the result of nesting the traits as MT1 * ( MT2 * VT ). The inner step uses
// TDMatSVecMultExprTrait<MT2,VT>, the outer step uses TDMatDVecMultExprTrait
// (NOTE(review): presumably because the inner product already yields a dense vector --
// confirm). In every other case Type is INVALID_TYPE.
4112 template< typename MT1, typename MT2, typename VT >
4113 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4114 {
4115  public:
4116  //**********************************************************************************************
4117  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4118  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4119  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4120  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4121  , INVALID_TYPE >::Type Type;
4122  //**********************************************************************************************
4123 };
4125 //*************************************************************************************************
4126 
4127 
4128 //*************************************************************************************************
// Specialization of TDVecTDMatMultExprTrait for a TDMatTDMatMultExpr on the right-hand side.
//
// When VT is a dense row vector and MT1 and MT2 are column-major dense matrices, Type is the
// result of nesting the trait as ( VT * MT1 ) * MT2: the inner trait is evaluated for VT and
// MT1 and its result is used as the vector operand for MT2. In every other case Type is
// INVALID_TYPE.
4130 template< typename VT, typename MT1, typename MT2 >
4131 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4132 {
4133  public:
4134  //**********************************************************************************************
4135  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4136  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4137  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4138  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4139  , INVALID_TYPE >::Type Type;
4140  //**********************************************************************************************
4141 };
4143 //*************************************************************************************************
4144 
4145 
4146 //*************************************************************************************************
// Specialization of TSVecTDMatMultExprTrait for a TDMatTDMatMultExpr on the right-hand side.
//
// When VT is a sparse row vector and MT1 and MT2 are column-major dense matrices, Type is the
// result of nesting the traits as ( VT * MT1 ) * MT2. The inner step uses
// TSVecTDMatMultExprTrait<VT,MT1>, the outer step uses TDVecTDMatMultExprTrait
// (NOTE(review): presumably because the inner product already yields a dense vector --
// confirm). In every other case Type is INVALID_TYPE.
4148 template< typename VT, typename MT1, typename MT2 >
4149 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4150 {
4151  public:
4152  //**********************************************************************************************
4153  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4154  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4155  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4156  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4157  , INVALID_TYPE >::Type Type;
4158  //**********************************************************************************************
4159 };
4161 //*************************************************************************************************
4162 
4163 
4164 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for TDMatTDMatMultExpr.
//
// Type is the MultExprTrait result for the submatrix types of the two operands: the trait is
// applied to const MT1 and const MT2 separately, each with the same AF flag, and the two
// resulting types are combined as a multiplication.
4166 template< typename MT1, typename MT2, bool AF >
4167 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2>, AF >
4168 {
4169  public:
4170  //**********************************************************************************************
4171  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4172  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4173  //**********************************************************************************************
4174 };
4176 //*************************************************************************************************
4177 
4178 
4179 //*************************************************************************************************
// Specialization of RowExprTrait for TDMatTDMatMultExpr.
//
// Type is the MultExprTrait result of the row type of the left operand (RowExprTrait applied
// to const MT1) multiplied with the complete right operand MT2.
4181 template< typename MT1, typename MT2 >
4182 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
4183 {
4184  public:
4185  //**********************************************************************************************
4186  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4187  //**********************************************************************************************
4188 };
4190 //*************************************************************************************************
4191 
4192 
4193 //*************************************************************************************************
// Specialization of ColumnExprTrait for TDMatTDMatMultExpr.
//
// Type is the MultExprTrait result of the complete left operand MT1 multiplied with the
// column type of the right operand (ColumnExprTrait applied to const MT2).
4195 template< typename MT1, typename MT2 >
4196 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
4197 {
4198  public:
4199  //**********************************************************************************************
4200  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4201  //**********************************************************************************************
4202 };
4204 //*************************************************************************************************
4205 
4206 } // namespace blaze
4207 
4208 #endif
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:253
Data type constraint.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:124
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4075
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:249
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold.This setting specifies t...
Definition: Thresholds.h:176
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:151
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:410
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:123
Header file for the IsColumnMajorMatrix type trait.
Header file for the sparse matrix SMP implementation.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:391
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:244
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:297
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:247
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:127
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:369
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2380
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:347
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:246
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:243
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:256
Header file for the TDMatSVecMultExprTrait class template.
Header file for the dense matrix SMP implementation.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:357
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:250
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:381
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold.This threshold repre...
Definition: Thresholds.h:472
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:245
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:247
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:327
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:259
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:282
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:262
Header file for the IsColumnVector type trait.
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:401
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:337
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:248
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:411