All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
74 #include <blaze/system/BLAS.h>
76 #include <blaze/util/Assert.h>
77 #include <blaze/util/Complex.h>
83 #include <blaze/util/EnableIf.h>
84 #include <blaze/util/InvalidType.h>
86 #include <blaze/util/SelectType.h>
87 #include <blaze/util/Types.h>
93 
94 
95 namespace blaze {
96 
97 //=================================================================================================
98 //
99 // CLASS TDMATTDMATMULTEXPR
100 //
101 //=================================================================================================
102 
103 //*************************************************************************************************
110 template< typename MT1 // Type of the left-hand side dense matrix
111  , typename MT2 > // Type of the right-hand side dense matrix
112 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
113  , private MatMatMultExpr
114  , private Computation
115 {
116  private:
117  //**Type definitions****************************************************************************
118  typedef typename MT1::ResultType RT1;  //!< Result type of the left-hand side operand type MT1.
119  typedef typename MT2::ResultType RT2;  //!< Result type of the right-hand side operand type MT2.
120  typedef typename RT1::ElementType ET1;  //!< Element type of the left-hand side result type.
121  typedef typename RT2::ElementType ET2;  //!< Element type of the right-hand side result type.
122  typedef typename MT1::CompositeType CT1;  //!< Composite type of the left-hand side operand type MT1.
123  typedef typename MT2::CompositeType CT2;  //!< Composite type of the right-hand side operand type MT2.
124  //**********************************************************************************************
125 
126  //**********************************************************************************************
129  //**********************************************************************************************
130 
131  //**********************************************************************************************
 // Compile-time flag for the right-hand side operand: set if MT2 is reported as a computation
 // (IsComputation) or as requiring evaluation (RequiresEvaluation).
133  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
134  //**********************************************************************************************
135 
136  //**********************************************************************************************
138 
142  template< typename MT >
143  struct UseSMPAssign {
144  enum { value = ( evaluateLeft || evaluateRight ) };
145  };
147  //**********************************************************************************************
148 
149  //**********************************************************************************************
151 
154  template< typename T1, typename T2, typename T3 >
155  struct UseSinglePrecisionKernel {
156  enum { value = IsFloat<typename T1::ElementType>::value &&
157  IsFloat<typename T2::ElementType>::value &&
158  IsFloat<typename T3::ElementType>::value };
159  };
161  //**********************************************************************************************
162 
163  //**********************************************************************************************
165 
168  template< typename T1, typename T2, typename T3 >
169  struct UseDoublePrecisionKernel {
170  enum { value = IsDouble<typename T1::ElementType>::value &&
171  IsDouble<typename T2::ElementType>::value &&
172  IsDouble<typename T3::ElementType>::value };
173  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
183  template< typename T1, typename T2, typename T3 >
184  struct UseSinglePrecisionComplexKernel {
185  typedef complex<float> Type;
186  enum { value = IsSame<typename T1::ElementType,Type>::value &&
187  IsSame<typename T2::ElementType,Type>::value &&
188  IsSame<typename T3::ElementType,Type>::value };
189  };
191  //**********************************************************************************************
192 
193  //**********************************************************************************************
195 
199  template< typename T1, typename T2, typename T3 >
200  struct UseDoublePrecisionComplexKernel {
201  typedef complex<double> Type;
202  enum { value = IsSame<typename T1::ElementType,Type>::value &&
203  IsSame<typename T2::ElementType,Type>::value &&
204  IsSame<typename T3::ElementType,Type>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
214  template< typename T1, typename T2, typename T3 >
215  struct UseDefaultKernel {
216  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
217  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
218  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
219  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
220  };
222  //**********************************************************************************************
223 
224  //**********************************************************************************************
226 
229  template< typename T1, typename T2, typename T3 >
230  struct UseVectorizedDefaultKernel {
231  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234  IntrinsicTrait<typename T1::ElementType>::addition &&
235  IntrinsicTrait<typename T1::ElementType>::subtraction &&
236  IntrinsicTrait<typename T1::ElementType>::multiplication };
237  };
239  //**********************************************************************************************
240 
241  public:
242  //**Type definitions****************************************************************************
 // NOTE(review): ElementType and ResultType are used below but their typedefs are not part of
 // this listing; they are presumably declared directly above — confirm against the original header.
249  typedef const ElementType ReturnType;  //!< Type returned by the element access operator().
250  typedef const ResultType CompositeType;  //!< Const ResultType, exposed as the composite type of this expression.
251 
 // Stored/returned type of the left operand: held by value if MT1 is an expression, by const reference otherwise.
253  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
254 
 // Stored/returned type of the right operand: held by value if MT2 is an expression, by const reference otherwise.
256  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
257 
260 
263  //**********************************************************************************************
264 
265  //**Compilation flags***************************************************************************
 // NOTE(review): in this listing the 'vectorizable' initializer ends in '&&' and has no closing
 // '};' — the remaining operands appear to be missing from the extracted source and must be
 // restored from the original header before this compiles.
267  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
271 
 // The expression is flagged SMP-assignable only if neither operand is flagged for evaluation
 // (evaluateLeft/evaluateRight) and both operand types are themselves flagged smpAssignable.
273  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
274  !evaluateRight && MT2::smpAssignable };
275  //**********************************************************************************************
276 
277  //**Constructor*********************************************************************************
283  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
284  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
285  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
286  {
287  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
288  }
289  //**********************************************************************************************
290 
291  //**Access operator*****************************************************************************
298  inline ReturnType operator()( size_t i, size_t j ) const {
299  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
300  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
301 
302  ElementType tmp;
303 
304  if( lhs_.columns() != 0UL ) {
305  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
306  tmp = lhs_(i,0UL) * rhs_(0UL,j);
307  for( size_t k=1UL; k<end; k+=2UL ) {
308  tmp += lhs_(i,k ) * rhs_(k ,j);
309  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
310  }
311  if( end < lhs_.columns() ) {
312  tmp += lhs_(i,end) * rhs_(end,j);
313  }
314  }
315  else {
316  reset( tmp );
317  }
318 
319  return tmp;
320  }
321  //**********************************************************************************************
322 
323  //**Rows function*******************************************************************************
328  inline size_t rows() const {
329  return lhs_.rows();
330  }
331  //**********************************************************************************************
332 
333  //**Columns function****************************************************************************
338  inline size_t columns() const {
339  return rhs_.columns();
340  }
341  //**********************************************************************************************
342 
343  //**Left operand access*************************************************************************
348  inline LeftOperand leftOperand() const {
349  return lhs_;
350  }
351  //**********************************************************************************************
352 
353  //**Right operand access************************************************************************
358  inline RightOperand rightOperand() const {
359  return rhs_;
360  }
361  //**********************************************************************************************
362 
363  //**********************************************************************************************
369  template< typename T >
370  inline bool canAlias( const T* alias ) const {
371  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
372  }
373  //**********************************************************************************************
374 
375  //**********************************************************************************************
381  template< typename T >
382  inline bool isAliased( const T* alias ) const {
383  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
384  }
385  //**********************************************************************************************
386 
387  //**********************************************************************************************
392  inline bool isAligned() const {
393  return lhs_.isAligned() && rhs_.isAligned();
394  }
395  //**********************************************************************************************
396 
397  //**********************************************************************************************
 // Returns whether the expression can be used in SMP assignments.
 // The visible part of the condition holds if BLAS is not parallel (BLAZE_BLAS_IS_PARALLEL is
 // off) or the number of result elements is below TDMATTDMATMULT_THRESHOLD.
 // NOTE(review): the return expression ends in '&&' with no further operand or ';' in this
 // listing — the final operand appears to be missing from the extracted source and must be
 // restored from the original header.
402  inline bool canSMPAssign() const {
403  return ( !BLAZE_BLAS_IS_PARALLEL ||
404  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
406  }
407  //**********************************************************************************************
408 
409  private:
410  //**Member variables****************************************************************************
413  //**********************************************************************************************
414 
415  //**Assignment to dense matrices****************************************************************
428  template< typename MT // Type of the target dense matrix
429  , bool SO > // Storage order of the target dense matrix
430  friend inline void assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
431  {
433 
434  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
435  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
436 
437  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
438  return;
439  }
440  else if( rhs.lhs_.columns() == 0UL ) {
441  reset( ~lhs );
442  return;
443  }
444 
445  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
446  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
447 
448  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
449  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
450  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
451  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
452  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
453  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
454 
455  TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
456  }
458  //**********************************************************************************************
459 
460  //**Assignment to dense matrices (kernel selection)*********************************************
471  template< typename MT3 // Type of the left-hand side target matrix
472  , typename MT4 // Type of the left-hand side matrix operand
473  , typename MT5 > // Type of the right-hand side matrix operand
474  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
475  {
476  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
477  TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
478  else
479  TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
480  }
482  //**********************************************************************************************
483 
484  //**Default assignment to dense matrices********************************************************
498  template< typename MT3 // Type of the left-hand side target matrix
499  , typename MT4 // Type of the left-hand side matrix operand
500  , typename MT5 > // Type of the right-hand side matrix operand
501  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
502  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
503  {
504  const size_t M( A.rows() );
505  const size_t N( B.columns() );
506  const size_t K( A.columns() );
507 
508  for( size_t i=0UL; i<M; ++i ) {
509  for( size_t j=0UL; j<N; ++j ) {
510  C(i,j) = A(i,0UL) * B(0UL,j);
511  }
512  for( size_t k=1UL; k<K; ++k ) {
513  for( size_t j=0UL; j<N; ++j ) {
514  C(i,j) += A(i,k) * B(k,j);
515  }
516  }
517  }
518  }
520  //**********************************************************************************************
521 
522  //**Vectorized default assignment to row-major dense matrices***********************************
536  template< typename MT3 // Type of the left-hand side target matrix
537  , typename MT4 // Type of the left-hand side matrix operand
538  , typename MT5 > // Type of the right-hand side matrix operand
539  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
540  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
541  {
544 
545  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
546  const typename MT5::OppositeType tmp( serial( B ) );
547  assign( ~C, A * tmp );
548  }
549  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
550  const typename MT4::OppositeType tmp( serial( A ) );
551  assign( ~C, tmp * B );
552  }
553  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
554  const typename MT5::OppositeType tmp( serial( B ) );
555  assign( ~C, A * tmp );
556  }
557  else {
558  const typename MT4::OppositeType tmp( serial( A ) );
559  assign( ~C, tmp * B );
560  }
561  }
563  //**********************************************************************************************
564 
565  //**Vectorized default assignment to column-major dense matrices********************************
 // Vectorized default assignment kernel for column-major targets: C = A * B.
 // Enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> is set. The rows of C are processed in
 // blocks of 8, 4, 2 and finally 1 chunk(s) of IT::size rows. For every chunk and target column j
 // the kernel accumulates A.load(i,k) * set( B(k,j) ) over all k and writes the result with
 // (~C).store().
 // NOTE(review): every accumulator is default-constructed and then used in 'xmmN = xmmN + ...';
 // this presumably relies on IntrinsicType's default constructor zero-initializing — confirm.
 // NOTE(review): chunk loads/stores always span IT::size rows, also in the final 'i < M' block;
 // presumably the operands provide sufficient padding — confirm.
579  template< typename MT3 // Type of the left-hand side target matrix
580  , typename MT4 // Type of the left-hand side matrix operand
581  , typename MT5 > // Type of the right-hand side matrix operand
582  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
583  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
584  {
585  typedef IntrinsicTrait<ElementType> IT;
586 
587  const size_t M( A.rows() );
588  const size_t N( B.columns() );
589  const size_t K( A.columns() );
590 
591  size_t i( 0UL );
592 
 // Blocks of 8 chunks of rows, one target column at a time
593  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
594  for( size_t j=0UL; j<N; ++j ) {
595  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
596  for( size_t k=0UL; k<K; ++k ) {
597  const IntrinsicType b1( set( B(k,j) ) );
598  xmm1 = xmm1 + A.load(i ,k) * b1;
599  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
600  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
601  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
602  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
603  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
604  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
605  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
606  }
607  (~C).store( i , j, xmm1 );
608  (~C).store( i+IT::size , j, xmm2 );
609  (~C).store( i+IT::size*2UL, j, xmm3 );
610  (~C).store( i+IT::size*3UL, j, xmm4 );
611  (~C).store( i+IT::size*4UL, j, xmm5 );
612  (~C).store( i+IT::size*5UL, j, xmm6 );
613  (~C).store( i+IT::size*6UL, j, xmm7 );
614  (~C).store( i+IT::size*7UL, j, xmm8 );
615  }
616  }
 // Blocks of 4 chunks of rows, two target columns at a time (plus one leftover column)
617  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
618  size_t j( 0UL );
619  for( ; (j+2UL) <= N; j+=2UL ) {
620  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
621  for( size_t k=0UL; k<K; ++k ) {
622  const IntrinsicType a1( A.load(i ,k) );
623  const IntrinsicType a2( A.load(i+IT::size ,k) );
624  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
625  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
626  const IntrinsicType b1( set( B(k,j ) ) );
627  const IntrinsicType b2( set( B(k,j+1UL) ) );
628  xmm1 = xmm1 + a1 * b1;
629  xmm2 = xmm2 + a2 * b1;
630  xmm3 = xmm3 + a3 * b1;
631  xmm4 = xmm4 + a4 * b1;
632  xmm5 = xmm5 + a1 * b2;
633  xmm6 = xmm6 + a2 * b2;
634  xmm7 = xmm7 + a3 * b2;
635  xmm8 = xmm8 + a4 * b2;
636  }
637  (~C).store( i , j , xmm1 );
638  (~C).store( i+IT::size , j , xmm2 );
639  (~C).store( i+IT::size*2UL, j , xmm3 );
640  (~C).store( i+IT::size*3UL, j , xmm4 );
641  (~C).store( i , j+1UL, xmm5 );
642  (~C).store( i+IT::size , j+1UL, xmm6 );
643  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
644  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
645  }
646  if( j < N ) {
647  IntrinsicType xmm1, xmm2, xmm3, xmm4;
648  for( size_t k=0UL; k<K; ++k ) {
649  const IntrinsicType b1( set( B(k,j) ) );
650  xmm1 = xmm1 + A.load(i ,k) * b1;
651  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
652  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
653  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
654  }
655  (~C).store( i , j, xmm1 );
656  (~C).store( i+IT::size , j, xmm2 );
657  (~C).store( i+IT::size*2UL, j, xmm3 );
658  (~C).store( i+IT::size*3UL, j, xmm4 );
659  }
660  }
 // Blocks of 2 chunks of rows, two target columns at a time (plus one leftover column)
661  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
662  size_t j( 0UL );
663  for( ; (j+2UL) <= N; j+=2UL ) {
664  IntrinsicType xmm1, xmm2, xmm3, xmm4;
665  for( size_t k=0UL; k<K; ++k ) {
666  const IntrinsicType a1( A.load(i ,k) );
667  const IntrinsicType a2( A.load(i+IT::size,k) );
668  const IntrinsicType b1( set( B(k,j ) ) );
669  const IntrinsicType b2( set( B(k,j+1UL) ) );
670  xmm1 = xmm1 + a1 * b1;
671  xmm2 = xmm2 + a2 * b1;
672  xmm3 = xmm3 + a1 * b2;
673  xmm4 = xmm4 + a2 * b2;
674  }
675  (~C).store( i , j , xmm1 );
676  (~C).store( i+IT::size, j , xmm2 );
677  (~C).store( i , j+1UL, xmm3 );
678  (~C).store( i+IT::size, j+1UL, xmm4 );
679  }
680  if( j < N ) {
681  IntrinsicType xmm1, xmm2;
682  for( size_t k=0UL; k<K; ++k ) {
683  const IntrinsicType b1( set( B(k,j) ) );
684  xmm1 = xmm1 + A.load(i ,k) * b1;
685  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
686  }
687  (~C).store( i , j, xmm1 );
688  (~C).store( i+IT::size, j, xmm2 );
689  }
690  }
 // Final single chunk of rows (at most one, since all larger blocks were consumed above)
691  if( i < M ) {
692  size_t j( 0UL );
693  for( ; (j+2UL) <= N; j+=2UL ) {
694  IntrinsicType xmm1, xmm2;
695  for( size_t k=0UL; k<K; ++k ) {
696  const IntrinsicType a1( A.load(i,k) );
697  xmm1 = xmm1 + a1 * set( B(k,j ) );
698  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
699  }
700  (~C).store( i, j , xmm1 );
701  (~C).store( i, j+1UL, xmm2 );
702  }
703  if( j < N ) {
704  IntrinsicType xmm1;
705  for( size_t k=0UL; k<K; ++k ) {
706  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
707  }
708  (~C).store( i, j, xmm1 );
709  }
710  }
711  }
713  //**********************************************************************************************
714 
715  //**BLAS-based assignment to dense matrices (default)*******************************************
729  template< typename MT3 // Type of the left-hand side target matrix
730  , typename MT4 // Type of the left-hand side matrix operand
731  , typename MT5 > // Type of the right-hand side matrix operand
732  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
733  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
734  {
735  selectDefaultAssignKernel( C, A, B );
736  }
738  //**********************************************************************************************
739 
740  //**BLAS-based assignment to dense matrices (single precision)**********************************
741 #if BLAZE_BLAS_MODE
742 
755  template< typename MT3 // Type of the left-hand side target matrix
756  , typename MT4 // Type of the left-hand side matrix operand
757  , typename MT5 > // Type of the right-hand side matrix operand
758  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
759  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
760  {
761  using boost::numeric_cast;
762 
766 
767  const int M ( numeric_cast<int>( A.rows() ) );
768  const int N ( numeric_cast<int>( B.columns() ) );
769  const int K ( numeric_cast<int>( A.columns() ) );
770  const int lda( numeric_cast<int>( A.spacing() ) );
771  const int ldb( numeric_cast<int>( B.spacing() ) );
772  const int ldc( numeric_cast<int>( C.spacing() ) );
773 
774  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
775  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
776  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
777  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
778  }
780 #endif
781  //**********************************************************************************************
782 
783  //**BLAS-based assignment to dense matrices (double precision)**********************************
784 #if BLAZE_BLAS_MODE
785 
798  template< typename MT3 // Type of the left-hand side target matrix
799  , typename MT4 // Type of the left-hand side matrix operand
800  , typename MT5 > // Type of the right-hand side matrix operand
801  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
802  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
803  {
804  using boost::numeric_cast;
805 
809 
810  const int M ( numeric_cast<int>( A.rows() ) );
811  const int N ( numeric_cast<int>( B.columns() ) );
812  const int K ( numeric_cast<int>( A.columns() ) );
813  const int lda( numeric_cast<int>( A.spacing() ) );
814  const int ldb( numeric_cast<int>( B.spacing() ) );
815  const int ldc( numeric_cast<int>( C.spacing() ) );
816 
817  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
818  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
819  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
820  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
821  }
823 #endif
824  //**********************************************************************************************
825 
826  //**BLAS-based assignment to dense matrices (single precision complex)**************************
827 #if BLAZE_BLAS_MODE
828 
841  template< typename MT3 // Type of the left-hand side target matrix
842  , typename MT4 // Type of the left-hand side matrix operand
843  , typename MT5 > // Type of the right-hand side matrix operand
844  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
845  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
846  {
847  using boost::numeric_cast;
848 
852  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
853  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
854  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
855 
856  const int M ( numeric_cast<int>( A.rows() ) );
857  const int N ( numeric_cast<int>( B.columns() ) );
858  const int K ( numeric_cast<int>( A.columns() ) );
859  const int lda( numeric_cast<int>( A.spacing() ) );
860  const int ldb( numeric_cast<int>( B.spacing() ) );
861  const int ldc( numeric_cast<int>( C.spacing() ) );
862  complex<float> alpha( 1.0F, 0.0F );
863  complex<float> beta ( 0.0F, 0.0F );
864 
865  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
866  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
867  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
868  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
869  }
871 #endif
872  //**********************************************************************************************
873 
874  //**BLAS-based assignment to dense matrices (double precision complex)**************************
875 #if BLAZE_BLAS_MODE
876 
889  template< typename MT3 // Type of the left-hand side target matrix
890  , typename MT4 // Type of the left-hand side matrix operand
891  , typename MT5 > // Type of the right-hand side matrix operand
892  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
893  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
894  {
895  using boost::numeric_cast;
896 
900  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
901  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
902  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
903 
904  const int M ( numeric_cast<int>( A.rows() ) );
905  const int N ( numeric_cast<int>( B.columns() ) );
906  const int K ( numeric_cast<int>( A.columns() ) );
907  const int lda( numeric_cast<int>( A.spacing() ) );
908  const int ldb( numeric_cast<int>( B.spacing() ) );
909  const int ldc( numeric_cast<int>( C.spacing() ) );
910  complex<double> alpha( 1.0, 0.0 );
911  complex<double> beta ( 0.0, 0.0 );
912 
913  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
914  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
915  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
916  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
917  }
919 #endif
920  //**********************************************************************************************
921 
922  //**Assignment to sparse matrices***************************************************************
935  template< typename MT // Type of the target sparse matrix
936  , bool SO > // Storage order of the target sparse matrix
937  friend inline void assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
938  {
940 
941  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
942 
949 
950  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
951  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
952 
953  const TmpType tmp( serial( rhs ) );
954  assign( ~lhs, tmp );
955  }
957  //**********************************************************************************************
958 
959  //**Addition assignment to dense matrices*******************************************************
972  template< typename MT // Type of the target dense matrix
973  , bool SO > // Storage order of the target dense matrix
974  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
975  {
977 
978  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
979  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
980 
981  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
982  return;
983  }
984 
985  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
986  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
987 
988  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
989  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
990  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
991  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
992  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
993  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
994 
995  TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
996  }
998  //**********************************************************************************************
999 
1000  //**Addition assignment to dense matrices (kernel selection)************************************
1011  template< typename MT3 // Type of the left-hand side target matrix
1012  , typename MT4 // Type of the left-hand side matrix operand
1013  , typename MT5 > // Type of the right-hand side matrix operand
1014  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1015  {
1016  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1017  TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1018  else
1019  TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1020  }
1022  //**********************************************************************************************
1023 
1024  //**Default addition assignment to dense matrices***********************************************
1038  template< typename MT3 // Type of the left-hand side target matrix
1039  , typename MT4 // Type of the left-hand side matrix operand
1040  , typename MT5 > // Type of the right-hand side matrix operand
1041  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1042  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1043  {
1044  const size_t M( A.rows() );
1045  const size_t N( B.columns() );
1046  const size_t K( A.columns() );
1047 
1048  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1049  const size_t end( N & size_t(-2) );
1050 
1051  for( size_t i=0UL; i<M; ++i ) {
1052  for( size_t k=0UL; k<K; ++k ) {
1053  for( size_t j=0UL; j<end; j+=2UL ) {
1054  C(i,j ) += A(i,k) * B(k,j );
1055  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1056  }
1057  if( end < N ) {
1058  C(i,end) += A(i,k) * B(k,end);
1059  }
1060  }
1061  }
1062  }
1064  //**********************************************************************************************
1065 
1066  //**Vectorized default addition assignment to row-major dense matrices**************************
1080  template< typename MT3 // Type of the left-hand side target matrix
1081  , typename MT4 // Type of the left-hand side matrix operand
1082  , typename MT5 > // Type of the right-hand side matrix operand
1083  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1085  {
1088 
1089  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1090  const typename MT5::OppositeType tmp( serial( B ) );
1091  addAssign( ~C, A * tmp );
1092  }
1093  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1094  const typename MT4::OppositeType tmp( serial( A ) );
1095  addAssign( ~C, tmp * B );
1096  }
1097  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1098  const typename MT5::OppositeType tmp( serial( B ) );
1099  addAssign( ~C, A * tmp );
1100  }
1101  else {
1102  const typename MT4::OppositeType tmp( serial( A ) );
1103  addAssign( ~C, tmp * B );
1104  }
1105  }
1107  //**********************************************************************************************
1108 
1109  //**Vectorized default addition assignment to column-major dense matrices***********************
/*!\brief Vectorized default addition assignment kernel for column-major target matrices
//        (\f$ C+=A*B \f$).
//
// \param C The target left-hand side dense matrix (accessed via load() and store()).
// \param A The left-hand side multiplication operand (accessed via load()).
// \param B The right-hand side multiplication operand (accessed element-wise).
// \return void
//
// The rows of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s), where
// each vector is addressed in steps of IT::size rows. For every block the current values of
// C are loaded into accumulators, the products of the vectors of A with set( B(k,j) ) are
// added for all k, and the accumulators are stored back to C. set() is presumably a
// broadcast of the scalar to all vector elements -- confirm against its definition.
//
// NOTE(review): the loop conditions only guarantee that every vector *starts* below row M;
// the last vector of a block may reach beyond M. Presumably the operands admitted by
// UseVectorizedDefaultKernel are padded accordingly -- confirm.
*/
1123  template< typename MT3 // Type of the left-hand side target matrix
1124  , typename MT4 // Type of the left-hand side matrix operand
1125  , typename MT5 > // Type of the right-hand side matrix operand
1126  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1127  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1128  {
1129  typedef IntrinsicTrait<ElementType> IT;
1130 
1131  const size_t M( A.rows() );
1132  const size_t N( B.columns() );
1133  const size_t K( A.columns() );
1134 
      // Row index shared by all tiers below; each tier continues where the previous one stopped
1135  size_t i( 0UL );
1136 
      // Tier 1: blocks of 8 vectors of rows, one column of C per iteration
1137  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1138  for( size_t j=0UL; j<N; ++j ) {
1139  IntrinsicType xmm1( (~C).load(i ,j) );
1140  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1141  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1142  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1143  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1144  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1145  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1146  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1147  for( size_t k=0UL; k<K; ++k ) {
1148  const IntrinsicType b1( set( B(k,j) ) );
1149  xmm1 = xmm1 + A.load(i ,k) * b1;
1150  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1151  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1152  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1153  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1154  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1155  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1156  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1157  }
1158  (~C).store( i , j, xmm1 );
1159  (~C).store( i+IT::size , j, xmm2 );
1160  (~C).store( i+IT::size*2UL, j, xmm3 );
1161  (~C).store( i+IT::size*3UL, j, xmm4 );
1162  (~C).store( i+IT::size*4UL, j, xmm5 );
1163  (~C).store( i+IT::size*5UL, j, xmm6 );
1164  (~C).store( i+IT::size*6UL, j, xmm7 );
1165  (~C).store( i+IT::size*7UL, j, xmm8 );
1166  }
1167  }
      // Tier 2: blocks of 4 vectors of rows, two columns of C per iteration
1168  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1169  size_t j( 0UL );
1170  for( ; (j+2UL) <= N; j+=2UL ) {
1171  IntrinsicType xmm1( (~C).load(i ,j ) );
1172  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1173  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1174  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1175  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1176  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1177  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1178  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1179  for( size_t k=0UL; k<K; ++k ) {
1180  const IntrinsicType a1( A.load(i ,k) );
1181  const IntrinsicType a2( A.load(i+IT::size ,k) );
1182  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1183  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1184  const IntrinsicType b1( set( B(k,j ) ) );
1185  const IntrinsicType b2( set( B(k,j+1UL) ) );
1186  xmm1 = xmm1 + a1 * b1;
1187  xmm2 = xmm2 + a2 * b1;
1188  xmm3 = xmm3 + a3 * b1;
1189  xmm4 = xmm4 + a4 * b1;
1190  xmm5 = xmm5 + a1 * b2;
1191  xmm6 = xmm6 + a2 * b2;
1192  xmm7 = xmm7 + a3 * b2;
1193  xmm8 = xmm8 + a4 * b2;
1194  }
1195  (~C).store( i , j , xmm1 );
1196  (~C).store( i+IT::size , j , xmm2 );
1197  (~C).store( i+IT::size*2UL, j , xmm3 );
1198  (~C).store( i+IT::size*3UL, j , xmm4 );
1199  (~C).store( i , j+1UL, xmm5 );
1200  (~C).store( i+IT::size , j+1UL, xmm6 );
1201  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1202  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1203  }
      // Trailing single column (only reached for an odd number of columns)
1204  if( j < N ) {
1205  IntrinsicType xmm1( (~C).load(i ,j) );
1206  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1207  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1208  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1209  for( size_t k=0UL; k<K; ++k ) {
1210  const IntrinsicType b1( set( B(k,j) ) );
1211  xmm1 = xmm1 + A.load(i ,k) * b1;
1212  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1213  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1214  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1215  }
1216  (~C).store( i , j, xmm1 );
1217  (~C).store( i+IT::size , j, xmm2 );
1218  (~C).store( i+IT::size*2UL, j, xmm3 );
1219  (~C).store( i+IT::size*3UL, j, xmm4 );
1220  }
1221  }
      // Tier 3: blocks of 2 vectors of rows, two columns of C per iteration
1222  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1223  size_t j( 0UL );
1224  for( ; (j+2UL) <= N; j+=2UL ) {
1225  IntrinsicType xmm1( (~C).load(i ,j ) );
1226  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1227  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1228  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1229  for( size_t k=0UL; k<K; ++k ) {
1230  const IntrinsicType a1( A.load(i ,k) );
1231  const IntrinsicType a2( A.load(i+IT::size,k) );
1232  const IntrinsicType b1( set( B(k,j ) ) );
1233  const IntrinsicType b2( set( B(k,j+1UL) ) );
1234  xmm1 = xmm1 + a1 * b1;
1235  xmm2 = xmm2 + a2 * b1;
1236  xmm3 = xmm3 + a1 * b2;
1237  xmm4 = xmm4 + a2 * b2;
1238  }
1239  (~C).store( i , j , xmm1 );
1240  (~C).store( i+IT::size, j , xmm2 );
1241  (~C).store( i , j+1UL, xmm3 );
1242  (~C).store( i+IT::size, j+1UL, xmm4 );
1243  }
      // Trailing single column (only reached for an odd number of columns)
1244  if( j < N ) {
1245  IntrinsicType xmm1( (~C).load(i ,j) );
1246  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1247  for( size_t k=0UL; k<K; ++k ) {
1248  const IntrinsicType b1( set( B(k,j) ) );
1249  xmm1 = xmm1 + A.load(i ,k) * b1;
1250  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1251  }
1252  (~C).store( i , j, xmm1 );
1253  (~C).store( i+IT::size, j, xmm2 );
1254  }
1255  }
      // Tier 4: at most one vector of rows is left after the previous tiers
1256  if( i < M ) {
1257  size_t j( 0UL );
1258  for( ; (j+2UL) <= N; j+=2UL ) {
1259  IntrinsicType xmm1( (~C).load(i,j ) );
1260  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1261  for( size_t k=0UL; k<K; ++k ) {
1262  const IntrinsicType a1( A.load(i,k) );
1263  xmm1 = xmm1 + a1 * set( B(k,j ) );
1264  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1265  }
1266  (~C).store( i, j , xmm1 );
1267  (~C).store( i, j+1UL, xmm2 );
1268  }
      // Trailing single column (only reached for an odd number of columns)
1269  if( j < N ) {
1270  IntrinsicType xmm1( (~C).load(i,j) );
1271  for( size_t k=0UL; k<K; ++k ) {
1272  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1273  }
1274  (~C).store( i, j, xmm1 );
1275  }
1276  }
1277  }
1279  //**********************************************************************************************
1280 
1281  //**BLAS-based addition assignment to dense matrices (default)**********************************
1295  template< typename MT3 // Type of the left-hand side target matrix
1296  , typename MT4 // Type of the left-hand side matrix operand
1297  , typename MT5 > // Type of the right-hand side matrix operand
1298  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1299  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1300  {
1301  selectDefaultAddAssignKernel( C, A, B );
1302  }
1304  //**********************************************************************************************
1305 
1306  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1307 #if BLAZE_BLAS_MODE
1308 
1321  template< typename MT3 // Type of the left-hand side target matrix
1322  , typename MT4 // Type of the left-hand side matrix operand
1323  , typename MT5 > // Type of the right-hand side matrix operand
1324  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1325  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1326  {
1327  using boost::numeric_cast;
1328 
1332 
1333  const int M ( numeric_cast<int>( A.rows() ) );
1334  const int N ( numeric_cast<int>( B.columns() ) );
1335  const int K ( numeric_cast<int>( A.columns() ) );
1336  const int lda( numeric_cast<int>( A.spacing() ) );
1337  const int ldb( numeric_cast<int>( B.spacing() ) );
1338  const int ldc( numeric_cast<int>( C.spacing() ) );
1339 
1340  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1341  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1342  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1343  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1344  }
1346 #endif
1347  //**********************************************************************************************
1348 
1349  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1350 #if BLAZE_BLAS_MODE
1351 
1364  template< typename MT3 // Type of the left-hand side target matrix
1365  , typename MT4 // Type of the left-hand side matrix operand
1366  , typename MT5 > // Type of the right-hand side matrix operand
1367  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1368  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1369  {
1370  using boost::numeric_cast;
1371 
1375 
1376  const int M ( numeric_cast<int>( A.rows() ) );
1377  const int N ( numeric_cast<int>( B.columns() ) );
1378  const int K ( numeric_cast<int>( A.columns() ) );
1379  const int lda( numeric_cast<int>( A.spacing() ) );
1380  const int ldb( numeric_cast<int>( B.spacing() ) );
1381  const int ldc( numeric_cast<int>( C.spacing() ) );
1382 
1383  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1384  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1385  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1386  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1387  }
1389 #endif
1390  //**********************************************************************************************
1391 
1392  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1393 #if BLAZE_BLAS_MODE
1394 
1407  template< typename MT3 // Type of the left-hand side target matrix
1408  , typename MT4 // Type of the left-hand side matrix operand
1409  , typename MT5 > // Type of the right-hand side matrix operand
1410  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1411  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1412  {
1413  using boost::numeric_cast;
1414 
1418  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1419  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1420  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1421 
1422  const int M ( numeric_cast<int>( A.rows() ) );
1423  const int N ( numeric_cast<int>( B.columns() ) );
1424  const int K ( numeric_cast<int>( A.columns() ) );
1425  const int lda( numeric_cast<int>( A.spacing() ) );
1426  const int ldb( numeric_cast<int>( B.spacing() ) );
1427  const int ldc( numeric_cast<int>( C.spacing() ) );
1428  const complex<float> alpha( 1.0F, 0.0F );
1429  const complex<float> beta ( 1.0F, 0.0F );
1430 
1431  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1432  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1433  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1434  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1435  }
1437 #endif
1438  //**********************************************************************************************
1439 
1440  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1441 #if BLAZE_BLAS_MODE
1442 
1455  template< typename MT3 // Type of the left-hand side target matrix
1456  , typename MT4 // Type of the left-hand side matrix operand
1457  , typename MT5 > // Type of the right-hand side matrix operand
1458  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1459  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1460  {
1461  using boost::numeric_cast;
1462 
1466  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1467  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1468  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1469 
1470  const int M ( numeric_cast<int>( A.rows() ) );
1471  const int N ( numeric_cast<int>( B.columns() ) );
1472  const int K ( numeric_cast<int>( A.columns() ) );
1473  const int lda( numeric_cast<int>( A.spacing() ) );
1474  const int ldb( numeric_cast<int>( B.spacing() ) );
1475  const int ldc( numeric_cast<int>( C.spacing() ) );
1476  const complex<double> alpha( 1.0, 0.0 );
1477  const complex<double> beta ( 1.0, 0.0 );
1478 
1479  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1480  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1481  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1482  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1483  }
1485 #endif
1486  //**********************************************************************************************
1487 
1488  //**Addition assignment to sparse matrices******************************************************
1489  // No special implementation for the addition assignment to sparse matrices.
1490  //**********************************************************************************************
1491 
1492  //**Subtraction assignment to dense matrices****************************************************
1505  template< typename MT // Type of the target dense matrix
1506  , bool SO > // Storage order of the target dense matrix
1507  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1508  {
1510 
1511  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1512  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1513 
1514  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1515  return;
1516  }
1517 
1518  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1519  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1520 
1521  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1522  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1523  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1524  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1525  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1526  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1527 
1528  TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1529  }
1531  //**********************************************************************************************
1532 
1533  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1544  template< typename MT3 // Type of the left-hand side target matrix
1545  , typename MT4 // Type of the left-hand side matrix operand
1546  , typename MT5 > // Type of the right-hand side matrix operand
1547  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1548  {
1549  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
1550  TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1551  else
1552  TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1553  }
1555  //**********************************************************************************************
1556 
1557  //**Default subtraction assignment to dense matrices********************************************
1571  template< typename MT3 // Type of the left-hand side target matrix
1572  , typename MT4 // Type of the left-hand side matrix operand
1573  , typename MT5 > // Type of the right-hand side matrix operand
1574  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1575  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1576  {
1577  const size_t M( A.rows() );
1578  const size_t N( B.columns() );
1579  const size_t K( A.columns() );
1580 
1581  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1582  const size_t end( N & size_t(-2) );
1583 
1584  for( size_t i=0UL; i<M; ++i ) {
1585  for( size_t k=0UL; k<K; ++k ) {
1586  for( size_t j=0UL; j<end; j+=2UL ) {
1587  C(i,j ) -= A(i,k) * B(k,j );
1588  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1589  }
1590  if( end < N ) {
1591  C(i,end) -= A(i,k) * B(k,end);
1592  }
1593  }
1594  }
1595  }
1597  //**********************************************************************************************
1598 
1599  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1613  template< typename MT3 // Type of the left-hand side target matrix
1614  , typename MT4 // Type of the left-hand side matrix operand
1615  , typename MT5 > // Type of the right-hand side matrix operand
1616  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1617  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1618  {
1621 
1622  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1623  const typename MT5::OppositeType tmp( serial( B ) );
1624  subAssign( ~C, A * tmp );
1625  }
1626  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1627  const typename MT4::OppositeType tmp( serial( A ) );
1628  subAssign( ~C, tmp * B );
1629  }
1630  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1631  const typename MT5::OppositeType tmp( serial( B ) );
1632  subAssign( ~C, A * tmp );
1633  }
1634  else {
1635  const typename MT4::OppositeType tmp( serial( A ) );
1636  subAssign( ~C, tmp * B );
1637  }
1638  }
1640  //**********************************************************************************************
1641 
1642  //**Vectorized default subtraction assignment to column-major dense matrices********************
/*!\brief Vectorized default subtraction assignment kernel for column-major target matrices
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix (accessed via load() and store()).
// \param A The left-hand side multiplication operand (accessed via load()).
// \param B The right-hand side multiplication operand (accessed element-wise).
// \return void
//
// The rows of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s), where
// each vector is addressed in steps of IT::size rows. For every block the current values of
// C are loaded into accumulators, the products of the vectors of A with set( B(k,j) ) are
// subtracted for all k, and the accumulators are stored back to C. set() is presumably a
// broadcast of the scalar to all vector elements -- confirm against its definition.
//
// NOTE(review): the loop conditions only guarantee that every vector *starts* below row M;
// the last vector of a block may reach beyond M. Presumably the operands admitted by
// UseVectorizedDefaultKernel are padded accordingly -- confirm.
*/
1656  template< typename MT3 // Type of the left-hand side target matrix
1657  , typename MT4 // Type of the left-hand side matrix operand
1658  , typename MT5 > // Type of the right-hand side matrix operand
1659  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1660  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1661  {
1662  typedef IntrinsicTrait<ElementType> IT;
1663 
1664  const size_t M( A.rows() );
1665  const size_t N( B.columns() );
1666  const size_t K( A.columns() );
1667 
      // Row index shared by all tiers below; each tier continues where the previous one stopped
1668  size_t i( 0UL );
1669 
      // Tier 1: blocks of 8 vectors of rows, one column of C per iteration
1670  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1671  for( size_t j=0UL; j<N; ++j ) {
1672  IntrinsicType xmm1( (~C).load(i ,j) );
1673  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1674  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1675  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1676  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
1677  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
1678  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
1679  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
1680  for( size_t k=0UL; k<K; ++k ) {
1681  const IntrinsicType b1( set( B(k,j) ) );
1682  xmm1 = xmm1 - A.load(i ,k) * b1;
1683  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1684  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1685  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1686  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1687  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1688  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1689  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1690  }
1691  (~C).store( i , j, xmm1 );
1692  (~C).store( i+IT::size , j, xmm2 );
1693  (~C).store( i+IT::size*2UL, j, xmm3 );
1694  (~C).store( i+IT::size*3UL, j, xmm4 );
1695  (~C).store( i+IT::size*4UL, j, xmm5 );
1696  (~C).store( i+IT::size*5UL, j, xmm6 );
1697  (~C).store( i+IT::size*6UL, j, xmm7 );
1698  (~C).store( i+IT::size*7UL, j, xmm8 );
1699  }
1700  }
      // Tier 2: blocks of 4 vectors of rows, two columns of C per iteration
1701  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1702  size_t j( 0UL );
1703  for( ; (j+2UL) <= N; j+=2UL ) {
1704  IntrinsicType xmm1( (~C).load(i ,j ) );
1705  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
1706  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
1707  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
1708  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1709  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
1710  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
1711  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
1712  for( size_t k=0UL; k<K; ++k ) {
1713  const IntrinsicType a1( A.load(i ,k) );
1714  const IntrinsicType a2( A.load(i+IT::size ,k) );
1715  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
1716  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
1717  const IntrinsicType b1( set( B(k,j ) ) );
1718  const IntrinsicType b2( set( B(k,j+1UL) ) );
1719  xmm1 = xmm1 - a1 * b1;
1720  xmm2 = xmm2 - a2 * b1;
1721  xmm3 = xmm3 - a3 * b1;
1722  xmm4 = xmm4 - a4 * b1;
1723  xmm5 = xmm5 - a1 * b2;
1724  xmm6 = xmm6 - a2 * b2;
1725  xmm7 = xmm7 - a3 * b2;
1726  xmm8 = xmm8 - a4 * b2;
1727  }
1728  (~C).store( i , j , xmm1 );
1729  (~C).store( i+IT::size , j , xmm2 );
1730  (~C).store( i+IT::size*2UL, j , xmm3 );
1731  (~C).store( i+IT::size*3UL, j , xmm4 );
1732  (~C).store( i , j+1UL, xmm5 );
1733  (~C).store( i+IT::size , j+1UL, xmm6 );
1734  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
1735  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
1736  }
      // Trailing single column (only reached for an odd number of columns)
1737  if( j < N ) {
1738  IntrinsicType xmm1( (~C).load(i ,j) );
1739  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
1740  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
1741  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
1742  for( size_t k=0UL; k<K; ++k ) {
1743  const IntrinsicType b1( set( B(k,j) ) );
1744  xmm1 = xmm1 - A.load(i ,k) * b1;
1745  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1746  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1747  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1748  }
1749  (~C).store( i , j, xmm1 );
1750  (~C).store( i+IT::size , j, xmm2 );
1751  (~C).store( i+IT::size*2UL, j, xmm3 );
1752  (~C).store( i+IT::size*3UL, j, xmm4 );
1753  }
1754  }
      // Tier 3: blocks of 2 vectors of rows, two columns of C per iteration
1755  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1756  size_t j( 0UL );
1757  for( ; (j+2UL) <= N; j+=2UL ) {
1758  IntrinsicType xmm1( (~C).load(i ,j ) );
1759  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
1760  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1761  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
1762  for( size_t k=0UL; k<K; ++k ) {
1763  const IntrinsicType a1( A.load(i ,k) );
1764  const IntrinsicType a2( A.load(i+IT::size,k) );
1765  const IntrinsicType b1( set( B(k,j ) ) );
1766  const IntrinsicType b2( set( B(k,j+1UL) ) );
1767  xmm1 = xmm1 - a1 * b1;
1768  xmm2 = xmm2 - a2 * b1;
1769  xmm3 = xmm3 - a1 * b2;
1770  xmm4 = xmm4 - a2 * b2;
1771  }
1772  (~C).store( i , j , xmm1 );
1773  (~C).store( i+IT::size, j , xmm2 );
1774  (~C).store( i , j+1UL, xmm3 );
1775  (~C).store( i+IT::size, j+1UL, xmm4 );
1776  }
      // Trailing single column (only reached for an odd number of columns)
1777  if( j < N ) {
1778  IntrinsicType xmm1( (~C).load(i ,j) );
1779  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
1780  for( size_t k=0UL; k<K; ++k ) {
1781  const IntrinsicType b1( set( B(k,j) ) );
1782  xmm1 = xmm1 - A.load(i ,k) * b1;
1783  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1784  }
1785  (~C).store( i , j, xmm1 );
1786  (~C).store( i+IT::size, j, xmm2 );
1787  }
1788  }
      // Tier 4: at most one vector of rows is left after the previous tiers
1789  if( i < M ) {
1790  size_t j( 0UL );
1791  for( ; (j+2UL) <= N; j+=2UL ) {
1792  IntrinsicType xmm1( (~C).load(i,j ) );
1793  IntrinsicType xmm2( (~C).load(i,j+1UL) );
1794  for( size_t k=0UL; k<K; ++k ) {
1795  const IntrinsicType a1( A.load(i,k) );
1796  xmm1 = xmm1 - a1 * set( B(k,j ) );
1797  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
1798  }
1799  (~C).store( i, j , xmm1 );
1800  (~C).store( i, j+1UL, xmm2 );
1801  }
      // Trailing single column (only reached for an odd number of columns)
1802  if( j < N ) {
1803  IntrinsicType xmm1( (~C).load(i,j) );
1804  for( size_t k=0UL; k<K; ++k ) {
1805  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
1806  }
1807  (~C).store( i, j, xmm1 );
1808  }
1809  }
1810  }
1812  //**********************************************************************************************
1813 
1814  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1828  template< typename MT3 // Type of the left-hand side target matrix
1829  , typename MT4 // Type of the left-hand side matrix operand
1830  , typename MT5 > // Type of the right-hand side matrix operand
1831  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1832  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1833  {
1834  selectDefaultSubAssignKernel( C, A, B );
1835  }
1837  //**********************************************************************************************
1838 
1839  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1840 #if BLAZE_BLAS_MODE
1841 
1854  template< typename MT3 // Type of the left-hand side target matrix
1855  , typename MT4 // Type of the left-hand side matrix operand
1856  , typename MT5 > // Type of the right-hand side matrix operand
1857  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1858  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1859  {
1860  using boost::numeric_cast;
1861 
1865 
1866  const int M ( numeric_cast<int>( A.rows() ) );
1867  const int N ( numeric_cast<int>( B.columns() ) );
1868  const int K ( numeric_cast<int>( A.columns() ) );
1869  const int lda( numeric_cast<int>( A.spacing() ) );
1870  const int ldb( numeric_cast<int>( B.spacing() ) );
1871  const int ldc( numeric_cast<int>( C.spacing() ) );
1872 
1873  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1874  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1875  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1876  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1877  }
1879 #endif
1880  //**********************************************************************************************
1881 
1882  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1883 #if BLAZE_BLAS_MODE
1884 
1897  template< typename MT3 // Type of the left-hand side target matrix
1898  , typename MT4 // Type of the left-hand side matrix operand
1899  , typename MT5 > // Type of the right-hand side matrix operand
1900  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1901  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1902  {
1903  using boost::numeric_cast;
1904 
1908 
1909  const int M ( numeric_cast<int>( A.rows() ) );
1910  const int N ( numeric_cast<int>( B.columns() ) );
1911  const int K ( numeric_cast<int>( A.columns() ) );
1912  const int lda( numeric_cast<int>( A.spacing() ) );
1913  const int ldb( numeric_cast<int>( B.spacing() ) );
1914  const int ldc( numeric_cast<int>( C.spacing() ) );
1915 
1916  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1917  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1918  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1919  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1920  }
1922 #endif
1923  //**********************************************************************************************
1924 
1925  //**BLAS-based subtraction assignment to dense matrices (single precision complex)***************
1926 #if BLAZE_BLAS_MODE
1927 
1940  template< typename MT3 // Type of the left-hand side target matrix
1941  , typename MT4 // Type of the left-hand side matrix operand
1942  , typename MT5 > // Type of the right-hand side matrix operand
1943  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1944  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1945  {
1946  using boost::numeric_cast;
1947 
1951  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1952  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1953  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1954 
1955  const int M ( numeric_cast<int>( A.rows() ) );
1956  const int N ( numeric_cast<int>( B.columns() ) );
1957  const int K ( numeric_cast<int>( A.columns() ) );
1958  const int lda( numeric_cast<int>( A.spacing() ) );
1959  const int ldb( numeric_cast<int>( B.spacing() ) );
1960  const int ldc( numeric_cast<int>( C.spacing() ) );
1961  const complex<float> alpha( -1.0F, 0.0F );
1962  const complex<float> beta ( 1.0F, 0.0F );
1963 
1964  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1965  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1966  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1967  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1968  }
1970 #endif
1971  //**********************************************************************************************
1972 
1973  //**BLAS-based subtraction assignment to dense matrices (double precision complex)***************
1974 #if BLAZE_BLAS_MODE
1975 
1988  template< typename MT3 // Type of the left-hand side target matrix
1989  , typename MT4 // Type of the left-hand side matrix operand
1990  , typename MT5 > // Type of the right-hand side matrix operand
1991  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1992  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1993  {
1994  using boost::numeric_cast;
1995 
1999  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2000  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2001  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2002 
2003  const int M ( numeric_cast<int>( A.rows() ) );
2004  const int N ( numeric_cast<int>( B.columns() ) );
2005  const int K ( numeric_cast<int>( A.columns() ) );
2006  const int lda( numeric_cast<int>( A.spacing() ) );
2007  const int ldb( numeric_cast<int>( B.spacing() ) );
2008  const int ldc( numeric_cast<int>( C.spacing() ) );
2009  const complex<double> alpha( -1.0, 0.0 );
2010  const complex<double> beta ( 1.0, 0.0 );
2011 
2012  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2013  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2014  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2015  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2016  }
2018 #endif
2019  //**********************************************************************************************
2020 
2021  //**Subtraction assignment to sparse matrices***************************************************
2022  // No special implementation for the subtraction assignment to sparse matrices.
2023  //**********************************************************************************************
2024 
2025  //**Multiplication assignment to dense matrices*************************************************
2026  // No special implementation for the multiplication assignment to dense matrices.
2027  //**********************************************************************************************
2028 
2029  //**Multiplication assignment to sparse matrices************************************************
2030  // No special implementation for the multiplication assignment to sparse matrices.
2031  //**********************************************************************************************
2032 
2033  //**SMP assignment to dense matrices************************************************************
2048  template< typename MT // Type of the target dense matrix
2049  , bool SO > // Storage order of the target dense matrix
2050  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2051  smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2052  {
2054 
2055  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2056  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2057 
2058  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2059  return;
2060  }
2061  else if( rhs.lhs_.columns() == 0UL ) {
2062  reset( ~lhs );
2063  return;
2064  }
2065 
2066  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2067  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2068 
2069  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2070  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2071  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2072  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2073  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2074  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2075 
2076  smpAssign( ~lhs, A * B );
2077  }
2079  //**********************************************************************************************
2080 
2081  //**SMP assignment to sparse matrices***********************************************************
2096  template< typename MT // Type of the target sparse matrix
2097  , bool SO > // Storage order of the target sparse matrix
2098  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2099  smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2100  {
2102 
2103  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2104 
2111 
2112  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2113  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2114 
2115  const TmpType tmp( rhs );
2116  smpAssign( ~lhs, tmp );
2117  }
2119  //**********************************************************************************************
2120 
2121  //**SMP addition assignment to dense matrices***************************************************
2136  template< typename MT // Type of the target dense matrix
2137  , bool SO > // Storage order of the target dense matrix
2138  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2139  smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2140  {
2142 
2143  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2144  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2145 
2146  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2147  return;
2148  }
2149 
2150  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2151  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2152 
2153  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2154  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2155  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2156  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2157  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2158  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2159 
2160  smpAddAssign( ~lhs, A * B );
2161  }
2163  //**********************************************************************************************
2164 
2165  //**SMP addition assignment to sparse matrices**************************************************
2166  // No special implementation for the SMP addition assignment to sparse matrices.
2167  //**********************************************************************************************
2168 
2169  //**SMP subtraction assignment to dense matrices************************************************
2184  template< typename MT // Type of the target dense matrix
2185  , bool SO > // Storage order of the target dense matrix
2186  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2187  smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2188  {
2190 
2191  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2192  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2193 
2194  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2195  return;
2196  }
2197 
2198  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2199  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2200 
2201  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2202  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2203  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2204  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2205  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2206  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2207 
2208  smpSubAssign( ~lhs, A * B );
2209  }
2211  //**********************************************************************************************
2212 
2213  //**SMP subtraction assignment to sparse matrices***********************************************
2214  // No special implementation for the SMP subtraction assignment to sparse matrices.
2215  //**********************************************************************************************
2216 
2217  //**SMP multiplication assignment to dense matrices*********************************************
2218  // No special implementation for the SMP multiplication assignment to dense matrices.
2219  //**********************************************************************************************
2220 
2221  //**SMP multiplication assignment to sparse matrices********************************************
2222  // No special implementation for the SMP multiplication assignment to sparse matrices.
2223  //**********************************************************************************************
2224 
2225  //**Compile time checks*************************************************************************
2232  //**********************************************************************************************
2233 };
2234 //*************************************************************************************************
2235 
2236 
2237 
2238 
2239 //=================================================================================================
2240 //
2241 // DMATSCALARMULTEXPR SPECIALIZATION
2242 //
2243 //=================================================================================================
2244 
2245 //*************************************************************************************************
2253 template< typename MT1 // Type of the left-hand side dense matrix
2254  , typename MT2 // Type of the right-hand side dense matrix
2255  , typename ST > // Type of the right-hand side scalar value
2256 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
2257  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2258  , private MatScalarMultExpr
2259  , private Computation
2260 {
2261  private:
2262  //**Type definitions****************************************************************************
2263  typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
2264  typedef typename MMM::ResultType RES;
2265  typedef typename MT1::ResultType RT1;
2266  typedef typename MT2::ResultType RT2;
2267  typedef typename RT1::ElementType ET1;
2268  typedef typename RT2::ElementType ET2;
2269  typedef typename MT1::CompositeType CT1;
2270  typedef typename MT2::CompositeType CT2;
2271  //**********************************************************************************************
2272 
2273  //**********************************************************************************************
2275  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2276  //**********************************************************************************************
2277 
2278  //**********************************************************************************************
2280  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2281  //**********************************************************************************************
2282 
2283  //**********************************************************************************************
2285 
2288  template< typename MT >
2289  struct UseSMPAssign {
2290  enum { value = ( evaluateLeft || evaluateRight ) };
2291  };
2292  //**********************************************************************************************
2293 
2294  //**********************************************************************************************
2296 
2299  template< typename T1, typename T2, typename T3, typename T4 >
2300  struct UseSinglePrecisionKernel {
2301  enum { value = IsFloat<typename T1::ElementType>::value &&
2302  IsFloat<typename T2::ElementType>::value &&
2303  IsFloat<typename T3::ElementType>::value &&
2304  !IsComplex<T4>::value };
2305  };
2306  //**********************************************************************************************
2307 
2308  //**********************************************************************************************
2310 
2313  template< typename T1, typename T2, typename T3, typename T4 >
2314  struct UseDoublePrecisionKernel {
2315  enum { value = IsDouble<typename T1::ElementType>::value &&
2316  IsDouble<typename T2::ElementType>::value &&
2317  IsDouble<typename T3::ElementType>::value &&
2318  !IsComplex<T4>::value };
2319  };
2320  //**********************************************************************************************
2321 
2322  //**********************************************************************************************
2324 
2327  template< typename T1, typename T2, typename T3 >
2328  struct UseSinglePrecisionComplexKernel {
2329  typedef complex<float> Type;
2330  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2331  IsSame<typename T2::ElementType,Type>::value &&
2332  IsSame<typename T3::ElementType,Type>::value };
2333  };
2334  //**********************************************************************************************
2335 
2336  //**********************************************************************************************
2338 
2341  template< typename T1, typename T2, typename T3 >
2342  struct UseDoublePrecisionComplexKernel {
2343  typedef complex<double> Type;
2344  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2345  IsSame<typename T2::ElementType,Type>::value &&
2346  IsSame<typename T3::ElementType,Type>::value };
2347  };
2348  //**********************************************************************************************
2349 
2350  //**********************************************************************************************
2352 
2354  template< typename T1, typename T2, typename T3, typename T4 >
2355  struct UseDefaultKernel {
2356  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2357  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2358  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2359  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2360  };
2361  //**********************************************************************************************
2362 
2363  //**********************************************************************************************
2365 
2367  template< typename T1, typename T2, typename T3, typename T4 >
2368  struct UseVectorizedDefaultKernel {
2369  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2370  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2371  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2372  IsSame<typename T1::ElementType,T4>::value &&
2373  IntrinsicTrait<typename T1::ElementType>::addition &&
2374  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2375  IntrinsicTrait<typename T1::ElementType>::multiplication };
2376  };
2377  //**********************************************************************************************
2378 
2379  public:
2380  //**Type definitions****************************************************************************
2381  typedef DMatScalarMultExpr<MMM,ST,true> This;
2382  typedef typename MultTrait<RES,ST>::Type ResultType;
2383  typedef typename ResultType::OppositeType OppositeType;
2384  typedef typename ResultType::TransposeType TransposeType;
2385  typedef typename ResultType::ElementType ElementType;
2386  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2387  typedef const ElementType ReturnType;
2388  typedef const ResultType CompositeType;
2389 
2391  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
2392 
2394  typedef ST RightOperand;
2395 
2397  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2398 
2400  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2401  //**********************************************************************************************
2402 
2403  //**Compilation flags***************************************************************************
2405  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2406  IsSame<ET1,ET2>::value &&
2407  IsSame<ET1,ST>::value &&
2408  IntrinsicTrait<ET1>::addition &&
2409  IntrinsicTrait<ET1>::multiplication };
2410 
2412  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2413  !evaluateRight && MT2::smpAssignable };
2414  //**********************************************************************************************
2415 
2416  //**Constructor*********************************************************************************
2422  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2423  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2424  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2425  {}
2426  //**********************************************************************************************
2427 
2428  //**Access operator*****************************************************************************
2435  inline ReturnType operator()( size_t i, size_t j ) const {
2436  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2437  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2438  return matrix_(i,j) * scalar_;
2439  }
2440  //**********************************************************************************************
2441 
2442  //**Rows function*******************************************************************************
2447  inline size_t rows() const {
2448  return matrix_.rows();
2449  }
2450  //**********************************************************************************************
2451 
2452  //**Columns function****************************************************************************
2457  inline size_t columns() const {
2458  return matrix_.columns();
2459  }
2460  //**********************************************************************************************
2461 
2462  //**Left operand access*************************************************************************
2467  inline LeftOperand leftOperand() const {
2468  return matrix_;
2469  }
2470  //**********************************************************************************************
2471 
2472  //**Right operand access************************************************************************
2477  inline RightOperand rightOperand() const {
2478  return scalar_;
2479  }
2480  //**********************************************************************************************
2481 
2482  //**********************************************************************************************
2488  template< typename T >
2489  inline bool canAlias( const T* alias ) const {
2490  return matrix_.canAlias( alias );
2491  }
2492  //**********************************************************************************************
2493 
2494  //**********************************************************************************************
2500  template< typename T >
2501  inline bool isAliased( const T* alias ) const {
2502  return matrix_.isAliased( alias );
2503  }
2504  //**********************************************************************************************
2505 
2506  //**********************************************************************************************
2511  inline bool isAligned() const {
2512  return matrix_.isAligned();
2513  }
2514  //**********************************************************************************************
2515 
2516  //**********************************************************************************************
2521  inline bool canSMPAssign() const {
2522  typename MMM::RightOperand B( matrix_.rightOperand() );
2523  return ( !BLAZE_BLAS_IS_PARALLEL ||
2524  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
2525  ( B.columns() > SMP_TDMATTDMATMULT_THRESHOLD );
2526  }
2527  //**********************************************************************************************
2528 
2529  private:
2530  //**Member variables****************************************************************************
2531  LeftOperand matrix_; //!< Left-hand side dense matrix of the multiplication expression.
2532  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
2533  //**********************************************************************************************
2534 
2535  //**Assignment to dense matrices****************************************************************
2547  template< typename MT // Type of the target dense matrix
2548  , bool SO > // Storage order of the target dense matrix
2549  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2550  {
2552 
2553  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2554  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2555 
2556  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2557  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2558 
2559  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2560  return;
2561  }
2562  else if( left.columns() == 0UL ) {
2563  reset( ~lhs );
2564  return;
2565  }
2566 
2567  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2568  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2569 
2570  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2571  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2572  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2573  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2574  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2575  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2576 
2577  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2578  }
2579  //**********************************************************************************************
2580 
2581  //**Assignment to dense matrices (kernel selection)*********************************************
2592  template< typename MT3 // Type of the left-hand side target matrix
2593  , typename MT4 // Type of the left-hand side matrix operand
2594  , typename MT5 // Type of the right-hand side matrix operand
2595  , typename ST2 > // Type of the scalar value
2596  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2597  {
2598  if( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD )
2599  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2600  else
2601  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2602  }
2603  //**********************************************************************************************
2604 
2605  //**Default assignment to dense matrices********************************************************
2619  template< typename MT3 // Type of the left-hand side target matrix
2620  , typename MT4 // Type of the left-hand side matrix operand
2621  , typename MT5 // Type of the right-hand side matrix operand
2622  , typename ST2 > // Type of the scalar value
2623  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2624  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2625  {
2626  for( size_t i=0UL; i<A.rows(); ++i ) {
2627  for( size_t k=0UL; k<B.columns(); ++k ) {
2628  C(i,k) = A(i,0UL) * B(0UL,k);
2629  }
2630  for( size_t j=1UL; j<A.columns(); ++j ) {
2631  for( size_t k=0UL; k<B.columns(); ++k ) {
2632  C(i,k) += A(i,j) * B(j,k);
2633  }
2634  }
2635  for( size_t k=0UL; k<B.columns(); ++k ) {
2636  C(i,k) *= scalar;
2637  }
2638  }
2639  }
2640  //**********************************************************************************************
2641 
2642  //**Vectorized default assignment to row-major dense matrices***********************************
2656  template< typename MT3 // Type of the left-hand side target matrix
2657  , typename MT4 // Type of the left-hand side matrix operand
2658  , typename MT5 // Type of the right-hand side matrix operand
2659  , typename ST2 > // Type of the scalar value
2660  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2661  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2662  {
2665 
2666  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2667  const typename MT5::OppositeType tmp( serial( B ) );
2668  assign( ~C, A * tmp * scalar );
2669  }
2670  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2671  const typename MT4::OppositeType tmp( serial( A ) );
2672  assign( ~C, tmp * B * scalar );
2673  }
2674  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2675  const typename MT5::OppositeType tmp( serial( B ) );
2676  assign( ~C, A * tmp * scalar );
2677  }
2678  else {
2679  const typename MT4::OppositeType tmp( serial( A ) );
2680  assign( ~C, tmp * B * scalar );
2681  }
2682  }
2683  //**********************************************************************************************
2684 
2685  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default assignment of a scaled dense matrix multiplication to a column-major
// dense matrix (C = s*A*B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The rows of the target are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s)
// of IT::size elements each. For every block the inner sum over k is accumulated in
// IntrinsicType variables and the result is multiplied by the broadcast scalar when stored.
//
// NOTE(review): the accumulators are declared without an initializer and are read in the
// first iteration; this presumes that a default-constructed IntrinsicType is zero - confirm.
// NOTE(review): the loop conditions only require the first element of the last vector of a
// block to lie below M, so loads/stores may extend beyond row M-1; presumably the matrices
// are padded accordingly - confirm.
2699  template< typename MT3 // Type of the left-hand side target matrix
2700  , typename MT4 // Type of the left-hand side matrix operand
2701  , typename MT5 // Type of the right-hand side matrix operand
2702  , typename ST2 > // Type of the scalar value
2703  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2704  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2705  {
2706  typedef IntrinsicTrait<ElementType> IT;
2707 
2708  const size_t M( A.rows() );
2709  const size_t N( B.columns() );
2710  const size_t K( A.columns() );
2711 
      // The scalar broadcast into an intrinsic vector; applied to every result vector on store
2712  const IntrinsicType factor( set( scalar ) );
2713 
      // Row index; shared by all of the following loops so each one continues where the
      // previous one stopped
2714  size_t i( 0UL );
2715 
      // Blocks of 8 vectors of rows, one target column at a time
2716  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2717  for( size_t j=0UL; j<N; ++j ) {
2718  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2719  for( size_t k=0UL; k<K; ++k ) {
2720  const IntrinsicType b1( set( B(k,j) ) );
2721  xmm1 = xmm1 + A.load(i ,k) * b1;
2722  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2723  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2724  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2725  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2726  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2727  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2728  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2729  }
2730  (~C).store( i , j, xmm1 * factor );
2731  (~C).store( i+IT::size , j, xmm2 * factor );
2732  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2733  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2734  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
2735  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
2736  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
2737  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
2738  }
2739  }
      // Blocks of 4 vectors of rows, two target columns at a time
2740  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2741  size_t j( 0UL );
2742  for( ; (j+2UL) <= N; j+=2UL ) {
2743  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2744  for( size_t k=0UL; k<K; ++k ) {
2745  const IntrinsicType a1( A.load(i ,k) );
2746  const IntrinsicType a2( A.load(i+IT::size ,k) );
2747  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2748  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2749  const IntrinsicType b1( set( B(k,j ) ) );
2750  const IntrinsicType b2( set( B(k,j+1UL) ) );
2751  xmm1 = xmm1 + a1 * b1;
2752  xmm2 = xmm2 + a2 * b1;
2753  xmm3 = xmm3 + a3 * b1;
2754  xmm4 = xmm4 + a4 * b1;
2755  xmm5 = xmm5 + a1 * b2;
2756  xmm6 = xmm6 + a2 * b2;
2757  xmm7 = xmm7 + a3 * b2;
2758  xmm8 = xmm8 + a4 * b2;
2759  }
2760  (~C).store( i , j , xmm1 * factor );
2761  (~C).store( i+IT::size , j , xmm2 * factor );
2762  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
2763  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
2764  (~C).store( i , j+1UL, xmm5 * factor );
2765  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
2766  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2767  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
2768  }
      // Remaining single column in case N is odd
2769  if( j < N ) {
2770  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2771  for( size_t k=0UL; k<K; ++k ) {
2772  const IntrinsicType b1( set( B(k,j) ) );
2773  xmm1 = xmm1 + A.load(i ,k) * b1;
2774  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2775  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2776  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2777  }
2778  (~C).store( i , j, xmm1 * factor );
2779  (~C).store( i+IT::size , j, xmm2 * factor );
2780  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
2781  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
2782  }
2783  }
      // Blocks of 2 vectors of rows, two target columns at a time
2784  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2785  size_t j( 0UL );
2786  for( ; (j+2UL) <= N; j+=2UL ) {
2787  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2788  for( size_t k=0UL; k<K; ++k ) {
2789  const IntrinsicType a1( A.load(i ,k) );
2790  const IntrinsicType a2( A.load(i+IT::size,k) );
2791  const IntrinsicType b1( set( B(k,j ) ) );
2792  const IntrinsicType b2( set( B(k,j+1UL) ) );
2793  xmm1 = xmm1 + a1 * b1;
2794  xmm2 = xmm2 + a2 * b1;
2795  xmm3 = xmm3 + a1 * b2;
2796  xmm4 = xmm4 + a2 * b2;
2797  }
2798  (~C).store( i , j , xmm1 * factor );
2799  (~C).store( i+IT::size, j , xmm2 * factor );
2800  (~C).store( i , j+1UL, xmm3 * factor );
2801  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
2802  }
      // Remaining single column in case N is odd
2803  if( j < N ) {
2804  IntrinsicType xmm1, xmm2;
2805  for( size_t k=0UL; k<K; ++k ) {
2806  const IntrinsicType b1( set( B(k,j) ) );
2807  xmm1 = xmm1 + A.load(i ,k) * b1;
2808  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2809  }
2810  (~C).store( i , j, xmm1 * factor );
2811  (~C).store( i+IT::size, j, xmm2 * factor );
2812  }
2813  }
      // Final single vector of rows, two target columns at a time
2814  if( i < M ) {
2815  size_t j( 0UL );
2816  for( ; (j+2UL) <= N; j+=2UL ) {
2817  IntrinsicType xmm1, xmm2;
2818  for( size_t k=0UL; k<K; ++k ) {
2819  const IntrinsicType a1( A.load(i,k) );
2820  xmm1 = xmm1 + a1 * set( B(k,j ) );
2821  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2822  }
2823  (~C).store( i, j , xmm1 * factor );
2824  (~C).store( i, j+1UL, xmm2 * factor );
2825  }
      // Remaining single column in case N is odd
2826  if( j < N ) {
2827  IntrinsicType xmm1;
2828  for( size_t k=0UL; k<K; ++k ) {
2829  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
2830  }
2831  (~C).store( i, j, xmm1 * factor );
2832  }
2833  }
2834  }
2835  //**********************************************************************************************
2836 
2837  //**BLAS-based assignment to dense matrices (default)*******************************************
2851  template< typename MT3 // Type of the left-hand side target matrix
2852  , typename MT4 // Type of the left-hand side matrix operand
2853  , typename MT5 // Type of the right-hand side matrix operand
2854  , typename ST2 > // Type of the scalar value
2855  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2856  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2857  {
2858  selectDefaultAssignKernel( C, A, B, scalar );
2859  }
2860  //**********************************************************************************************
2861 
2862  //**BLAS-based assignment to dense matrices (single precision)**********************************
2863 #if BLAZE_BLAS_MODE
2864 
2877  template< typename MT3 // Type of the left-hand side target matrix
2878  , typename MT4 // Type of the left-hand side matrix operand
2879  , typename MT5 // Type of the right-hand side matrix operand
2880  , typename ST2 > // Type of the scalar value
2881  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2882  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2883  {
2884  using boost::numeric_cast;
2885 
2889 
2890  const int M ( numeric_cast<int>( A.rows() ) );
2891  const int N ( numeric_cast<int>( B.columns() ) );
2892  const int K ( numeric_cast<int>( A.columns() ) );
2893  const int lda( numeric_cast<int>( A.spacing() ) );
2894  const int ldb( numeric_cast<int>( B.spacing() ) );
2895  const int ldc( numeric_cast<int>( C.spacing() ) );
2896 
2897  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2898  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2899  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2900  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2901  }
2902 #endif
2903  //**********************************************************************************************
2904 
2905  //**BLAS-based assignment to dense matrices (double precision)**********************************
2906 #if BLAZE_BLAS_MODE
2907 
// BLAS-based assignment kernel for double precision operands.
// Participates in overload resolution only when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> enables it (EnableIf on the return type).
// Forwards to cblas_dgemm with alpha = scalar and beta = 0.0, i.e. per the GEMM
// contract C = scalar * op(A) * op(B) + 0.0 * C.
//   C       target matrix written by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor passed as alpha
2920  template< typename MT3 // Type of the left-hand side target matrix
2921  , typename MT4 // Type of the left-hand side matrix operand
2922  , typename MT5 // Type of the right-hand side matrix operand
2923  , typename ST2 > // Type of the scalar value
2924  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2925  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2926  {
2927  using boost::numeric_cast;
2928 
2932 
      // CBLAS takes int sizes: the dimensions and the spacing() of each matrix (used as
      // leading dimension) are converted with the range-checked boost::numeric_cast.
2933  const int M ( numeric_cast<int>( A.rows() ) );
2934  const int N ( numeric_cast<int>( B.columns() ) );
2935  const int K ( numeric_cast<int>( A.columns() ) );
2936  const int lda( numeric_cast<int>( A.spacing() ) );
2937  const int ldb( numeric_cast<int>( B.spacing() ) );
2938  const int ldc( numeric_cast<int>( C.spacing() ) );
2939 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
      // NOTE(review): presumably A and B are stored column-major here - confirm.
2940  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2941  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2942  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2943  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2944  }
2945 #endif
2946  //**********************************************************************************************
2947 
2948  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2949 #if BLAZE_BLAS_MODE
2950 
// BLAS-based assignment kernel for single precision complex operands.
// Participates in overload resolution only when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> enables it (EnableIf on the return type).
// Forwards to cblas_cgemm with alpha = complex<float>( scalar ) and beta = (0,0), i.e.
// per the GEMM contract C = alpha * op(A) * op(B) + 0 * C.
//   C       target matrix written by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor, converted to complex<float> for alpha
2963  template< typename MT3 // Type of the left-hand side target matrix
2964  , typename MT4 // Type of the left-hand side matrix operand
2965  , typename MT5 // Type of the right-hand side matrix operand
2966  , typename ST2 > // Type of the scalar value
2967  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2968  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2969  {
2970  using boost::numeric_cast;
2971 
      // Constraints on the value_type of each matrix's element type (macro defined
      // elsewhere; presumably a compile-time check that it is float).
2975  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2976  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2977  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2978 
      // CBLAS takes int sizes; conversions go through the range-checked numeric_cast.
      // alpha and beta are materialized as objects because cblas_cgemm receives them
      // by address.
2979  const int M ( numeric_cast<int>( A.rows() ) );
2980  const int N ( numeric_cast<int>( B.columns() ) );
2981  const int K ( numeric_cast<int>( A.columns() ) );
2982  const int lda( numeric_cast<int>( A.spacing() ) );
2983  const int ldb( numeric_cast<int>( B.spacing() ) );
2984  const int ldc( numeric_cast<int>( C.spacing() ) );
2985  const complex<float> alpha( scalar );
2986  const complex<float> beta ( 0.0F, 0.0F );
2987 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
2988  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2989  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2990  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2991  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2992  }
2993 #endif
2994  //**********************************************************************************************
2995 
2996  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2997 #if BLAZE_BLAS_MODE
2998 
// BLAS-based assignment kernel for double precision complex operands.
// Participates in overload resolution only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> enables it (EnableIf on the return type).
// Forwards to cblas_zgemm with alpha = complex<double>( scalar ) and beta = (0,0), i.e.
// per the GEMM contract C = alpha * op(A) * op(B) + 0 * C.
//   C       target matrix written by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor, converted to complex<double> for alpha
3011  template< typename MT3 // Type of the left-hand side target matrix
3012  , typename MT4 // Type of the left-hand side matrix operand
3013  , typename MT5 // Type of the right-hand side matrix operand
3014  , typename ST2 > // Type of the scalar value
3015  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3016  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3017  {
3018  using boost::numeric_cast;
3019 
      // Constraints on the value_type of each matrix's element type (macro defined
      // elsewhere; presumably a compile-time check that it is double).
3023  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3024  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3025  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3026 
      // CBLAS takes int sizes; conversions go through the range-checked numeric_cast.
      // alpha and beta are materialized as objects because cblas_zgemm receives them
      // by address.
3027  const int M ( numeric_cast<int>( A.rows() ) );
3028  const int N ( numeric_cast<int>( B.columns() ) );
3029  const int K ( numeric_cast<int>( A.columns() ) );
3030  const int lda( numeric_cast<int>( A.spacing() ) );
3031  const int ldb( numeric_cast<int>( B.spacing() ) );
3032  const int ldc( numeric_cast<int>( C.spacing() ) );
3033  const complex<double> alpha( scalar );
3034  const complex<double> beta ( 0.0, 0.0 );
3035 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
3036  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3037  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3038  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3039  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3040  }
3041 #endif
3042  //**********************************************************************************************
3043 
3044  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled matrix product expression to a sparse matrix.
// The expression is first evaluated serially into a temporary, and that temporary is
// then assigned to the sparse target.
//   lhs  target sparse matrix (its dimensions are asserted to match rhs)
//   rhs  scaled product expression to be assigned
3056  template< typename MT // Type of the target sparse matrix
3057  , bool SO > // Storage order of the target sparse matrix
3058  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3059  {
3061 
      // Temporary type chosen from the target's storage order flag SO
      // (presumably ResultType when SO is true, OppositeType otherwise - confirm
      // against SelectType).
3062  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
3063 
3070 
3071  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3072  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3073 
      // Evaluate the whole expression once, then hand the result to assign().
3074  const TmpType tmp( serial( rhs ) );
3075  assign( ~lhs, tmp );
3076  }
3077  //**********************************************************************************************
3078 
3079  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled matrix product expression to a dense matrix.
// Extracts the two operands of the wrapped product (rhs.matrix_), evaluates them
// serially into LT / RT objects and hands them, together with rhs.scalar_, to
// selectAddAssignKernel().
//   lhs  target dense matrix (its dimensions are asserted to match rhs)
//   rhs  scaled product expression to be added
3091  template< typename MT // Type of the target dense matrix
3092  , bool SO > // Storage order of the target dense matrix
3093  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3094  {
3096 
3097  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3098  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3099 
3100  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3101  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3102 
      // Nothing to add when the target has no rows or columns, or when the inner
      // dimension of the product (left.columns()) is zero.
3103  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3104  return;
3105  }
3106 
3107  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3108  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3109 
      // The evaluated operands must keep the operand dimensions and fit the target.
3110  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3111  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3112  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3113  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3114  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3115  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3116 
3117  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3118  }
3119  //**********************************************************************************************
3120 
3121  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment of a scaled matrix product.
// Targets whose element count (rows * columns) reaches TDMATTDMATMULT_THRESHOLD are
// routed to selectBlasAddAssignKernel(); smaller targets go to
// selectDefaultAddAssignKernel(). All arguments are forwarded unchanged.
3132  template< typename MT3 // Target matrix type
3133  , typename MT4 // Left-hand side operand type
3134  , typename MT5 // Right-hand side operand type
3135  , typename ST2 > // Scalar type
3136  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3137  {
3138  if( TDMATTDMATMULT_THRESHOLD <= C.rows() * C.columns() )
3139  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3140  else
3141  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3142  }
3143  //**********************************************************************************************
3144 
3145  //**Default addition assignment to dense matrices***********************************************
// Default (non-vectorized) addition assignment of a scaled matrix product.
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not apply (DisableIf).
// The scaled product is evaluated serially into a ResultType temporary, which is then
// added to the target via addAssign().
3159  template< typename MT3 // Target matrix type
3160  , typename MT4 // Left-hand side operand type
3161  , typename MT5 // Right-hand side operand type
3162  , typename ST2 > // Scalar type
3163  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3164  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3165  {
3166  const ResultType scaledProduct( serial( A * B * scalar ) );
3167  addAssign( C, scaledProduct );
3168  }
3169  //**********************************************************************************************
3170 
3171  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment for row-major targets (overload taking
// DenseMatrix<MT3,false>).
// No kernel is run here directly: one of the two operands is copied (serially) into
// its OppositeType and the resulting scaled product expression is passed back to
// addAssign().
// Which operand is converted:
//   - only A's type is resizable            -> B is converted
//   - only B's type is resizable            -> A is converted
//   - otherwise, B has no more elements than A -> B is converted
//   - otherwise                             -> A is converted
3185  template< typename MT3 // Type of the left-hand side target matrix
3186  , typename MT4 // Type of the left-hand side matrix operand
3187  , typename MT5 // Type of the right-hand side matrix operand
3188  , typename ST2 > // Type of the scalar value
3189  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3190  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3191  {
3194 
3195  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3196  const typename MT5::OppositeType tmp( serial( B ) );
3197  addAssign( ~C, A * tmp * scalar );
3198  }
3199  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3200  const typename MT4::OppositeType tmp( serial( A ) );
3201  addAssign( ~C, tmp * B * scalar );
3202  }
      // Same resizability on both sides: convert the operand with fewer elements
      // (B on a tie).
3203  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3204  const typename MT5::OppositeType tmp( serial( B ) );
3205  addAssign( ~C, A * tmp * scalar );
3206  }
3207  else {
3208  const typename MT4::OppositeType tmp( serial( A ) );
3209  addAssign( ~C, tmp * B * scalar );
3210  }
3211  }
3212  //**********************************************************************************************
3213 
3214  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment for column-major targets (overload taking
// DenseMatrix<MT3,true>): C += ( A * B ) * scalar.
// Rows are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the partial sums over k are kept in IntrinsicType
// accumulators (xmm1..xmm8), multiplied by the vectorized scalar and added to the
// values already stored in C.
// NOTE(review): the accumulators are default-constructed and a row block may extend
// past M (loop conditions only test the start of the last vector); this presumably
// relies on a zero-initializing IntrinsicType and on padded matrix storage - confirm.
3228  template< typename MT3 // Type of the left-hand side target matrix
3229  , typename MT4 // Type of the left-hand side matrix operand
3230  , typename MT5 // Type of the right-hand side matrix operand
3231  , typename ST2 > // Type of the scalar value
3232  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3233  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3234  {
3235  typedef IntrinsicTrait<ElementType> IT;
3236 
3237  const size_t M( A.rows() );
3238  const size_t N( B.columns() );
3239  const size_t K( A.columns() );
3240 
      // Scalar in vector form (set() presumably broadcasts it to every lane).
3241  const IntrinsicType factor( set( scalar ) );
3242 
      // Row index shared by all tiers below; each tier continues where the previous
      // one stopped.
3243  size_t i( 0UL );
3244 
      // Tier 1: eight vectors of rows per step, one column of C at a time.
3245  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3246  for( size_t j=0UL; j<N; ++j ) {
3247  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3248  for( size_t k=0UL; k<K; ++k ) {
3249  const IntrinsicType b1( set( B(k,j) ) );
3250  xmm1 = xmm1 + A.load(i ,k) * b1;
3251  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3252  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3253  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3254  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3255  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3256  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3257  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3258  }
3259  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3260  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3261  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3262  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3263  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
3264  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
3265  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
3266  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
3267  }
3268  }
      // Tier 2: four vectors of rows per step, two columns of C at a time
      // (xmm1..xmm4 belong to column j, xmm5..xmm8 to column j+1).
3269  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3270  size_t j( 0UL );
3271  for( ; (j+2UL) <= N; j+=2UL ) {
3272  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3273  for( size_t k=0UL; k<K; ++k ) {
3274  const IntrinsicType a1( A.load(i ,k) );
3275  const IntrinsicType a2( A.load(i+IT::size ,k) );
3276  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3277  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3278  const IntrinsicType b1( set( B(k,j ) ) );
3279  const IntrinsicType b2( set( B(k,j+1UL) ) );
3280  xmm1 = xmm1 + a1 * b1;
3281  xmm2 = xmm2 + a2 * b1;
3282  xmm3 = xmm3 + a3 * b1;
3283  xmm4 = xmm4 + a4 * b1;
3284  xmm5 = xmm5 + a1 * b2;
3285  xmm6 = xmm6 + a2 * b2;
3286  xmm7 = xmm7 + a3 * b2;
3287  xmm8 = xmm8 + a4 * b2;
3288  }
3289  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3290  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
3291  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
3292  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
3293  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
3294  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
3295  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
3296  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
3297  }
      // Trailing single column when N is odd.
3298  if( j < N ) {
3299  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3300  for( size_t k=0UL; k<K; ++k ) {
3301  const IntrinsicType b1( set( B(k,j) ) );
3302  xmm1 = xmm1 + A.load(i ,k) * b1;
3303  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3304  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3305  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3306  }
3307  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3308  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3309  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3310  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3311  }
3312  }
      // Tier 3: two vectors of rows per step, two columns of C at a time.
3313  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3314  size_t j( 0UL );
3315  for( ; (j+2UL) <= N; j+=2UL ) {
3316  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3317  for( size_t k=0UL; k<K; ++k ) {
3318  const IntrinsicType a1( A.load(i ,k) );
3319  const IntrinsicType a2( A.load(i+IT::size,k) );
3320  const IntrinsicType b1( set( B(k,j ) ) );
3321  const IntrinsicType b2( set( B(k,j+1UL) ) );
3322  xmm1 = xmm1 + a1 * b1;
3323  xmm2 = xmm2 + a2 * b1;
3324  xmm3 = xmm3 + a1 * b2;
3325  xmm4 = xmm4 + a2 * b2;
3326  }
3327  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3328  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
3329  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
3330  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
3331  }
      // Trailing single column when N is odd.
3332  if( j < N ) {
3333  IntrinsicType xmm1, xmm2;
3334  for( size_t k=0UL; k<K; ++k ) {
3335  const IntrinsicType b1( set( B(k,j) ) );
3336  xmm1 = xmm1 + A.load(i ,k) * b1;
3337  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3338  }
3339  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3340  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
3341  }
3342  }
      // Remainder: a single vector of rows starting at i, two columns at a time.
3343  if( i < M ) {
3344  size_t j( 0UL );
3345  for( ; (j+2UL) <= N; j+=2UL ) {
3346  IntrinsicType xmm1, xmm2;
3347  for( size_t k=0UL; k<K; ++k ) {
3348  const IntrinsicType a1( A.load(i,k) );
3349  xmm1 = xmm1 + a1 * set( B(k,j ) );
3350  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3351  }
3352  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3353  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
3354  }
      // Trailing single column when N is odd.
3355  if( j < N ) {
3356  IntrinsicType xmm1;
3357  for( size_t k=0UL; k<K; ++k ) {
3358  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3359  }
3360  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3361  }
3362  }
3363  }
3364  //**********************************************************************************************
3365 
3366  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment.
// Participates in overload resolution when UseDefaultKernel<MT3,MT4,MT5,ST2> enables
// it (EnableIf on the return type) and simply forwards all arguments to
// selectDefaultAddAssignKernel().
3380  template< typename MT3 // Type of the left-hand side target matrix
3381  , typename MT4 // Type of the left-hand side matrix operand
3382  , typename MT5 // Type of the right-hand side matrix operand
3383  , typename ST2 > // Type of the scalar value
3384  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3385  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3386  {
3387  selectDefaultAddAssignKernel( C, A, B, scalar );
3388  }
3389  //**********************************************************************************************
3390 
3391  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3392 #if BLAZE_BLAS_MODE
3393 
// BLAS-based addition assignment kernel for single precision operands.
// Participates in overload resolution only when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> enables it (EnableIf on the return type).
// Forwards to cblas_sgemm with alpha = scalar and beta = 1.0F, i.e. per the GEMM
// contract C = scalar * op(A) * op(B) + 1.0F * C (the product is accumulated into C).
//   C       target matrix updated by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor passed as alpha
3406  template< typename MT3 // Type of the left-hand side target matrix
3407  , typename MT4 // Type of the left-hand side matrix operand
3408  , typename MT5 // Type of the right-hand side matrix operand
3409  , typename ST2 > // Type of the scalar value
3410  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3411  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3412  {
3413  using boost::numeric_cast;
3414 
3418 
      // CBLAS takes int sizes: the dimensions and the spacing() of each matrix (used as
      // leading dimension) are converted with the range-checked boost::numeric_cast.
3419  const int M ( numeric_cast<int>( A.rows() ) );
3420  const int N ( numeric_cast<int>( B.columns() ) );
3421  const int K ( numeric_cast<int>( A.columns() ) );
3422  const int lda( numeric_cast<int>( A.spacing() ) );
3423  const int ldb( numeric_cast<int>( B.spacing() ) );
3424  const int ldc( numeric_cast<int>( C.spacing() ) );
3425 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
3426  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3427  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3428  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3429  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3430  }
3431 #endif
3432  //**********************************************************************************************
3433 
3434  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3435 #if BLAZE_BLAS_MODE
3436 
// BLAS-based addition assignment kernel for double precision operands.
// Participates in overload resolution only when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> enables it (EnableIf on the return type).
// Forwards to cblas_dgemm with alpha = scalar and beta = 1.0, i.e. per the GEMM
// contract C = scalar * op(A) * op(B) + 1.0 * C (the product is accumulated into C).
//   C       target matrix updated by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor passed as alpha
3449  template< typename MT3 // Type of the left-hand side target matrix
3450  , typename MT4 // Type of the left-hand side matrix operand
3451  , typename MT5 // Type of the right-hand side matrix operand
3452  , typename ST2 > // Type of the scalar value
3453  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3454  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3455  {
3456  using boost::numeric_cast;
3457 
3461 
      // CBLAS takes int sizes: the dimensions and the spacing() of each matrix (used as
      // leading dimension) are converted with the range-checked boost::numeric_cast.
3462  const int M ( numeric_cast<int>( A.rows() ) );
3463  const int N ( numeric_cast<int>( B.columns() ) );
3464  const int K ( numeric_cast<int>( A.columns() ) );
3465  const int lda( numeric_cast<int>( A.spacing() ) );
3466  const int ldb( numeric_cast<int>( B.spacing() ) );
3467  const int ldc( numeric_cast<int>( C.spacing() ) );
3468 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
3469  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3470  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3471  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3472  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3473  }
3474 #endif
3475  //**********************************************************************************************
3476 
3477  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3478 #if BLAZE_BLAS_MODE
3479 
// BLAS-based addition assignment kernel for single precision complex operands.
// Participates in overload resolution only when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> enables it (EnableIf on the return type).
// Forwards to cblas_cgemm with alpha = complex<float>( scalar ) and beta = (1,0), i.e.
// per the GEMM contract C = alpha * op(A) * op(B) + C (the product is accumulated
// into C).
//   C       target matrix updated by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor, converted to complex<float> for alpha
3492  template< typename MT3 // Type of the left-hand side target matrix
3493  , typename MT4 // Type of the left-hand side matrix operand
3494  , typename MT5 // Type of the right-hand side matrix operand
3495  , typename ST2 > // Type of the scalar value
3496  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3497  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3498  {
3499  using boost::numeric_cast;
3500 
      // Constraints on the value_type of each matrix's element type (macro defined
      // elsewhere; presumably a compile-time check that it is float).
3504  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3505  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3506  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3507 
      // CBLAS takes int sizes; conversions go through the range-checked numeric_cast.
      // alpha and beta are materialized as objects because cblas_cgemm receives them
      // by address.
3508  const int M ( numeric_cast<int>( A.rows() ) );
3509  const int N ( numeric_cast<int>( B.columns() ) );
3510  const int K ( numeric_cast<int>( A.columns() ) );
3511  const int lda( numeric_cast<int>( A.spacing() ) );
3512  const int ldb( numeric_cast<int>( B.spacing() ) );
3513  const int ldc( numeric_cast<int>( C.spacing() ) );
3514  const complex<float> alpha( scalar );
3515  const complex<float> beta ( 1.0F, 0.0F );
3516 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
3517  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3518  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3519  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3520  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3521  }
3522 #endif
3523  //**********************************************************************************************
3524 
3525  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3526 #if BLAZE_BLAS_MODE
3527 
// BLAS-based addition assignment kernel for double precision complex operands.
// Participates in overload resolution only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> enables it (EnableIf on the return type).
// Forwards to cblas_zgemm with alpha = complex<double>( scalar ) and beta = (1,0), i.e.
// per the GEMM contract C = alpha * op(A) * op(B) + C (the product is accumulated
// into C).
//   C       target matrix updated by the call
//   A, B    left-hand side / right-hand side matrix operands
//   scalar  scaling factor, converted to complex<double> for alpha
3540  template< typename MT3 // Type of the left-hand side target matrix
3541  , typename MT4 // Type of the left-hand side matrix operand
3542  , typename MT5 // Type of the right-hand side matrix operand
3543  , typename ST2 > // Type of the scalar value
3544  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3545  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3546  {
3547  using boost::numeric_cast;
3548 
      // Constraints on the value_type of each matrix's element type (macro defined
      // elsewhere; presumably a compile-time check that it is double).
3552  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3553  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3554  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3555 
      // CBLAS takes int sizes; conversions go through the range-checked numeric_cast.
      // alpha and beta are materialized as objects because cblas_zgemm receives them
      // by address.
3556  const int M ( numeric_cast<int>( A.rows() ) );
3557  const int N ( numeric_cast<int>( B.columns() ) );
3558  const int K ( numeric_cast<int>( A.columns() ) );
3559  const int lda( numeric_cast<int>( A.spacing() ) );
3560  const int ldb( numeric_cast<int>( B.spacing() ) );
3561  const int ldc( numeric_cast<int>( C.spacing() ) );
3562  const complex<double> alpha( scalar );
3563  const complex<double> beta ( 1.0, 0.0 );
3564 
      // A row-major target selects CblasRowMajor and flags both operands as transposed;
      // otherwise CblasColMajor is used without transposition.
3565  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3566  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3567  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3568  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3569  }
3570 #endif
3571  //**********************************************************************************************
3572 
3573  //**Addition assignment to sparse matrices******************************************************
3574  // No special implementation for the addition assignment to sparse matrices.
3575  //**********************************************************************************************
3576 
3577  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled matrix product expression to a dense matrix.
// Extracts the two operands of the wrapped product (rhs.matrix_), evaluates them
// serially into LT / RT objects and hands them, together with rhs.scalar_, to
// selectSubAssignKernel().
//   lhs  target dense matrix (its dimensions are asserted to match rhs)
//   rhs  scaled product expression to be subtracted
3589  template< typename MT // Type of the target dense matrix
3590  , bool SO > // Storage order of the target dense matrix
3591  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3592  {
3594 
3595  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3596  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3597 
3598  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3599  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3600 
      // Nothing to subtract when the target has no rows or columns, or when the inner
      // dimension of the product (left.columns()) is zero.
3601  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3602  return;
3603  }
3604 
3605  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3606  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3607 
      // The evaluated operands must keep the operand dimensions and fit the target.
3608  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3609  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3610  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3611  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3612  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3613  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3614 
3615  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3616  }
3617  //**********************************************************************************************
3618 
3619  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment of a scaled matrix product.
// Targets whose element count (rows * columns) reaches TDMATTDMATMULT_THRESHOLD are
// routed to selectBlasSubAssignKernel(); smaller targets go to
// selectDefaultSubAssignKernel(). All arguments are forwarded unchanged.
3630  template< typename MT3 // Target matrix type
3631  , typename MT4 // Left-hand side operand type
3632  , typename MT5 // Right-hand side operand type
3633  , typename ST2 > // Scalar type
3634  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3635  {
3636  if( TDMATTDMATMULT_THRESHOLD <= C.rows() * C.columns() )
3637  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3638  else
3639  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3640  }
3641  //**********************************************************************************************
3642 
3643  //**Default subtraction assignment to dense matrices********************************************
// Default (non-vectorized) subtraction assignment of a scaled matrix product.
// Selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not apply (DisableIf).
// The scaled product is evaluated serially into a ResultType temporary, which is then
// subtracted from the target via subAssign().
3657  template< typename MT3 // Target matrix type
3658  , typename MT4 // Left-hand side operand type
3659  , typename MT5 // Right-hand side operand type
3660  , typename ST2 > // Scalar type
3661  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3662  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3663  {
3664  const ResultType scaledProduct( serial( A * B * scalar ) );
3665  subAssign( C, scaledProduct );
3666  }
3667  //**********************************************************************************************
3668 
3669  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment for row-major targets (overload taking
// DenseMatrix<MT3,false>).
// No kernel is run here directly: one of the two operands is copied (serially) into
// its OppositeType and the resulting scaled product expression is passed back to
// subAssign().
// Which operand is converted:
//   - only A's type is resizable            -> B is converted
//   - only B's type is resizable            -> A is converted
//   - otherwise, B has no more elements than A -> B is converted
//   - otherwise                             -> A is converted
3683  template< typename MT3 // Type of the left-hand side target matrix
3684  , typename MT4 // Type of the left-hand side matrix operand
3685  , typename MT5 // Type of the right-hand side matrix operand
3686  , typename ST2 > // Type of the scalar value
3687  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3688  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3689  {
3692 
3693  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3694  const typename MT5::OppositeType tmp( serial( B ) );
3695  subAssign( ~C, A * tmp * scalar );
3696  }
3697  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3698  const typename MT4::OppositeType tmp( serial( A ) );
3699  subAssign( ~C, tmp * B * scalar );
3700  }
      // Same resizability on both sides: convert the operand with fewer elements
      // (B on a tie).
3701  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3702  const typename MT5::OppositeType tmp( serial( B ) );
3703  subAssign( ~C, A * tmp * scalar );
3704  }
3705  else {
3706  const typename MT4::OppositeType tmp( serial( A ) );
3707  subAssign( ~C, tmp * B * scalar );
3708  }
3709  }
3710  //**********************************************************************************************
3711 
3712  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment for column-major targets (overload taking
// DenseMatrix<MT3,true>): C -= ( A * B ) * scalar.
// Rows are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of
// IT::size elements. For each block the partial sums over k are kept in IntrinsicType
// accumulators (xmm1..xmm8), multiplied by the vectorized scalar and subtracted from
// the values already stored in C.
// NOTE(review): the accumulators are default-constructed and a row block may extend
// past M (loop conditions only test the start of the last vector); this presumably
// relies on a zero-initializing IntrinsicType and on padded matrix storage - confirm.
3726  template< typename MT3 // Type of the left-hand side target matrix
3727  , typename MT4 // Type of the left-hand side matrix operand
3728  , typename MT5 // Type of the right-hand side matrix operand
3729  , typename ST2 > // Type of the scalar value
3730  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3731  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3732  {
3733  typedef IntrinsicTrait<ElementType> IT;
3734 
3735  const size_t M( A.rows() );
3736  const size_t N( B.columns() );
3737  const size_t K( A.columns() );
3738 
      // Scalar in vector form (set() presumably broadcasts it to every lane).
3739  const IntrinsicType factor( set( scalar ) );
3740 
      // Row index shared by all tiers below; each tier continues where the previous
      // one stopped.
3741  size_t i( 0UL );
3742 
      // Tier 1: eight vectors of rows per step, one column of C at a time.
3743  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3744  for( size_t j=0UL; j<N; ++j ) {
3745  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3746  for( size_t k=0UL; k<K; ++k ) {
3747  const IntrinsicType b1( set( B(k,j) ) );
3748  xmm1 = xmm1 + A.load(i ,k) * b1;
3749  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3750  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3751  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3752  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3753  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3754  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3755  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3756  }
3757  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3758  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3759  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3760  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3761  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3762  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3763  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3764  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
3765  }
3766  }
      // Tier 2: four vectors of rows per step, two columns of C at a time
      // (xmm1..xmm4 belong to column j, xmm5..xmm8 to column j+1).
3767  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3768  size_t j( 0UL );
3769  for( ; (j+2UL) <= N; j+=2UL ) {
3770  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3771  for( size_t k=0UL; k<K; ++k ) {
3772  const IntrinsicType a1( A.load(i ,k) );
3773  const IntrinsicType a2( A.load(i+IT::size ,k) );
3774  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3775  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3776  const IntrinsicType b1( set( B(k,j ) ) );
3777  const IntrinsicType b2( set( B(k,j+1UL) ) );
3778  xmm1 = xmm1 + a1 * b1;
3779  xmm2 = xmm2 + a2 * b1;
3780  xmm3 = xmm3 + a3 * b1;
3781  xmm4 = xmm4 + a4 * b1;
3782  xmm5 = xmm5 + a1 * b2;
3783  xmm6 = xmm6 + a2 * b2;
3784  xmm7 = xmm7 + a3 * b2;
3785  xmm8 = xmm8 + a4 * b2;
3786  }
3787  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3788  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3789  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3790  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3791  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3792  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3793  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3794  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
3795  }
      // Trailing single column when N is odd.
3796  if( j < N ) {
3797  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3798  for( size_t k=0UL; k<K; ++k ) {
3799  const IntrinsicType b1( set( B(k,j) ) );
3800  xmm1 = xmm1 + A.load(i ,k) * b1;
3801  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3802  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3803  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3804  }
3805  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3806  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3807  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3808  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3809  }
3810  }
      // Tier 3: two vectors of rows per step, two columns of C at a time.
3811  for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3812  size_t j( 0UL );
3813  for( ; (j+2UL) <= N; j+=2UL ) {
3814  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3815  for( size_t k=0UL; k<K; ++k ) {
3816  const IntrinsicType a1( A.load(i ,k) );
3817  const IntrinsicType a2( A.load(i+IT::size,k) );
3818  const IntrinsicType b1( set( B(k,j ) ) );
3819  const IntrinsicType b2( set( B(k,j+1UL) ) );
3820  xmm1 = xmm1 + a1 * b1;
3821  xmm2 = xmm2 + a2 * b1;
3822  xmm3 = xmm3 + a1 * b2;
3823  xmm4 = xmm4 + a2 * b2;
3824  }
3825  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3826  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3827  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3828  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
3829  }
      // Trailing single column when N is odd.
3830  if( j < N ) {
3831  IntrinsicType xmm1, xmm2;
3832  for( size_t k=0UL; k<K; ++k ) {
3833  const IntrinsicType b1( set( B(k,j) ) );
3834  xmm1 = xmm1 + A.load(i ,k) * b1;
3835  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3836  }
3837  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3838  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
3839  }
3840  }
      // Remainder: a single vector of rows starting at i, two columns at a time.
3841  if( i < M ) {
3842  size_t j( 0UL );
3843  for( ; (j+2UL) <= N; j+=2UL ) {
3844  IntrinsicType xmm1, xmm2;
3845  for( size_t k=0UL; k<K; ++k ) {
3846  const IntrinsicType a1( A.load(i,k) );
3847  xmm1 = xmm1 + a1 * set( B(k,j ) );
3848  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
3849  }
3850  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3851  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
3852  }
      // Trailing single column when N is odd.
3853  if( j < N ) {
3854  IntrinsicType xmm1;
3855  for( size_t k=0UL; k<K; ++k ) {
3856  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
3857  }
3858  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3859  }
3860  }
3861  }
3862  //**********************************************************************************************
3863 
3864  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
      // Fallback overload of selectBlasSubAssignKernel(). It takes part in overload resolution
      // only when UseDefaultKernel<MT3,MT4,MT5,ST2> enables it (via EnableIf) and does nothing
      // but forward all four arguments, unchanged, to selectDefaultSubAssignKernel().
      //
      //   C      : target matrix, taken by non-const reference
      //   A      : left-hand side matrix operand
      //   B      : right-hand side matrix operand
      //   scalar : scaling factor
3878  template< typename MT3 // Type of the left-hand side target matrix
3879  , typename MT4 // Type of the left-hand side matrix operand
3880  , typename MT5 // Type of the right-hand side matrix operand
3881  , typename ST2 > // Type of the scalar value
3882  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3883  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3884  {
3885  selectDefaultSubAssignKernel( C, A, B, scalar );
3886  }
3887  //**********************************************************************************************
3888 
3889  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3890 #if BLAZE_BLAS_MODE
3891 
      // Overload of selectBlasSubAssignKernel() that is enabled (via EnableIf) when
      // UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds; it is compiled only in BLAS mode.
      // It hands the whole operation to a single cblas_sgemm() call with alpha = -scalar and
      // beta = 1.0F, i.e. (by the GEMM definition C = alpha*op(A)*op(B) + beta*C) it performs
      // C -= scalar * A * B.
      //
      //   C      : target matrix, updated in place through C.data()
      //   A      : left-hand side matrix operand
      //   B      : right-hand side matrix operand
      //   scalar : scaling factor; its negation is passed as alpha
3904  template< typename MT3 // Type of the left-hand side target matrix
3905  , typename MT4 // Type of the left-hand side matrix operand
3906  , typename MT5 // Type of the right-hand side matrix operand
3907  , typename ST2 > // Type of the scalar value
3908  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3909  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3910  {
3911  using boost::numeric_cast;
3912 
3916 
      // The CBLAS interface takes 'int' sizes; every size is converted via boost::numeric_cast.
      // M x K is the shape of A, K x N the shape of B; lda/ldb/ldc are the spacing() values.
3917  const int M ( numeric_cast<int>( A.rows() ) );
3918  const int N ( numeric_cast<int>( B.columns() ) );
3919  const int K ( numeric_cast<int>( A.columns() ) );
3920  const int lda( numeric_cast<int>( A.spacing() ) );
3921  const int ldb( numeric_cast<int>( B.spacing() ) );
3922  const int ldc( numeric_cast<int>( C.spacing() ) );
3923 
      // Layout and transposition flags follow the storage order of the target C: a row-major
      // target uses CblasRowMajor with both operands flagged CblasTrans, otherwise
      // CblasColMajor with CblasNoTrans.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
3924  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3925  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3926  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3927  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3928  }
3929 #endif
3930  //**********************************************************************************************
3931 
3932  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3933 #if BLAZE_BLAS_MODE
3934 
      // Overload of selectBlasSubAssignKernel() that is enabled (via EnableIf) when
      // UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds; it is compiled only in BLAS mode.
      // It hands the whole operation to a single cblas_dgemm() call with alpha = -scalar and
      // beta = 1.0, i.e. (by the GEMM definition C = alpha*op(A)*op(B) + beta*C) it performs
      // C -= scalar * A * B.
      //
      //   C      : target matrix, updated in place through C.data()
      //   A      : left-hand side matrix operand
      //   B      : right-hand side matrix operand
      //   scalar : scaling factor; its negation is passed as alpha
3947  template< typename MT3 // Type of the left-hand side target matrix
3948  , typename MT4 // Type of the left-hand side matrix operand
3949  , typename MT5 // Type of the right-hand side matrix operand
3950  , typename ST2 > // Type of the scalar value
3951  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3952  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3953  {
3954  using boost::numeric_cast;
3955 
3959 
      // The CBLAS interface takes 'int' sizes; every size is converted via boost::numeric_cast.
      // M x K is the shape of A, K x N the shape of B; lda/ldb/ldc are the spacing() values.
3960  const int M ( numeric_cast<int>( A.rows() ) );
3961  const int N ( numeric_cast<int>( B.columns() ) );
3962  const int K ( numeric_cast<int>( A.columns() ) );
3963  const int lda( numeric_cast<int>( A.spacing() ) );
3964  const int ldb( numeric_cast<int>( B.spacing() ) );
3965  const int ldc( numeric_cast<int>( C.spacing() ) );
3966 
      // Layout and transposition flags follow the storage order of the target C: a row-major
      // target uses CblasRowMajor with both operands flagged CblasTrans, otherwise
      // CblasColMajor with CblasNoTrans.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
3967  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3968  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3969  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3970  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3971  }
3972 #endif
3973  //**********************************************************************************************
3974 
3975  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3976 #if BLAZE_BLAS_MODE
3977 
      // Overload of selectBlasSubAssignKernel() that is enabled (via EnableIf) when
      // UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of
      // the condition); it is compiled only in BLAS mode. It hands the whole operation to a
      // single cblas_cgemm() call with alpha = -scalar and beta = (1,0), i.e. (by the GEMM
      // definition C = alpha*op(A)*op(B) + beta*C) it performs C -= scalar * A * B.
      //
      //   C      : target matrix, updated in place through C.data()
      //   A      : left-hand side matrix operand
      //   B      : right-hand side matrix operand
      //   scalar : scaling factor; its negation is converted to complex<float> and used as alpha
3990  template< typename MT3 // Type of the left-hand side target matrix
3991  , typename MT4 // Type of the left-hand side matrix operand
3992  , typename MT5 // Type of the right-hand side matrix operand
3993  , typename ST2 > // Type of the scalar value
3994  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3995  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3996  {
3997  using boost::numeric_cast;
3998 
      // Compile time checks: the value_type of each matrix's element type must be 'float'.
4002  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
4003  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
4004  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
4005 
      // The CBLAS interface takes 'int' sizes; every size is converted via boost::numeric_cast.
      // M x K is the shape of A, K x N the shape of B; lda/ldb/ldc are the spacing() values.
4006  const int M ( numeric_cast<int>( A.rows() ) );
4007  const int N ( numeric_cast<int>( B.columns() ) );
4008  const int K ( numeric_cast<int>( A.columns() ) );
4009  const int lda( numeric_cast<int>( A.spacing() ) );
4010  const int ldb( numeric_cast<int>( B.spacing() ) );
4011  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The scaling factors are stored in local complex objects because the call below
      // receives them by address (&alpha, &beta).
4012  const complex<float> alpha( -scalar );
4013  const complex<float> beta ( 1.0F, 0.0F );
4014 
      // Layout and transposition flags follow the storage order of the target C: a row-major
      // target uses CblasRowMajor with both operands flagged CblasTrans, otherwise
      // CblasColMajor with CblasNoTrans.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
4015  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4016  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4017  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4018  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4019  }
4020 #endif
4021  //**********************************************************************************************
4022 
4023  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4024 #if BLAZE_BLAS_MODE
4025 
      // Overload of selectBlasSubAssignKernel() that is enabled (via EnableIf) when
      // UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of
      // the condition); it is compiled only in BLAS mode. It hands the whole operation to a
      // single cblas_zgemm() call with alpha = -scalar and beta = (1,0), i.e. (by the GEMM
      // definition C = alpha*op(A)*op(B) + beta*C) it performs C -= scalar * A * B.
      //
      //   C      : target matrix, updated in place through C.data()
      //   A      : left-hand side matrix operand
      //   B      : right-hand side matrix operand
      //   scalar : scaling factor; its negation is converted to complex<double> and used as alpha
4038  template< typename MT3 // Type of the left-hand side target matrix
4039  , typename MT4 // Type of the left-hand side matrix operand
4040  , typename MT5 // Type of the right-hand side matrix operand
4041  , typename ST2 > // Type of the scalar value
4042  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4043  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4044  {
4045  using boost::numeric_cast;
4046 
      // Compile time checks: the value_type of each matrix's element type must be 'double'.
4050  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
4051  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
4052  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
4053 
      // The CBLAS interface takes 'int' sizes; every size is converted via boost::numeric_cast.
      // M x K is the shape of A, K x N the shape of B; lda/ldb/ldc are the spacing() values.
4054  const int M ( numeric_cast<int>( A.rows() ) );
4055  const int N ( numeric_cast<int>( B.columns() ) );
4056  const int K ( numeric_cast<int>( A.columns() ) );
4057  const int lda( numeric_cast<int>( A.spacing() ) );
4058  const int ldb( numeric_cast<int>( B.spacing() ) );
4059  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The scaling factors are stored in local complex objects because the call below
      // receives them by address (&alpha, &beta).
4060  const complex<double> alpha( -scalar );
4061  const complex<double> beta ( 1.0, 0.0 );
4062 
      // Layout and transposition flags follow the storage order of the target C: a row-major
      // target uses CblasRowMajor with both operands flagged CblasTrans, otherwise
      // CblasColMajor with CblasNoTrans.
      // NOTE(review): presumably because A and B are stored column-major - confirm.
4063  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4064  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4065  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4066  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4067  }
4068 #endif
4069  //**********************************************************************************************
4070 
4071  //**Subtraction assignment to sparse matrices***************************************************
4072  // No special implementation for the subtraction assignment to sparse matrices.
4073  //**********************************************************************************************
4074 
4075  //**Multiplication assignment to dense matrices*************************************************
4076  // No special implementation for the multiplication assignment to dense matrices.
4077  //**********************************************************************************************
4078 
4079  //**Multiplication assignment to sparse matrices************************************************
4080  // No special implementation for the multiplication assignment to sparse matrices.
4081  //**********************************************************************************************
4082 
4083  //**SMP assignment to dense matrices************************************************************
      // SMP assignment of a scaled matrix product (rhs) to the dense matrix lhs. The overload
      // is enabled (via EnableIf) only when UseSMPAssign<MT> holds.
      //
      //   lhs : target dense matrix; must already have the dimensions of rhs (asserted)
      //   rhs : scaled multiplication expression; its operands are read from rhs.matrix_ and
      //         its scaling factor from rhs.scalar_
      //
      // Both operands are first evaluated into the local objects A and B, then the work is
      // delegated to smpAssign( ~lhs, A * B * rhs.scalar_ ).
4098  template< typename MT // Type of the target dense matrix
4099  , bool SO > // Storage order of the target dense matrix
4100  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4101  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4102  {
4104 
4105  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4106  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4107 
4108  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4109  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4110 
      // Nothing to do for an empty target. If only the inner dimension (left.columns()) is
      // zero, the target is reset instead - presumably because the product then has no
      // non-zero contributions (TODO confirm).
4111  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4112  return;
4113  }
4114  else if( left.columns() == 0UL ) {
4115  reset( ~lhs );
4116  return;
4117  }
4118 
4119  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4120  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4121 
      // Sanity checks: evaluation must not change the operand sizes, and the result
      // dimensions A.rows() x B.columns() must match the target.
4122  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4123  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4124  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4125  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4126  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4127  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4128 
4129  smpAssign( ~lhs, A * B * rhs.scalar_ );
4130  }
4131  //**********************************************************************************************
4132 
4133  //**SMP assignment to sparse matrices***********************************************************
      // SMP assignment of a scaled matrix product (rhs) to the sparse matrix lhs. The overload
      // is enabled (via EnableIf) only when UseSMPAssign<MT> holds.
      //
      //   lhs : target sparse matrix; must already have the dimensions of rhs (asserted)
      //   rhs : scaled multiplication expression to be assigned
      //
      // The expression is first evaluated into a temporary of type TmpType, which is then
      // assigned to lhs via smpAssign().
4148  template< typename MT // Type of the target sparse matrix
4149  , bool SO > // Storage order of the target sparse matrix
4150  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4151  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4152  {
4154 
      // Type of the temporary: selected between ResultType and OppositeType by the storage
      // order flag SO of the target.
4155  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
4156 
4163 
4164  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4165  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4166 
4167  const TmpType tmp( rhs );
4168  smpAssign( ~lhs, tmp );
4169  }
4170  //**********************************************************************************************
4171 
4172  //**SMP addition assignment to dense matrices***************************************************
      // SMP addition assignment of a scaled matrix product (rhs) to the dense matrix lhs. The
      // overload is enabled (via EnableIf) only when UseSMPAssign<MT> holds.
      //
      //   lhs : target dense matrix; must already have the dimensions of rhs (asserted)
      //   rhs : scaled multiplication expression; its operands are read from rhs.matrix_ and
      //         its scaling factor from rhs.scalar_
      //
      // Both operands are first evaluated into the local objects A and B, then the work is
      // delegated to smpAddAssign( ~lhs, A * B * rhs.scalar_ ).
4187  template< typename MT // Type of the target dense matrix
4188  , bool SO > // Storage order of the target dense matrix
4189  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4190  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4191  {
4193 
4194  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4195  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4196 
4197  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4198  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4199 
      // The target is left untouched if it is empty or if the inner dimension
      // (left.columns()) is zero.
4200  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4201  return;
4202  }
4203 
4204  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4205  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4206 
      // Sanity checks: evaluation must not change the operand sizes, and the result
      // dimensions A.rows() x B.columns() must match the target.
4207  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4208  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4209  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4210  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4211  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4212  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4213 
4214  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4215  }
4216  //**********************************************************************************************
4217 
4218  //**SMP addition assignment to sparse matrices**************************************************
4219  // No special implementation for the SMP addition assignment to sparse matrices.
4220  //**********************************************************************************************
4221 
4222  //**SMP subtraction assignment to dense matrices************************************************
      // SMP subtraction assignment of a scaled matrix product (rhs) to the dense matrix lhs.
      // The overload is enabled (via EnableIf) only when UseSMPAssign<MT> holds.
      //
      //   lhs : target dense matrix; must already have the dimensions of rhs (asserted)
      //   rhs : scaled multiplication expression; its operands are read from rhs.matrix_ and
      //         its scaling factor from rhs.scalar_
      //
      // Both operands are first evaluated into the local objects A and B, then the work is
      // delegated to smpSubAssign( ~lhs, A * B * rhs.scalar_ ).
4237  template< typename MT // Type of the target dense matrix
4238  , bool SO > // Storage order of the target dense matrix
4239  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4240  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4241  {
4243 
4244  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4245  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4246 
4247  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4248  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4249 
      // The target is left untouched if it is empty or if the inner dimension
      // (left.columns()) is zero.
4250  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4251  return;
4252  }
4253 
4254  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4255  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4256 
      // Sanity checks: evaluation must not change the operand sizes, and the result
      // dimensions A.rows() x B.columns() must match the target.
4257  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4258  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4259  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4260  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4261  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4262  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4263 
4264  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4265  }
4266  //**********************************************************************************************
4267 
4268  //**SMP subtraction assignment to sparse matrices***********************************************
4269  // No special implementation for the SMP subtraction assignment to sparse matrices.
4270  //**********************************************************************************************
4271 
4272  //**SMP multiplication assignment to dense matrices*********************************************
4273  // No special implementation for the SMP multiplication assignment to dense matrices.
4274  //**********************************************************************************************
4275 
4276  //**SMP multiplication assignment to sparse matrices********************************************
4277  // No special implementation for the SMP multiplication assignment to sparse matrices.
4278  //**********************************************************************************************
4279 
4280  //**Compile time checks*************************************************************************
4289  //**********************************************************************************************
4290 };
4292 //*************************************************************************************************
4293 
4294 
4295 
4296 
4297 //=================================================================================================
4298 //
4299 // GLOBAL BINARY ARITHMETIC OPERATORS
4300 //
4301 //=================================================================================================
4302 
4303 //*************************************************************************************************
     // Multiplication operator for the multiplication of two column-major dense matrices
     // (A = B * C).
     //
     //   lhs : left-hand side column-major dense matrix
     //   rhs : right-hand side column-major dense matrix
     //
     // Returns a TDMatTDMatMultExpr<T1,T2> expression object built from both operands.
     // Throws std::invalid_argument if the number of columns of lhs differs from the number
     // of rows of rhs.
     //
     // Listing line 4332 (the function declarator) was missing from this listing; it is
     // restored here so that the definition is complete.
4329 template< typename T1 // Type of the left-hand side dense matrix
4330  , typename T2 > // Type of the right-hand side dense matrix
4331 inline const TDMatTDMatMultExpr<T1,T2>
4332  operator*( const DenseMatrix<T1,true>& lhs, const DenseMatrix<T2,true>& rhs )
4333 {
4335 
4336  if( (~lhs).columns() != (~rhs).rows() )
4337  throw std::invalid_argument( "Matrix sizes do not match" );
4338 
4339  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4340 }
4341 //*************************************************************************************************
4342 
4343 
4344 
4345 
4346 //=================================================================================================
4347 //
4348 // EXPRESSION TRAIT SPECIALIZATIONS
4349 //
4350 //=================================================================================================
4351 
4352 //*************************************************************************************************
     // Specialization of TDMatDVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> matrix
     // operand and a vector operand VT.
     // If MT1 and MT2 are dense column-major matrices and VT is a dense column vector, the
     // nested Type is the TDMatDVecMultExprTrait type for MT1 and the result of
     // TDMatDVecMultExprTrait<MT2,VT>, i.e. the product is typed as MT1 * ( MT2 * VT ).
     // In all other cases Type is INVALID_TYPE.
4354 template< typename MT1, typename MT2, typename VT >
4355 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4356 {
4357  public:
4358  //**********************************************************************************************
4359  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4360  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4361  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4362  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4363  , INVALID_TYPE >::Type Type;
4364  //**********************************************************************************************
4365 };
4367 //*************************************************************************************************
4368 
4369 
4370 //*************************************************************************************************
     // Specialization of TDMatSVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> matrix
     // operand and a vector operand VT.
     // If MT1 and MT2 are dense column-major matrices and VT is a sparse column vector, the
     // nested Type is the TDMatDVecMultExprTrait type for MT1 and the result of
     // TDMatSVecMultExprTrait<MT2,VT>, i.e. the product is typed as MT1 * ( MT2 * VT ).
     // In all other cases Type is INVALID_TYPE.
4372 template< typename MT1, typename MT2, typename VT >
4373 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
4374 {
4375  public:
4376  //**********************************************************************************************
4377  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4378  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4379  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4380  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4381  , INVALID_TYPE >::Type Type;
4382  //**********************************************************************************************
4383 };
4385 //*************************************************************************************************
4386 
4387 
4388 //*************************************************************************************************
     // Specialization of TDVecTDMatMultExprTrait for a vector operand VT and a
     // TDMatTDMatMultExpr<MT1,MT2> matrix operand.
     // If VT is a dense row vector and MT1 and MT2 are dense column-major matrices, the
     // nested Type is the TDVecTDMatMultExprTrait type for the result of
     // TDVecTDMatMultExprTrait<VT,MT1> and MT2, i.e. the product is typed as
     // ( VT * MT1 ) * MT2. In all other cases Type is INVALID_TYPE.
4390 template< typename VT, typename MT1, typename MT2 >
4391 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4392 {
4393  public:
4394  //**********************************************************************************************
4395  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4396  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4397  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4398  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4399  , INVALID_TYPE >::Type Type;
4400  //**********************************************************************************************
4401 };
4403 //*************************************************************************************************
4404 
4405 
4406 //*************************************************************************************************
     // Specialization of TSVecTDMatMultExprTrait for a vector operand VT and a
     // TDMatTDMatMultExpr<MT1,MT2> matrix operand.
     // If VT is a sparse row vector and MT1 and MT2 are dense column-major matrices, the
     // nested Type is the TDVecTDMatMultExprTrait type for the result of
     // TSVecTDMatMultExprTrait<VT,MT1> and MT2, i.e. the product is typed as
     // ( VT * MT1 ) * MT2. In all other cases Type is INVALID_TYPE.
4408 template< typename VT, typename MT1, typename MT2 >
4409 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
4410 {
4411  public:
4412  //**********************************************************************************************
4413  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4414  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4415  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4416  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4417  , INVALID_TYPE >::Type Type;
4418  //**********************************************************************************************
4419 };
4421 //*************************************************************************************************
4422 
4423 
4424 //*************************************************************************************************
     // Specialization of SubmatrixExprTrait for TDMatTDMatMultExpr<MT1,MT2> with flag AF.
     // The nested Type is the MultExprTrait type of the submatrix expression types of both
     // (const-qualified) operands, each obtained with the same AF flag.
4426 template< typename MT1, typename MT2, bool AF >
4427 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2>, AF >
4428 {
4429  public:
4430  //**********************************************************************************************
4431  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4432  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4433  //**********************************************************************************************
4434 };
4436 //*************************************************************************************************
4437 
4438 
4439 //*************************************************************************************************
     // Specialization of RowExprTrait for TDMatTDMatMultExpr<MT1,MT2>.
     // The nested Type is the MultExprTrait type of the row expression type of the
     // (const-qualified) left operand MT1 and the complete right operand MT2.
4441 template< typename MT1, typename MT2 >
4442 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
4443 {
4444  public:
4445  //**********************************************************************************************
4446  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4447  //**********************************************************************************************
4448 };
4450 //*************************************************************************************************
4451 
4452 
4453 //*************************************************************************************************
     // Specialization of ColumnExprTrait for TDMatTDMatMultExpr<MT1,MT2>.
     // The nested Type is the MultExprTrait type of the complete left operand MT1 and the
     // column expression type of the (const-qualified) right operand MT2.
4455 template< typename MT1, typename MT2 >
4456 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
4457 {
4458  public:
4459  //**********************************************************************************************
4460  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4461  //**********************************************************************************************
4462 };
4464 //*************************************************************************************************
4465 
4466 } // namespace blaze
4467 
4468 #endif
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:253
Data type constraint.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:123
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:223
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B * C$ ).
Definition: DMatDMatMultExpr.h:4329
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:249
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold.This setting specifies t...
Definition: Thresholds.h:176
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:152
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:411
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:392
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:244
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:298
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:249
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:131
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:370
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2404
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:348
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:246
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:243
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold.This threshold speci...
Definition: Thresholds.h:903
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:118
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:256
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:358
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:250
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:382
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:245
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:247
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:328
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:259
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:283
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:262
Header file for the IsColumnVector type trait.
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:402
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:338
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:248
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:412