All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
75 #include <blaze/system/BLAS.h>
77 #include <blaze/util/Assert.h>
78 #include <blaze/util/Complex.h>
85 #include <blaze/util/DisableIf.h>
86 #include <blaze/util/EnableIf.h>
87 #include <blaze/util/InvalidType.h>
89 #include <blaze/util/SelectType.h>
90 #include <blaze/util/Types.h>
96 
97 
98 namespace blaze {
99 
100 //=================================================================================================
101 //
102 // CLASS DMATDMATMULTEXPR
103 //
104 //=================================================================================================
105 
106 //*************************************************************************************************
113 template< typename MT1 // Type of the left-hand side dense matrix
114  , typename MT2 > // Type of the right-hand side dense matrix
115 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
116  , private MatMatMultExpr
117  , private Computation
118 {
119  private:
120  //**Type definitions****************************************************************************
121  typedef typename MT1::ResultType RT1;
122  typedef typename MT2::ResultType RT2;
123  typedef typename RT1::ElementType ET1;
124  typedef typename RT2::ElementType ET2;
125  typedef typename MT1::CompositeType CT1;
126  typedef typename MT2::CompositeType CT2;
127  //**********************************************************************************************
128 
129  //**********************************************************************************************
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
136  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
145  template< typename MT >
146  struct UseSMPAssign {
147  enum { value = ( evaluateLeft || evaluateRight ) };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = IsFloat<typename T1::ElementType>::value &&
160  IsFloat<typename T2::ElementType>::value &&
161  IsFloat<typename T3::ElementType>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
171  template< typename T1, typename T2, typename T3 >
172  struct UseDoublePrecisionKernel {
173  enum { value = IsDouble<typename T1::ElementType>::value &&
174  IsDouble<typename T2::ElementType>::value &&
175  IsDouble<typename T3::ElementType>::value };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
186  template< typename T1, typename T2, typename T3 >
187  struct UseSinglePrecisionComplexKernel {
188  typedef complex<float> Type;
189  enum { value = IsSame<typename T1::ElementType,Type>::value &&
190  IsSame<typename T2::ElementType,Type>::value &&
191  IsSame<typename T3::ElementType,Type>::value };
192  };
194  //**********************************************************************************************
195 
196  //**********************************************************************************************
198 
202  template< typename T1, typename T2, typename T3 >
203  struct UseDoublePrecisionComplexKernel {
204  typedef complex<double> Type;
205  enum { value = IsSame<typename T1::ElementType,Type>::value &&
206  IsSame<typename T2::ElementType,Type>::value &&
207  IsSame<typename T3::ElementType,Type>::value };
208  };
210  //**********************************************************************************************
211 
212  //**********************************************************************************************
214 
217  template< typename T1, typename T2, typename T3 >
218  struct UseDefaultKernel {
219  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
232  template< typename T1, typename T2, typename T3 >
233  struct UseVectorizedDefaultKernel {
234  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237  IntrinsicTrait<typename T1::ElementType>::addition &&
238  IntrinsicTrait<typename T1::ElementType>::subtraction &&
239  IntrinsicTrait<typename T1::ElementType>::multiplication };
240  };
242  //**********************************************************************************************
243 
244  public:
245  //**Type definitions****************************************************************************
252  typedef const ElementType ReturnType;
253  typedef const ResultType CompositeType;
254 
256  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
257 
259  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
260 
263 
266  //**********************************************************************************************
267 
268  //**Compilation flags***************************************************************************
270  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
274 
276  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
277  !evaluateRight && MT2::smpAssignable };
278  //**********************************************************************************************
279 
280  //**Constructor*********************************************************************************
286  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
287  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
288  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
289  {
290  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
291  }
292  //**********************************************************************************************
293 
294  //**Access operator*****************************************************************************
301  inline ReturnType operator()( size_t i, size_t j ) const {
302  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
303  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
304 
305  ElementType tmp;
306 
307  if( lhs_.columns() != 0UL ) {
308  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
309  tmp = lhs_(i,0UL) * rhs_(0UL,j);
310  for( size_t k=1UL; k<end; k+=2UL ) {
311  tmp += lhs_(i,k ) * rhs_(k ,j);
312  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
313  }
314  if( end < lhs_.columns() ) {
315  tmp += lhs_(i,end) * rhs_(end,j);
316  }
317  }
318  else {
319  reset( tmp );
320  }
321 
322  return tmp;
323  }
324  //**********************************************************************************************
325 
326  //**Rows function*******************************************************************************
331  inline size_t rows() const {
332  return lhs_.rows();
333  }
334  //**********************************************************************************************
335 
336  //**Columns function****************************************************************************
341  inline size_t columns() const {
342  return rhs_.columns();
343  }
344  //**********************************************************************************************
345 
346  //**Left operand access*************************************************************************
351  inline LeftOperand leftOperand() const {
352  return lhs_;
353  }
354  //**********************************************************************************************
355 
356  //**Right operand access************************************************************************
361  inline RightOperand rightOperand() const {
362  return rhs_;
363  }
364  //**********************************************************************************************
365 
366  //**********************************************************************************************
372  template< typename T >
373  inline bool canAlias( const T* alias ) const {
374  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
375  }
376  //**********************************************************************************************
377 
378  //**********************************************************************************************
384  template< typename T >
385  inline bool isAliased( const T* alias ) const {
386  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
387  }
388  //**********************************************************************************************
389 
390  //**********************************************************************************************
395  inline bool isAligned() const {
396  return lhs_.isAligned() && rhs_.isAligned();
397  }
398  //**********************************************************************************************
399 
400  //**********************************************************************************************
405  inline bool canSMPAssign() const {
406  return ( !BLAZE_BLAS_IS_PARALLEL ||
407  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
409  }
410  //**********************************************************************************************
411 
412  private:
413  //**Member variables****************************************************************************
416  //**********************************************************************************************
417 
418  //**Assignment to dense matrices****************************************************************
431  template< typename MT // Type of the target dense matrix
432  , bool SO > // Storage order of the target dense matrix
433  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
434  {
436 
437  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
438  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
439 
440  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
441  return;
442  }
443  else if( rhs.lhs_.columns() == 0UL ) {
444  reset( ~lhs );
445  return;
446  }
447 
448  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
449  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
450 
451  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
452  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
453  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
454  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
455  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
456  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
457 
458  DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
459  }
461  //**********************************************************************************************
462 
463  //**Assignment to dense matrices (kernel selection)*********************************************
474  template< typename MT3 // Type of the left-hand side target matrix
475  , typename MT4 // Type of the left-hand side matrix operand
476  , typename MT5 > // Type of the right-hand side matrix operand
477  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
478  {
479  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
480  DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
481  else
482  DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
483  }
485  //**********************************************************************************************
486 
487  //**Default assignment to dense matrices********************************************************
500  template< typename MT3 // Type of the left-hand side target matrix
501  , typename MT4 // Type of the left-hand side matrix operand
502  , typename MT5 > // Type of the right-hand side matrix operand
503  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
504  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
505  {
506  const size_t M( A.rows() );
507  const size_t N( B.columns() );
508  const size_t K( A.columns() );
509 
510  for( size_t i=0UL; i<M; ++i ) {
511  for( size_t j=0UL; j<N; ++j ) {
512  C(i,j) = A(i,0UL) * B(0UL,j);
513  }
514  for( size_t k=1UL; k<K; ++k ) {
515  for( size_t j=0UL; j<N; ++j ) {
516  C(i,j) += A(i,k) * B(k,j);
517  }
518  }
519  }
520  }
522  //**********************************************************************************************
523 
524  //**Vectorized default assignment to row-major dense matrices***********************************
538  template< typename MT3 // Type of the left-hand side target matrix
539  , typename MT4 // Type of the left-hand side matrix operand
540  , typename MT5 > // Type of the right-hand side matrix operand
541  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
542  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
543  {
544  typedef IntrinsicTrait<ElementType> IT;
545 
546  const size_t M( A.rows() );
547  const size_t N( B.columns() );
548  const size_t K( A.columns() );
549 
550  size_t j( 0UL );
551 
552  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
553  for( size_t i=0UL; i<M; ++i ) {
554  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
555  for( size_t k=0UL; k<K; ++k ) {
556  const IntrinsicType a1( set( A(i,k) ) );
557  xmm1 = xmm1 + a1 * B.load(k,j );
558  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
559  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
560  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
561  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
562  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
563  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
564  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
565  }
566  (~C).store( i, j , xmm1 );
567  (~C).store( i, j+IT::size , xmm2 );
568  (~C).store( i, j+IT::size*2UL, xmm3 );
569  (~C).store( i, j+IT::size*3UL, xmm4 );
570  (~C).store( i, j+IT::size*4UL, xmm5 );
571  (~C).store( i, j+IT::size*5UL, xmm6 );
572  (~C).store( i, j+IT::size*6UL, xmm7 );
573  (~C).store( i, j+IT::size*7UL, xmm8 );
574  }
575  }
576  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
577  size_t i( 0UL );
578  for( ; (i+2UL) <= M; i+=2UL ) {
579  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
580  for( size_t k=0UL; k<K; ++k ) {
581  const IntrinsicType a1( set( A(i ,k) ) );
582  const IntrinsicType a2( set( A(i+1UL,k) ) );
583  const IntrinsicType b1( B.load(k,j ) );
584  const IntrinsicType b2( B.load(k,j+IT::size ) );
585  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
586  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
587  xmm1 = xmm1 + a1 * b1;
588  xmm2 = xmm2 + a1 * b2;
589  xmm3 = xmm3 + a1 * b3;
590  xmm4 = xmm4 + a1 * b4;
591  xmm5 = xmm5 + a2 * b1;
592  xmm6 = xmm6 + a2 * b2;
593  xmm7 = xmm7 + a2 * b3;
594  xmm8 = xmm8 + a2 * b4;
595  }
596  (~C).store( i , j , xmm1 );
597  (~C).store( i , j+IT::size , xmm2 );
598  (~C).store( i , j+IT::size*2UL, xmm3 );
599  (~C).store( i , j+IT::size*3UL, xmm4 );
600  (~C).store( i+1UL, j , xmm5 );
601  (~C).store( i+1UL, j+IT::size , xmm6 );
602  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
603  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
604  }
605  if( i < M ) {
606  IntrinsicType xmm1, xmm2, xmm3, xmm4;
607  for( size_t k=0UL; k<K; ++k ) {
608  const IntrinsicType a1( set( A(i,k) ) );
609  xmm1 = xmm1 + a1 * B.load(k,j );
610  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
611  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
612  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
613  }
614  (~C).store( i, j , xmm1 );
615  (~C).store( i, j+IT::size , xmm2 );
616  (~C).store( i, j+IT::size*2UL, xmm3 );
617  (~C).store( i, j+IT::size*3UL, xmm4 );
618  }
619  }
620  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
621  size_t i( 0UL );
622  for( ; (i+2UL) <= M; i+=2UL ) {
623  IntrinsicType xmm1, xmm2, xmm3, xmm4;
624  for( size_t k=0UL; k<K; ++k ) {
625  const IntrinsicType a1( set( A(i ,k) ) );
626  const IntrinsicType a2( set( A(i+1UL,k) ) );
627  const IntrinsicType b1( B.load(k,j ) );
628  const IntrinsicType b2( B.load(k,j+IT::size) );
629  xmm1 = xmm1 + a1 * b1;
630  xmm2 = xmm2 + a1 * b2;
631  xmm3 = xmm3 + a2 * b1;
632  xmm4 = xmm4 + a2 * b2;
633  }
634  (~C).store( i , j , xmm1 );
635  (~C).store( i , j+IT::size, xmm2 );
636  (~C).store( i+1UL, j , xmm3 );
637  (~C).store( i+1UL, j+IT::size, xmm4 );
638  }
639  if( i < M ) {
640  IntrinsicType xmm1, xmm2;
641  for( size_t k=0UL; k<K; ++k ) {
642  const IntrinsicType a1( set( A(i,k) ) );
643  xmm1 = xmm1 + a1 * B.load(k,j );
644  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
645  }
646  (~C).store( i, j , xmm1 );
647  (~C).store( i, j+IT::size, xmm2 );
648  }
649  }
650  if( j < N ) {
651  size_t i( 0UL );
652  for( ; (i+2UL) <= M; i+=2UL ) {
653  IntrinsicType xmm1, xmm2;
654  for( size_t k=0UL; k<K; ++k ) {
655  const IntrinsicType b1( B.load(k,j) );
656  xmm1 = xmm1 + set( A(i ,k) ) * b1;
657  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
658  }
659  (~C).store( i , j, xmm1 );
660  (~C).store( i+1UL, j, xmm2 );
661  }
662  if( i < M ) {
663  IntrinsicType xmm1;
664  for( size_t k=0UL; k<K; ++k ) {
665  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
666  }
667  (~C).store( i, j, xmm1 );
668  }
669  }
670  }
672  //**********************************************************************************************
673 
674  //**Vectorized default assignment to column-major dense matrices********************************
688  template< typename MT3 // Type of the left-hand side target matrix
689  , typename MT4 // Type of the left-hand side matrix operand
690  , typename MT5 > // Type of the right-hand side matrix operand
691  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
692  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
693  {
696 
697  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
698  const typename MT4::OppositeType tmp( serial( A ) );
699  assign( ~C, tmp * B );
700  }
701  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
702  const typename MT5::OppositeType tmp( serial( B ) );
703  assign( ~C, A * tmp );
704  }
705  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
706  const typename MT4::OppositeType tmp( serial( A ) );
707  assign( ~C, tmp * B );
708  }
709  else {
710  const typename MT5::OppositeType tmp( serial( B ) );
711  assign( ~C, A * tmp );
712  }
713  }
715  //**********************************************************************************************
716 
717  //**BLAS-based assignment to dense matrices (default)*******************************************
730  template< typename MT3 // Type of the left-hand side target matrix
731  , typename MT4 // Type of the left-hand side matrix operand
732  , typename MT5 > // Type of the right-hand side matrix operand
733  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
734  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
735  {
736  selectDefaultAssignKernel( C, A, B );
737  }
739  //**********************************************************************************************
740 
741  //**BLAS-based assignment to dense matrices (single precision)**********************************
742 #if BLAZE_BLAS_MODE
743 
756  template< typename MT3 // Type of the left-hand side target matrix
757  , typename MT4 // Type of the left-hand side matrix operand
758  , typename MT5 > // Type of the right-hand side matrix operand
759  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
760  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
761  {
762  using boost::numeric_cast;
763 
767 
768  const int M ( numeric_cast<int>( A.rows() ) );
769  const int N ( numeric_cast<int>( B.columns() ) );
770  const int K ( numeric_cast<int>( A.columns() ) );
771  const int lda( numeric_cast<int>( A.spacing() ) );
772  const int ldb( numeric_cast<int>( B.spacing() ) );
773  const int ldc( numeric_cast<int>( C.spacing() ) );
774 
775  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
776  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
777  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
778  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
779  }
781 #endif
782  //**********************************************************************************************
783 
784  //**BLAS-based assignment to dense matrices (double precision)**********************************
785 #if BLAZE_BLAS_MODE
786 
799  template< typename MT3 // Type of the left-hand side target matrix
800  , typename MT4 // Type of the left-hand side matrix operand
801  , typename MT5 > // Type of the right-hand side matrix operand
802  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
803  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
804  {
805  using boost::numeric_cast;
806 
810 
811  const int M ( numeric_cast<int>( A.rows() ) );
812  const int N ( numeric_cast<int>( B.columns() ) );
813  const int K ( numeric_cast<int>( A.columns() ) );
814  const int lda( numeric_cast<int>( A.spacing() ) );
815  const int ldb( numeric_cast<int>( B.spacing() ) );
816  const int ldc( numeric_cast<int>( C.spacing() ) );
817 
818  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
819  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
820  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
821  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
822  }
824 #endif
825  //**********************************************************************************************
826 
827  //**BLAS-based assignment to dense matrices (single precision complex)**************************
828 #if BLAZE_BLAS_MODE
829 
842  template< typename MT3 // Type of the left-hand side target matrix
843  , typename MT4 // Type of the left-hand side matrix operand
844  , typename MT5 > // Type of the right-hand side matrix operand
845  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
846  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
847  {
848  using boost::numeric_cast;
849 
853  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
854  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
855  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
856 
857  const int M ( numeric_cast<int>( A.rows() ) );
858  const int N ( numeric_cast<int>( B.columns() ) );
859  const int K ( numeric_cast<int>( A.columns() ) );
860  const int lda( numeric_cast<int>( A.spacing() ) );
861  const int ldb( numeric_cast<int>( B.spacing() ) );
862  const int ldc( numeric_cast<int>( C.spacing() ) );
863  const complex<float> alpha( 1.0F, 0.0F );
864  const complex<float> beta ( 0.0F, 0.0F );
865 
866  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
867  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
868  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
869  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
870  }
872 #endif
873  //**********************************************************************************************
874 
875  //**BLAS-based assignment to dense matrices (double precision complex)**************************
876 #if BLAZE_BLAS_MODE
877 
890  template< typename MT3 // Type of the left-hand side target matrix
891  , typename MT4 // Type of the left-hand side matrix operand
892  , typename MT5 > // Type of the right-hand side matrix operand
893  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
894  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
895  {
896  using boost::numeric_cast;
897 
901  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
902  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
903  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
904 
905  const int M ( numeric_cast<int>( A.rows() ) );
906  const int N ( numeric_cast<int>( B.columns() ) );
907  const int K ( numeric_cast<int>( A.columns() ) );
908  const int lda( numeric_cast<int>( A.spacing() ) );
909  const int ldb( numeric_cast<int>( B.spacing() ) );
910  const int ldc( numeric_cast<int>( C.spacing() ) );
911  const complex<double> alpha( 1.0, 0.0 );
912  const complex<double> beta ( 0.0, 0.0 );
913 
914  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
915  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
916  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
917  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
918  }
920 #endif
921  //**********************************************************************************************
922 
923  //**Assignment to sparse matrices***************************************************************
935  template< typename MT // Type of the target sparse matrix
936  , bool SO > // Storage order of the target sparse matrix
937  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
938  {
940 
941  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
942 
949 
950  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
951  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
952 
953  const TmpType tmp( serial( rhs ) );
954  assign( ~lhs, tmp );
955  }
957  //**********************************************************************************************
958 
959  //**Addition assignment to dense matrices*******************************************************
972  template< typename MT // Type of the target dense matrix
973  , bool SO > // Storage order of the target dense matrix
974  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
975  {
977 
978  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
979  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
980 
981  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
982  return;
983  }
984 
985  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
986  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
987 
988  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
989  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
990  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
991  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
992  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
993  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
994 
995  DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
996  }
998  //**********************************************************************************************
999 
1000  //**Addition assignment to dense matrices (kernel selection)************************************
1011  template< typename MT3 // Type of the left-hand side target matrix
1012  , typename MT4 // Type of the left-hand side matrix operand
1013  , typename MT5 > // Type of the right-hand side matrix operand
1014  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1015  {
1016  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1017  DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1018  else
1019  DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1020  }
1022  //**********************************************************************************************
1023 
1024  //**Default addition assignment to dense matrices***********************************************
1038  template< typename MT3 // Type of the left-hand side target matrix
1039  , typename MT4 // Type of the left-hand side matrix operand
1040  , typename MT5 > // Type of the right-hand side matrix operand
1041  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1042  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1043  {
1044  const size_t M( A.rows() );
1045  const size_t N( B.columns() );
1046  const size_t K( A.columns() );
1047 
1048  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1049  const size_t end( N & size_t(-2) );
1050 
1051  for( size_t i=0UL; i<M; ++i ) {
1052  for( size_t k=0UL; k<K; ++k ) {
1053  for( size_t j=0UL; j<end; j+=2UL ) {
1054  C(i,j ) += A(i,k) * B(k,j );
1055  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1056  }
1057  if( end < N ) {
1058  C(i,end) += A(i,k) * B(k,end);
1059  }
1060  }
1061  }
1062  }
1064  //**********************************************************************************************
1065 
1066  //**Vectorized default addition assignment to row-major dense matrices**************************
1080  template< typename MT3 // Type of the left-hand side target matrix
1081  , typename MT4 // Type of the left-hand side matrix operand
1082  , typename MT5 > // Type of the right-hand side matrix operand
1083  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1085  {
      // Vectorized kernel computing C += A * B for a row-major target matrix C.
      //
      // The columns of C are consumed in four stages of decreasing width: blocks of 8, 4, 2 and
      // finally 1 vector(s) of IT::size elements. In every stage the current values of C are
      // loaded into accumulators, the products of A(i,k) and the matching vectors of row k of B
      // are added for all k, and the accumulators are stored back to C.
      //
      // NOTE(review): every stage loads and stores whole vectors of IT::size elements, also when
      // fewer than IT::size columns remain in front of N. Presumably the involved matrices are
      // padded accordingly -- confirm against the load()/store() contract of the operands.
1086  typedef IntrinsicTrait<ElementType> IT;
1087 
1088  const size_t M( A.rows() );
1089  const size_t N( B.columns() );
1090  const size_t K( A.columns() );
1091 
1092  size_t j( 0UL );
1093 
      // Stage 1: 8 vectors wide, one row of C at a time. Runs as long as the 8th vector of the
      // block still starts in front of column N.
1094  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1095  for( size_t i=0UL; i<M; ++i ) {
1096  IntrinsicType xmm1( (~C).load(i,j ) );
1097  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1098  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1099  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1100  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1101  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1102  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1103  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1104  for( size_t k=0UL; k<K; ++k ) {
      // a1 is built from the single element A(i,k) and reused for all eight vectors of row k of B
1105  const IntrinsicType a1( set( A(i,k) ) );
1106  xmm1 = xmm1 + a1 * B.load(k,j );
1107  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1108  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1109  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1110  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1111  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1112  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1113  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1114  }
1115  (~C).store( i, j , xmm1 );
1116  (~C).store( i, j+IT::size , xmm2 );
1117  (~C).store( i, j+IT::size*2UL, xmm3 );
1118  (~C).store( i, j+IT::size*3UL, xmm4 );
1119  (~C).store( i, j+IT::size*4UL, xmm5 );
1120  (~C).store( i, j+IT::size*5UL, xmm6 );
1121  (~C).store( i, j+IT::size*6UL, xmm7 );
1122  (~C).store( i, j+IT::size*7UL, xmm8 );
1123  }
1124  }
      // Stage 2: 4 vectors wide, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1), so
      // that every vector loaded from B is used twice.
1125  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1126  size_t i( 0UL );
1127  for( ; (i+2UL) <= M; i+=2UL ) {
1128  IntrinsicType xmm1( (~C).load(i ,j ) );
1129  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1130  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1131  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1132  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1133  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1134  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1135  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1136  for( size_t k=0UL; k<K; ++k ) {
1137  const IntrinsicType a1( set( A(i ,k) ) );
1138  const IntrinsicType a2( set( A(i+1UL,k) ) );
1139  const IntrinsicType b1( B.load(k,j ) );
1140  const IntrinsicType b2( B.load(k,j+IT::size ) );
1141  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1142  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1143  xmm1 = xmm1 + a1 * b1;
1144  xmm2 = xmm2 + a1 * b2;
1145  xmm3 = xmm3 + a1 * b3;
1146  xmm4 = xmm4 + a1 * b4;
1147  xmm5 = xmm5 + a2 * b1;
1148  xmm6 = xmm6 + a2 * b2;
1149  xmm7 = xmm7 + a2 * b3;
1150  xmm8 = xmm8 + a2 * b4;
1151  }
1152  (~C).store( i , j , xmm1 );
1153  (~C).store( i , j+IT::size , xmm2 );
1154  (~C).store( i , j+IT::size*2UL, xmm3 );
1155  (~C).store( i , j+IT::size*3UL, xmm4 );
1156  (~C).store( i+1UL, j , xmm5 );
1157  (~C).store( i+1UL, j+IT::size , xmm6 );
1158  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1159  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1160  }
      // Remaining single row in case M is odd
1161  if( i < M ) {
1162  IntrinsicType xmm1( (~C).load(i,j ) );
1163  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1164  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1165  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1166  for( size_t k=0UL; k<K; ++k ) {
1167  const IntrinsicType a1( set( A(i,k) ) );
1168  xmm1 = xmm1 + a1 * B.load(k,j );
1169  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1170  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1171  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1172  }
1173  (~C).store( i, j , xmm1 );
1174  (~C).store( i, j+IT::size , xmm2 );
1175  (~C).store( i, j+IT::size*2UL, xmm3 );
1176  (~C).store( i, j+IT::size*3UL, xmm4 );
1177  }
1178  }
      // Stage 3: 2 vectors wide, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1)
1179  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1180  size_t i( 0UL );
1181  for( ; (i+2UL) <= M; i+=2UL ) {
1182  IntrinsicType xmm1( (~C).load(i ,j ) );
1183  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1184  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1185  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1186  for( size_t k=0UL; k<K; ++k ) {
1187  const IntrinsicType a1( set( A(i ,k) ) );
1188  const IntrinsicType a2( set( A(i+1UL,k) ) );
1189  const IntrinsicType b1( B.load(k,j ) );
1190  const IntrinsicType b2( B.load(k,j+IT::size) );
1191  xmm1 = xmm1 + a1 * b1;
1192  xmm2 = xmm2 + a1 * b2;
1193  xmm3 = xmm3 + a2 * b1;
1194  xmm4 = xmm4 + a2 * b2;
1195  }
1196  (~C).store( i , j , xmm1 );
1197  (~C).store( i , j+IT::size, xmm2 );
1198  (~C).store( i+1UL, j , xmm3 );
1199  (~C).store( i+1UL, j+IT::size, xmm4 );
1200  }
      // Remaining single row in case M is odd
1201  if( i < M ) {
1202  IntrinsicType xmm1( (~C).load(i,j ) );
1203  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1204  for( size_t k=0UL; k<K; ++k ) {
1205  const IntrinsicType a1( set( A(i,k) ) );
1206  xmm1 = xmm1 + a1 * B.load(k,j );
1207  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1208  }
1209  (~C).store( i, j , xmm1 );
1210  (~C).store( i, j+IT::size, xmm2 );
1211  }
1212  }
      // Stage 4: at most one vector of columns is left; again two rows at a time plus a
      // single-row remainder.
1213  if( j < N ) {
1214  size_t i( 0UL );
1215  for( ; (i+2UL) <= M; i+=2UL ) {
1216  IntrinsicType xmm1( (~C).load(i ,j) );
1217  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1218  for( size_t k=0UL; k<K; ++k ) {
1219  const IntrinsicType b1( B.load(k,j) );
1220  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1221  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1222  }
1223  (~C).store( i , j, xmm1 );
1224  (~C).store( i+1UL, j, xmm2 );
1225  }
1226  if( i < M ) {
1227  IntrinsicType xmm1( (~C).load(i,j) );
1228  for( size_t k=0UL; k<K; ++k ) {
1229  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1230  }
1231  (~C).store( i, j, xmm1 );
1232  }
1233  }
1234  }
1236  //**********************************************************************************************
1237 
1238  //**Vectorized default addition assignment to column-major dense matrices***********************
1252  template< typename MT3 // Type of the left-hand side target matrix
1253  , typename MT4 // Type of the left-hand side matrix operand
1254  , typename MT5 > // Type of the right-hand side matrix operand
1255  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1256  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1257  {
1260 
1261  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1262  const typename MT4::OppositeType tmp( serial( A ) );
1263  addAssign( ~C, tmp * B );
1264  }
1265  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1266  const typename MT5::OppositeType tmp( serial( B ) );
1267  addAssign( ~C, A * tmp );
1268  }
1269  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1270  const typename MT4::OppositeType tmp( serial( A ) );
1271  addAssign( ~C, tmp * B );
1272  }
1273  else {
1274  const typename MT5::OppositeType tmp( serial( B ) );
1275  addAssign( ~C, A * tmp );
1276  }
1277  }
1279  //**********************************************************************************************
1280 
1281  //**BLAS-based addition assignment to dense matrices (default)**********************************
1295  template< typename MT3 // Type of the left-hand side target matrix
1296  , typename MT4 // Type of the left-hand side matrix operand
1297  , typename MT5 > // Type of the right-hand side matrix operand
1298  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1299  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1300  {
1301  selectDefaultAddAssignKernel( C, A, B );
1302  }
1304  //**********************************************************************************************
1305 
1306  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1307 #if BLAZE_BLAS_MODE
1308 
1321  template< typename MT3 // Type of the left-hand side target matrix
1322  , typename MT4 // Type of the left-hand side matrix operand
1323  , typename MT5 > // Type of the right-hand side matrix operand
1324  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1325  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1326  {
1327  using boost::numeric_cast;
1328 
1332 
1333  const int M ( numeric_cast<int>( A.rows() ) );
1334  const int N ( numeric_cast<int>( B.columns() ) );
1335  const int K ( numeric_cast<int>( A.columns() ) );
1336  const int lda( numeric_cast<int>( A.spacing() ) );
1337  const int ldb( numeric_cast<int>( B.spacing() ) );
1338  const int ldc( numeric_cast<int>( C.spacing() ) );
1339 
1340  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1341  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1342  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1343  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1344  }
1346 #endif
1347  //**********************************************************************************************
1348 
1349  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1350 #if BLAZE_BLAS_MODE
1351 
1364  template< typename MT3 // Type of the left-hand side target matrix
1365  , typename MT4 // Type of the left-hand side matrix operand
1366  , typename MT5 > // Type of the right-hand side matrix operand
1367  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1368  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1369  {
1370  using boost::numeric_cast;
1371 
1375 
1376  const int M ( numeric_cast<int>( A.rows() ) );
1377  const int N ( numeric_cast<int>( B.columns() ) );
1378  const int K ( numeric_cast<int>( A.columns() ) );
1379  const int lda( numeric_cast<int>( A.spacing() ) );
1380  const int ldb( numeric_cast<int>( B.spacing() ) );
1381  const int ldc( numeric_cast<int>( C.spacing() ) );
1382 
1383  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1384  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1385  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1386  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1387  }
1389 #endif
1390  //**********************************************************************************************
1391 
1392  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1393 #if BLAZE_BLAS_MODE
1394 
1407  template< typename MT3 // Type of the left-hand side target matrix
1408  , typename MT4 // Type of the left-hand side matrix operand
1409  , typename MT5 > // Type of the right-hand side matrix operand
1410  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1411  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1412  {
1413  using boost::numeric_cast;
1414 
1418  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1419  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1420  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1421 
1422  const int M ( numeric_cast<int>( A.rows() ) );
1423  const int N ( numeric_cast<int>( B.columns() ) );
1424  const int K ( numeric_cast<int>( A.columns() ) );
1425  const int lda( numeric_cast<int>( A.spacing() ) );
1426  const int ldb( numeric_cast<int>( B.spacing() ) );
1427  const int ldc( numeric_cast<int>( C.spacing() ) );
1428  const complex<float> alpha( 1.0F, 0.0F );
1429  const complex<float> beta ( 1.0F, 0.0F );
1430 
1431  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1432  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1433  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1434  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1435  }
1437 #endif
1438  //**********************************************************************************************
1439 
1440  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1441 #if BLAZE_BLAS_MODE
1442 
1455  template< typename MT3 // Type of the left-hand side target matrix
1456  , typename MT4 // Type of the left-hand side matrix operand
1457  , typename MT5 > // Type of the right-hand side matrix operand
1458  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1459  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1460  {
1461  using boost::numeric_cast;
1462 
1466  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1467  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1468  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1469 
1470  const int M ( numeric_cast<int>( A.rows() ) );
1471  const int N ( numeric_cast<int>( B.columns() ) );
1472  const int K ( numeric_cast<int>( A.columns() ) );
1473  const int lda( numeric_cast<int>( A.spacing() ) );
1474  const int ldb( numeric_cast<int>( B.spacing() ) );
1475  const int ldc( numeric_cast<int>( C.spacing() ) );
1476  const complex<double> alpha( 1.0, 0.0 );
1477  const complex<double> beta ( 1.0, 0.0 );
1478 
1479  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1480  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1481  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1482  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1483  }
1485 #endif
1486  //**********************************************************************************************
1487 
1488  //**Addition assignment to sparse matrices******************************************************
1489  // No special implementation for the addition assignment to sparse matrices.
1490  //**********************************************************************************************
1491 
1492  //**Subtraction assignment to dense matrices****************************************************
1505  template< typename MT // Type of the target dense matrix
1506  , bool SO > // Storage order of the target dense matrix
1507  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1508  {
1510 
1511  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1512  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1513 
1514  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1515  return;
1516  }
1517 
1518  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1519  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1520 
1521  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1522  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1523  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1524  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1525  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1526  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1527 
1528  DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1529  }
1531  //**********************************************************************************************
1532 
1533  //**Subtraction assignment to dense matrices (kernel selection)*********************************
1544  template< typename MT3 // Type of the left-hand side target matrix
1545  , typename MT4 // Type of the left-hand side matrix operand
1546  , typename MT5 > // Type of the right-hand side matrix operand
1547  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1548  {
1549  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1550  DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1551  else
1552  DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1553  }
1555  //**********************************************************************************************
1556 
1557  //**Default subtraction assignment to dense matrices********************************************
1571  template< typename MT3 // Type of the left-hand side target matrix
1572  , typename MT4 // Type of the left-hand side matrix operand
1573  , typename MT5 > // Type of the right-hand side matrix operand
1574  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1575  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1576  {
1577  const size_t M( A.rows() );
1578  const size_t N( B.columns() );
1579  const size_t K( A.columns() );
1580 
1581  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1582  const size_t end( N & size_t(-2) );
1583 
1584  for( size_t i=0UL; i<M; ++i ) {
1585  for( size_t k=0UL; k<K; ++k ) {
1586  for( size_t j=0UL; j<end; j+=2UL ) {
1587  C(i,j ) -= A(i,k) * B(k,j );
1588  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1589  }
1590  if( end < N ) {
1591  C(i,end) -= A(i,k) * B(k,end);
1592  }
1593  }
1594  }
1595  }
1597  //**********************************************************************************************
1598 
1599  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1613  template< typename MT3 // Type of the left-hand side target matrix
1614  , typename MT4 // Type of the left-hand side matrix operand
1615  , typename MT5 > // Type of the right-hand side matrix operand
1616  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1617  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1618  {
      // Vectorized kernel computing C -= A * B for a row-major target matrix C.
      //
      // The columns of C are consumed in four stages of decreasing width: blocks of 8, 4, 2 and
      // finally 1 vector(s) of IT::size elements. In every stage the current values of C are
      // loaded into accumulators, the products of A(i,k) and the matching vectors of row k of B
      // are subtracted for all k, and the accumulators are stored back to C.
      //
      // NOTE(review): every stage loads and stores whole vectors of IT::size elements, also when
      // fewer than IT::size columns remain in front of N. Presumably the involved matrices are
      // padded accordingly -- confirm against the load()/store() contract of the operands.
1619  typedef IntrinsicTrait<ElementType> IT;
1620 
1621  const size_t M( A.rows() );
1622  const size_t N( B.columns() );
1623  const size_t K( A.columns() );
1624 
1625  size_t j( 0UL );
1626 
      // Stage 1: 8 vectors wide, one row of C at a time. Runs as long as the 8th vector of the
      // block still starts in front of column N.
1627  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1628  for( size_t i=0UL; i<M; ++i ) {
1629  IntrinsicType xmm1( (~C).load(i,j ) );
1630  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1631  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1632  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1633  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1634  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1635  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1636  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1637  for( size_t k=0UL; k<K; ++k ) {
      // a1 is built from the single element A(i,k) and reused for all eight vectors of row k of B
1638  const IntrinsicType a1( set( A(i,k) ) );
1639  xmm1 = xmm1 - a1 * B.load(k,j );
1640  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1641  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1642  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1643  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1644  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1645  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1646  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1647  }
1648  (~C).store( i, j , xmm1 );
1649  (~C).store( i, j+IT::size , xmm2 );
1650  (~C).store( i, j+IT::size*2UL, xmm3 );
1651  (~C).store( i, j+IT::size*3UL, xmm4 );
1652  (~C).store( i, j+IT::size*4UL, xmm5 );
1653  (~C).store( i, j+IT::size*5UL, xmm6 );
1654  (~C).store( i, j+IT::size*6UL, xmm7 );
1655  (~C).store( i, j+IT::size*7UL, xmm8 );
1656  }
1657  }
      // Stage 2: 4 vectors wide, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1), so
      // that every vector loaded from B is used twice.
1658  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1659  size_t i( 0UL );
1660  for( ; (i+2UL) <= M; i+=2UL ) {
1661  IntrinsicType xmm1( (~C).load(i ,j ) );
1662  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1663  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1664  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1665  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1666  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1667  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1668  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1669  for( size_t k=0UL; k<K; ++k ) {
1670  const IntrinsicType a1( set( A(i ,k) ) );
1671  const IntrinsicType a2( set( A(i+1UL,k) ) );
1672  const IntrinsicType b1( B.load(k,j ) );
1673  const IntrinsicType b2( B.load(k,j+IT::size ) );
1674  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1675  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1676  xmm1 = xmm1 - a1 * b1;
1677  xmm2 = xmm2 - a1 * b2;
1678  xmm3 = xmm3 - a1 * b3;
1679  xmm4 = xmm4 - a1 * b4;
1680  xmm5 = xmm5 - a2 * b1;
1681  xmm6 = xmm6 - a2 * b2;
1682  xmm7 = xmm7 - a2 * b3;
1683  xmm8 = xmm8 - a2 * b4;
1684  }
1685  (~C).store( i , j , xmm1 );
1686  (~C).store( i , j+IT::size , xmm2 );
1687  (~C).store( i , j+IT::size*2UL, xmm3 );
1688  (~C).store( i , j+IT::size*3UL, xmm4 );
1689  (~C).store( i+1UL, j , xmm5 );
1690  (~C).store( i+1UL, j+IT::size , xmm6 );
1691  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1692  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1693  }
      // Remaining single row in case M is odd
1694  if( i < M ) {
1695  IntrinsicType xmm1( (~C).load(i,j ) );
1696  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1697  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1698  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1699  for( size_t k=0UL; k<K; ++k ) {
1700  const IntrinsicType a1( set( A(i,k) ) );
1701  xmm1 = xmm1 - a1 * B.load(k,j );
1702  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1703  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1704  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1705  }
1706  (~C).store( i, j , xmm1 );
1707  (~C).store( i, j+IT::size , xmm2 );
1708  (~C).store( i, j+IT::size*2UL, xmm3 );
1709  (~C).store( i, j+IT::size*3UL, xmm4 );
1710  }
1711  }
      // Stage 3: 2 vectors wide, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1)
1712  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1713  size_t i( 0UL );
1714  for( ; (i+2UL) <= M; i+=2UL ) {
1715  IntrinsicType xmm1( (~C).load(i ,j ) );
1716  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1717  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1718  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1719  for( size_t k=0UL; k<K; ++k ) {
1720  const IntrinsicType a1( set( A(i ,k) ) );
1721  const IntrinsicType a2( set( A(i+1UL,k) ) );
1722  const IntrinsicType b1( B.load(k,j ) );
1723  const IntrinsicType b2( B.load(k,j+IT::size) );
1724  xmm1 = xmm1 - a1 * b1;
1725  xmm2 = xmm2 - a1 * b2;
1726  xmm3 = xmm3 - a2 * b1;
1727  xmm4 = xmm4 - a2 * b2;
1728  }
1729  (~C).store( i , j , xmm1 );
1730  (~C).store( i , j+IT::size, xmm2 );
1731  (~C).store( i+1UL, j , xmm3 );
1732  (~C).store( i+1UL, j+IT::size, xmm4 );
1733  }
      // Remaining single row in case M is odd
1734  if( i < M ) {
1735  IntrinsicType xmm1( (~C).load(i,j ) );
1736  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1737  for( size_t k=0UL; k<K; ++k ) {
1738  const IntrinsicType a1( set( A(i,k) ) );
1739  xmm1 = xmm1 - a1 * B.load(k,j );
1740  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1741  }
1742  (~C).store( i, j , xmm1 );
1743  (~C).store( i, j+IT::size, xmm2 );
1744  }
1745  }
      // Stage 4: at most one vector of columns is left; again two rows at a time plus a
      // single-row remainder.
1746  if( j < N ) {
1747  size_t i( 0UL );
1748  for( ; (i+2UL) <= M; i+=2UL ) {
1749  IntrinsicType xmm1( (~C).load(i ,j) );
1750  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1751  for( size_t k=0UL; k<K; ++k ) {
1752  const IntrinsicType b1( B.load(k,j) );
1753  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1754  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1755  }
1756  (~C).store( i , j, xmm1 );
1757  (~C).store( i+1UL, j, xmm2 );
1758  }
1759  if( i < M ) {
1760  IntrinsicType xmm1( (~C).load(i,j) );
1761  for( size_t k=0UL; k<K; ++k ) {
1762  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
1763  }
1764  (~C).store( i, j, xmm1 );
1765  }
1766  }
1767  }
1769  //**********************************************************************************************
1770 
1771  //**Vectorized default subtraction assignment to column-major dense matrices********************
1785  template< typename MT3 // Type of the left-hand side target matrix
1786  , typename MT4 // Type of the left-hand side matrix operand
1787  , typename MT5 > // Type of the right-hand side matrix operand
1788  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1789  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1790  {
1793 
1794  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1795  const typename MT4::OppositeType tmp( serial( A ) );
1796  subAssign( ~C, tmp * B );
1797  }
1798  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1799  const typename MT5::OppositeType tmp( serial( B ) );
1800  subAssign( ~C, A * tmp );
1801  }
1802  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1803  const typename MT4::OppositeType tmp( serial( A ) );
1804  subAssign( ~C, tmp * B );
1805  }
1806  else {
1807  const typename MT5::OppositeType tmp( serial( B ) );
1808  subAssign( ~C, A * tmp );
1809  }
1810  }
1812  //**********************************************************************************************
1813 
1814  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
1828  template< typename MT3 // Type of the left-hand side target matrix
1829  , typename MT4 // Type of the left-hand side matrix operand
1830  , typename MT5 > // Type of the right-hand side matrix operand
1831  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1832  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1833  {
1834  selectDefaultSubAssignKernel( C, A, B );
1835  }
1837  //**********************************************************************************************
1838 
1839  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1840 #if BLAZE_BLAS_MODE
1841 
1854  template< typename MT3 // Type of the left-hand side target matrix
1855  , typename MT4 // Type of the left-hand side matrix operand
1856  , typename MT5 > // Type of the right-hand side matrix operand
1857  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1858  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1859  {
1860  using boost::numeric_cast;
1861 
1865 
1866  const int M ( numeric_cast<int>( A.rows() ) );
1867  const int N ( numeric_cast<int>( B.columns() ) );
1868  const int K ( numeric_cast<int>( A.columns() ) );
1869  const int lda( numeric_cast<int>( A.spacing() ) );
1870  const int ldb( numeric_cast<int>( B.spacing() ) );
1871  const int ldc( numeric_cast<int>( C.spacing() ) );
1872 
1873  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1874  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1875  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1876  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1877  }
1879 #endif
1880  //**********************************************************************************************
1881 
1882  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1883 #if BLAZE_BLAS_MODE
1884 
1897  template< typename MT3 // Type of the left-hand side target matrix
1898  , typename MT4 // Type of the left-hand side matrix operand
1899  , typename MT5 > // Type of the right-hand side matrix operand
1900  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1901  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1902  {
1903  using boost::numeric_cast;
1904 
1908 
1909  const int M ( numeric_cast<int>( A.rows() ) );
1910  const int N ( numeric_cast<int>( B.columns() ) );
1911  const int K ( numeric_cast<int>( A.columns() ) );
1912  const int lda( numeric_cast<int>( A.spacing() ) );
1913  const int ldb( numeric_cast<int>( B.spacing() ) );
1914  const int ldc( numeric_cast<int>( C.spacing() ) );
1915 
1916  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1917  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1918  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1919  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1920  }
1922 #endif
1923  //**********************************************************************************************
1924 
1925  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1926 #if BLAZE_BLAS_MODE
1927 
1940  template< typename MT3 // Type of the left-hand side target matrix
1941  , typename MT4 // Type of the left-hand side matrix operand
1942  , typename MT5 > // Type of the right-hand side matrix operand
1943  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1944  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1945  {
1946  using boost::numeric_cast;
1947 
1951  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1952  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1953  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1954 
1955  const int M ( numeric_cast<int>( A.rows() ) );
1956  const int N ( numeric_cast<int>( B.columns() ) );
1957  const int K ( numeric_cast<int>( A.columns() ) );
1958  const int lda( numeric_cast<int>( A.spacing() ) );
1959  const int ldb( numeric_cast<int>( B.spacing() ) );
1960  const int ldc( numeric_cast<int>( C.spacing() ) );
1961  const complex<float> alpha( -1.0F, 0.0F );
1962  const complex<float> beta ( 1.0F, 0.0F );
1963 
1964  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1965  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1966  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1967  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1968  }
1970 #endif
1971  //**********************************************************************************************
1972 
1973  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1974 #if BLAZE_BLAS_MODE
1975 
1988  template< typename MT3 // Type of the left-hand side target matrix
1989  , typename MT4 // Type of the left-hand side matrix operand
1990  , typename MT5 > // Type of the right-hand side matrix operand
1991  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1992  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1993  {
1994  using boost::numeric_cast;
1995 
1999  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2000  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2001  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2002 
2003  const int M ( numeric_cast<int>( A.rows() ) );
2004  const int N ( numeric_cast<int>( B.columns() ) );
2005  const int K ( numeric_cast<int>( A.columns() ) );
2006  const int lda( numeric_cast<int>( A.spacing() ) );
2007  const int ldb( numeric_cast<int>( B.spacing() ) );
2008  const int ldc( numeric_cast<int>( C.spacing() ) );
2009  const complex<double> alpha( -1.0, 0.0 );
2010  const complex<double> beta ( 1.0, 0.0 );
2011 
2012  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2013  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2014  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2015  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2016  }
2018 #endif
2019  //**********************************************************************************************
2020 
2021  //**Subtraction assignment to sparse matrices***************************************************
2022  // No special implementation for the subtraction assignment to sparse matrices.
2023  //**********************************************************************************************
2024 
2025  //**Multiplication assignment to dense matrices*************************************************
2026  // No special implementation for the multiplication assignment to dense matrices.
2027  //**********************************************************************************************
2028 
2029  //**Multiplication assignment to sparse matrices************************************************
2030  // No special implementation for the multiplication assignment to sparse matrices.
2031  //**********************************************************************************************
2032 
2033  //**SMP assignment to dense matrices************************************************************
2048  template< typename MT // Type of the target dense matrix
2049  , bool SO > // Storage order of the target dense matrix
2050  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2051  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2052  {
2054 
2055  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2056  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2057 
2058  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2059  return;
2060  }
2061  else if( rhs.lhs_.columns() == 0UL ) {
2062  reset( ~lhs );
2063  return;
2064  }
2065 
2066  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2067  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2068 
2069  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2070  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2071  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2072  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2073  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2074  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2075 
2076  smpAssign( ~lhs, A * B );
2077  }
2079  //**********************************************************************************************
2080 
2081  //**SMP assignment to sparse matrices***********************************************************
2095  template< typename MT // Type of the target sparse matrix
2096  , bool SO > // Storage order of the target sparse matrix
2097  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2098  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2099  {
2101 
2102  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2103 
2110 
2111  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2112  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2113 
2114  const TmpType tmp( rhs );
2115  smpAssign( ~lhs, tmp );
2116  }
2118  //**********************************************************************************************
2119 
2120  //**SMP addition assignment to dense matrices***************************************************
2135  template< typename MT // Type of the target dense matrix
2136  , bool SO > // Storage order of the target dense matrix
2137  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2138  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2139  {
2141 
2142  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2143  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2144 
2145  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2146  return;
2147  }
2148 
2149  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2150  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2151 
2152  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2153  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2154  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2155  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2156  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2157  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2158 
2159  smpAddAssign( ~lhs, A * B );
2160  }
2162  //**********************************************************************************************
2163 
2164  //**SMP addition assignment to sparse matrices**************************************************
2165  // No special implementation for the SMP addition assignment to sparse matrices.
2166  //**********************************************************************************************
2167 
2168  //**SMP subtraction assignment to dense matrices************************************************
2183  template< typename MT // Type of the target dense matrix
2184  , bool SO > // Storage order of the target dense matrix
2185  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2186  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2187  {
2189 
2190  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2191  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2192 
2193  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2194  return;
2195  }
2196 
2197  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2198  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2199 
2200  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2201  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2202  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2203  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2204  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2205  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2206 
2207  smpSubAssign( ~lhs, A * B );
2208  }
2210  //**********************************************************************************************
2211 
2212  //**SMP subtraction assignment to sparse matrices***********************************************
2213  // No special implementation for the SMP subtraction assignment to sparse matrices.
2214  //**********************************************************************************************
2215 
2216  //**SMP multiplication assignment to dense matrices*********************************************
2217  // No special implementation for the SMP multiplication assignment to dense matrices.
2218  //**********************************************************************************************
2219 
2220  //**SMP multiplication assignment to sparse matrices********************************************
2221  // No special implementation for the SMP multiplication assignment to sparse matrices.
2222  //**********************************************************************************************
2223 
2224  //**Compile time checks*************************************************************************
2231  //**********************************************************************************************
2232 };
2233 //*************************************************************************************************
2234 
2235 
2236 
2237 
2238 //=================================================================================================
2239 //
2240 // DMATSCALARMULTEXPR SPECIALIZATION
2241 //
2242 //=================================================================================================
2243 
2244 //*************************************************************************************************
2252 template< typename MT1 // Type of the left-hand side dense matrix
2253  , typename MT2 // Type of the right-hand side dense matrix
2254  , typename ST > // Type of the right-hand side scalar value
2255 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
2256  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2257  , private MatScalarMultExpr
2258  , private Computation
2259 {
2260  private:
2261  //**Type definitions****************************************************************************
2262  typedef DMatDMatMultExpr<MT1,MT2> MMM; //!< Type of the scaled dense matrix/dense matrix multiplication expression.
2263  typedef typename MMM::ResultType RES; //!< Result type of the multiplication expression.
2264  typedef typename MT1::ResultType RT1; //!< Result type of the left-hand side dense matrix operand.
2265  typedef typename MT2::ResultType RT2; //!< Result type of the right-hand side dense matrix operand.
2266  typedef typename RT1::ElementType ET1; //!< Element type of the left-hand side result type.
2267  typedef typename RT2::ElementType ET2; //!< Element type of the right-hand side result type.
2268  typedef typename MT1::CompositeType CT1; //!< Composite type of the left-hand side dense matrix operand.
2269  typedef typename MT2::CompositeType CT2; //!< Composite type of the right-hand side dense matrix operand.
2270  //**********************************************************************************************
2271 
2272  //**********************************************************************************************
//! Compilation switch for the left-hand side operand: non-zero if MT1 is a computation or
//! requires an evaluation. It selects between RT1 and CT1 for the LT typedef of this class.
2274  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2275  //**********************************************************************************************
2276 
2277  //**********************************************************************************************
//! Compilation switch for the right-hand side operand: non-zero if MT2 is a computation or
//! requires an evaluation. It selects between RT2 and CT2 for the RT typedef of this class.
2279  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2280  //**********************************************************************************************
2281 
2282  //**********************************************************************************************
2284 
2287  template< typename MT >
2288  struct UseSMPAssign {
2289  enum { value = ( evaluateLeft || evaluateRight ) };
2290  };
2291  //**********************************************************************************************
2292 
2293  //**********************************************************************************************
2295 
2298  template< typename T1, typename T2, typename T3, typename T4 >
2299  struct UseSinglePrecisionKernel {
2300  enum { value = IsFloat<typename T1::ElementType>::value &&
2301  IsFloat<typename T2::ElementType>::value &&
2302  IsFloat<typename T3::ElementType>::value &&
2303  !IsComplex<T4>::value };
2304  };
2305  //**********************************************************************************************
2306 
2307  //**********************************************************************************************
2309 
2312  template< typename T1, typename T2, typename T3, typename T4 >
2313  struct UseDoublePrecisionKernel {
2314  enum { value = IsDouble<typename T1::ElementType>::value &&
2315  IsDouble<typename T2::ElementType>::value &&
2316  IsDouble<typename T3::ElementType>::value &&
2317  !IsComplex<T4>::value };
2318  };
2319  //**********************************************************************************************
2320 
2321  //**********************************************************************************************
2323 
2326  template< typename T1, typename T2, typename T3 >
2327  struct UseSinglePrecisionComplexKernel {
2328  typedef complex<float> Type;
2329  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2330  IsSame<typename T2::ElementType,Type>::value &&
2331  IsSame<typename T3::ElementType,Type>::value };
2332  };
2333  //**********************************************************************************************
2334 
2335  //**********************************************************************************************
2337 
2340  template< typename T1, typename T2, typename T3 >
2341  struct UseDoublePrecisionComplexKernel {
2342  typedef complex<double> Type;
2343  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2344  IsSame<typename T2::ElementType,Type>::value &&
2345  IsSame<typename T3::ElementType,Type>::value };
2346  };
2347  //**********************************************************************************************
2348 
2349  //**********************************************************************************************
2351 
2353  template< typename T1, typename T2, typename T3, typename T4 >
2354  struct UseDefaultKernel {
2355  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2356  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2357  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2358  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2359  };
2360  //**********************************************************************************************
2361 
2362  //**********************************************************************************************
2364 
2366  template< typename T1, typename T2, typename T3, typename T4 >
2367  struct UseVectorizedDefaultKernel {
2368  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2369  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2370  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2371  IsSame<typename T1::ElementType,T4>::value &&
2372  IntrinsicTrait<typename T1::ElementType>::addition &&
2373  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2374  IntrinsicTrait<typename T1::ElementType>::multiplication };
2375  };
2376  //**********************************************************************************************
2377 
2378  public:
2379  //**Type definitions****************************************************************************
2380  typedef DMatScalarMultExpr<MMM,ST,false> This; //!< Type of this DMatScalarMultExpr instance.
2381  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type of the expression, obtained from MultTrait<RES,ST>.
2382  typedef typename ResultType::OppositeType OppositeType; //!< OppositeType of the result type.
2383  typedef typename ResultType::TransposeType TransposeType; //!< TransposeType of the result type.
2384  typedef typename ResultType::ElementType ElementType; //!< Element type of the result type.
2385  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type associated with the element type.
2386  typedef const ElementType ReturnType; //!< Return type of the function call operator.
2387  typedef const ResultType CompositeType; //!< Composite type of this expression (a constant result type).
2388 
    //! Type of the left-hand side operand: the wrapped dense matrix multiplication expression.
2390  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
2391 
    //! Type of the right-hand side operand: the scalar value.
2393  typedef ST RightOperand;
2394 
    //! Type used to hold the left-hand side matrix operand inside the kernels: RT1 if
    //! evaluateLeft is set, CT1 otherwise.
2396  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2397 
    //! Type used to hold the right-hand side matrix operand inside the kernels: RT2 if
    //! evaluateRight is set, CT2 otherwise.
2399  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2400  //**********************************************************************************************
2401 
2402  //**Compilation flags***************************************************************************
2404  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2405  IsSame<ET1,ET2>::value &&
2406  IsSame<ET1,ST>::value &&
2407  IntrinsicTrait<ET1>::addition &&
2408  IntrinsicTrait<ET1>::multiplication };
2409 
2411  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2412  !evaluateRight && MT2::smpAssignable };
2413  //**********************************************************************************************
2414 
2415  //**Constructor*********************************************************************************
2421  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2422  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2423  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2424  {}
2425  //**********************************************************************************************
2426 
2427  //**Access operator*****************************************************************************
2434  inline ReturnType operator()( size_t i, size_t j ) const {
2435  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2436  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2437  return matrix_(i,j) * scalar_;
2438  }
2439  //**********************************************************************************************
2440 
2441  //**Rows function*******************************************************************************
2446  inline size_t rows() const {
2447  return matrix_.rows();
2448  }
2449  //**********************************************************************************************
2450 
2451  //**Columns function****************************************************************************
2456  inline size_t columns() const {
2457  return matrix_.columns();
2458  }
2459  //**********************************************************************************************
2460 
2461  //**Left operand access*************************************************************************
2466  inline LeftOperand leftOperand() const {
2467  return matrix_;
2468  }
2469  //**********************************************************************************************
2470 
2471  //**Right operand access************************************************************************
2476  inline RightOperand rightOperand() const {
2477  return scalar_;
2478  }
2479  //**********************************************************************************************
2480 
2481  //**********************************************************************************************
2487  template< typename T >
2488  inline bool canAlias( const T* alias ) const {
2489  return matrix_.canAlias( alias );
2490  }
2491  //**********************************************************************************************
2492 
2493  //**********************************************************************************************
2499  template< typename T >
2500  inline bool isAliased( const T* alias ) const {
2501  return matrix_.isAliased( alias );
2502  }
2503  //**********************************************************************************************
2504 
2505  //**********************************************************************************************
2510  inline bool isAligned() const {
2511  return matrix_.isAligned();
2512  }
2513  //**********************************************************************************************
2514 
2515  //**********************************************************************************************
2520  inline bool canSMPAssign() const {
2521  typename MMM::LeftOperand A( matrix_.leftOperand() );
2522  return ( !BLAZE_BLAS_IS_PARALLEL ||
2523  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
2524  ( A.rows() > SMP_DMATDMATMULT_THRESHOLD );
2525  }
2526  //**********************************************************************************************
2527 
2528  private:
2529  //**Member variables****************************************************************************
2530  LeftOperand matrix_; //!< Left-hand side dense matrix of the multiplication expression.
2531  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
2532  //**********************************************************************************************
2533 
2534  //**Assignment to dense matrices****************************************************************
2546  template< typename MT // Type of the target dense matrix
2547  , bool SO > // Storage order of the target dense matrix
2548  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2549  {
2551 
2552  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2553  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2554 
2555  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2556  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2557 
2558  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2559  return;
2560  }
2561  else if( left.columns() == 0UL ) {
2562  reset( ~lhs );
2563  return;
2564  }
2565 
2566  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2567  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2568 
2569  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2570  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2571  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2572  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2573  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2574  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2575 
2576  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2577  }
2578  //**********************************************************************************************
2579 
2580  //**Assignment to dense matrices (kernel selection)*********************************************
2591  template< typename MT3 // Type of the left-hand side target matrix
2592  , typename MT4 // Type of the left-hand side matrix operand
2593  , typename MT5 // Type of the right-hand side matrix operand
2594  , typename ST2 > // Type of the scalar value
2595  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2596  {
2597  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
2598  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2599  else
2600  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2601  }
2602  //**********************************************************************************************
2603 
2604  //**Default assignment to dense matrices********************************************************
2618  template< typename MT3 // Type of the left-hand side target matrix
2619  , typename MT4 // Type of the left-hand side matrix operand
2620  , typename MT5 // Type of the right-hand side matrix operand
2621  , typename ST2 > // Type of the scalar value
2622  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2623  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2624  {
2625  const size_t M( A.rows() );
2626  const size_t N( B.columns() );
2627  const size_t K( A.columns() );
2628 
2629  for( size_t i=0UL; i<M; ++i ) {
2630  for( size_t j=0UL; j<N; ++j ) {
2631  C(i,j) = A(i,0UL) * B(0UL,j);
2632  }
2633  for( size_t k=1UL; k<K; ++k ) {
2634  for( size_t j=0UL; j<N; ++j ) {
2635  C(i,j) += A(i,k) * B(k,j);
2636  }
2637  }
2638  for( size_t j=0UL; j<N; ++j ) {
2639  C(i,j) *= scalar;
2640  }
2641  }
2642  }
2643  //**********************************************************************************************
2644 
2645  //**Vectorized default assignment to row-major dense matrices***********************************
/*!\brief Vectorized default assignment of a scaled dense matrix-dense matrix multiplication
//        to a row-major dense matrix.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected when UseVectorizedDefaultKernel applies. The columns of the
// target are processed in blocks of decreasing width (8, 4, 2 and finally 1 intrinsic
// vector of IT::size elements). Within a block the products are accumulated in intrinsic
// registers over the complete inner dimension and multiplied with the broadcast scalar
// right before being stored to \a C.
//
// NOTE(review): the accumulators are declared without an initializer and are accumulated
// into immediately; this presumably relies on IntrinsicType default-constructing to zero
// - confirm against the intrinsic type definitions.
*/
2659  template< typename MT3 // Type of the left-hand side target matrix
2660  , typename MT4 // Type of the left-hand side matrix operand
2661  , typename MT5 // Type of the right-hand side matrix operand
2662  , typename ST2 > // Type of the scalar value
2663  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2664  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2665  {
2666  typedef IntrinsicTrait<ElementType> IT;
2667 
2668  const size_t M( A.rows() );
2669  const size_t N( B.columns() );
2670  const size_t K( A.columns() );
2671 
    // Scalar broadcast to an intrinsic vector; applied to every accumulator before the store
2672  const IntrinsicType factor( set( scalar ) );
2673 
    // Column index; shared by all block loops below, so each loop continues where the previous stopped
2674  size_t j( 0UL );
2675 
    // Column blocks of 8 intrinsic vectors, one row of C at a time
2676  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2677  for( size_t i=0UL; i<M; ++i ) {
2678  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2679  for( size_t k=0UL; k<K; ++k ) {
2680  const IntrinsicType a1( set( A(i,k) ) );
2681  xmm1 = xmm1 + a1 * B.load(k,j );
2682  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2683  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2684  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2685  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2686  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2687  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2688  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2689  }
2690  (~C).store( i, j , xmm1 * factor );
2691  (~C).store( i, j+IT::size , xmm2 * factor );
2692  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2693  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2694  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
2695  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
2696  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
2697  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
2698  }
2699  }
    // Column blocks of 4 intrinsic vectors, two rows of C at a time (xmm1-4: row i, xmm5-8: row i+1)
2700  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2701  size_t i( 0UL );
2702  for( ; (i+2UL) <= M; i+=2UL ) {
2703  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2704  for( size_t k=0UL; k<K; ++k ) {
2705  const IntrinsicType a1( set( A(i ,k) ) );
2706  const IntrinsicType a2( set( A(i+1UL,k) ) );
2707  const IntrinsicType b1( B.load(k,j ) );
2708  const IntrinsicType b2( B.load(k,j+IT::size ) );
2709  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2710  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2711  xmm1 = xmm1 + a1 * b1;
2712  xmm2 = xmm2 + a1 * b2;
2713  xmm3 = xmm3 + a1 * b3;
2714  xmm4 = xmm4 + a1 * b4;
2715  xmm5 = xmm5 + a2 * b1;
2716  xmm6 = xmm6 + a2 * b2;
2717  xmm7 = xmm7 + a2 * b3;
2718  xmm8 = xmm8 + a2 * b4;
2719  }
2720  (~C).store( i , j , xmm1 * factor );
2721  (~C).store( i , j+IT::size , xmm2 * factor );
2722  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
2723  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
2724  (~C).store( i+1UL, j , xmm5 * factor );
2725  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
2726  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
2727  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
2728  }
    // Remaining single row in case M is odd
2729  if( i < M ) {
2730  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2731  for( size_t k=0UL; k<K; ++k ) {
2732  const IntrinsicType a1( set( A(i,k) ) );
2733  xmm1 = xmm1 + a1 * B.load(k,j );
2734  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2735  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2736  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2737  }
2738  (~C).store( i, j , xmm1 * factor );
2739  (~C).store( i, j+IT::size , xmm2 * factor );
2740  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2741  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2742  }
2743  }
    // Column blocks of 2 intrinsic vectors, two rows of C at a time (xmm1-2: row i, xmm3-4: row i+1)
2744  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2745  size_t i( 0UL );
2746  for( ; (i+2UL) <= M; i+=2UL ) {
2747  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2748  for( size_t k=0UL; k<K; ++k ) {
2749  const IntrinsicType a1( set( A(i ,k) ) );
2750  const IntrinsicType a2( set( A(i+1UL,k) ) );
2751  const IntrinsicType b1( B.load(k,j ) );
2752  const IntrinsicType b2( B.load(k,j+IT::size) );
2753  xmm1 = xmm1 + a1 * b1;
2754  xmm2 = xmm2 + a1 * b2;
2755  xmm3 = xmm3 + a2 * b1;
2756  xmm4 = xmm4 + a2 * b2;
2757  }
2758  (~C).store( i , j , xmm1 * factor );
2759  (~C).store( i , j+IT::size, xmm2 * factor );
2760  (~C).store( i+1UL, j , xmm3 * factor );
2761  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
2762  }
    // Remaining single row in case M is odd
2763  if( i < M ) {
2764  IntrinsicType xmm1, xmm2;
2765  for( size_t k=0UL; k<K; ++k ) {
2766  const IntrinsicType a1( set( A(i,k) ) );
2767  xmm1 = xmm1 + a1 * B.load(k,j );
2768  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2769  }
2770  (~C).store( i, j , xmm1 * factor );
2771  (~C).store( i, j+IT::size, xmm2 * factor );
2772  }
2773  }
    // Final column block of a single intrinsic vector, two rows of C at a time.
    // NOTE(review): a full vector is loaded and stored at column j even if fewer than
    // IT::size columns remain; presumably this relies on padded rows - verify.
2774  if( j < N ) {
2775  size_t i( 0UL );
2776  for( ; (i+2UL) <= M; i+=2UL ) {
2777  IntrinsicType xmm1, xmm2;
2778  for( size_t k=0UL; k<K; ++k ) {
2779  const IntrinsicType b1( B.load(k,j) );
2780  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2781  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2782  }
2783  (~C).store( i , j, xmm1 * factor );
2784  (~C).store( i+1UL, j, xmm2 * factor );
2785  }
    // Remaining single row in case M is odd
2786  if( i < M ) {
2787  IntrinsicType xmm1;
2788  for( size_t k=0UL; k<K; ++k ) {
2789  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
2790  }
2791  (~C).store( i, j, xmm1 * factor );
2792  }
2793  }
2794  }
2795  //**********************************************************************************************
2796 
2797  //**Vectorized default assignment to column-major dense matrices********************************
2811  template< typename MT3 // Type of the left-hand side target matrix
2812  , typename MT4 // Type of the left-hand side matrix operand
2813  , typename MT5 // Type of the right-hand side matrix operand
2814  , typename ST2 > // Type of the scalar value
2815  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2816  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2817  {
2820 
2821  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2822  const typename MT4::OppositeType tmp( serial( A ) );
2823  assign( ~C, tmp * B * scalar );
2824  }
2825  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2826  const typename MT5::OppositeType tmp( serial( B ) );
2827  assign( ~C, A * tmp * scalar );
2828  }
2829  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2830  const typename MT4::OppositeType tmp( serial( A ) );
2831  assign( ~C, tmp * B * scalar );
2832  }
2833  else {
2834  const typename MT5::OppositeType tmp( serial( B ) );
2835  assign( ~C, A * tmp * scalar );
2836  }
2837  }
2838  //**********************************************************************************************
2839 
2840  //**BLAS-based assignment to dense matrices (default)*******************************************
2854  template< typename MT3 // Type of the left-hand side target matrix
2855  , typename MT4 // Type of the left-hand side matrix operand
2856  , typename MT5 // Type of the right-hand side matrix operand
2857  , typename ST2 > // Type of the scalar value
2858  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2859  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2860  {
2861  selectDefaultAssignKernel( C, A, B, scalar );
2862  }
2863  //**********************************************************************************************
2864 
2865  //**BLAS-based assignment to dense matrices (single precision)**********************************
2866 #if BLAZE_BLAS_MODE
2867 
2880  template< typename MT3 // Type of the left-hand side target matrix
2881  , typename MT4 // Type of the left-hand side matrix operand
2882  , typename MT5 // Type of the right-hand side matrix operand
2883  , typename ST2 > // Type of the scalar value
2884  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2885  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2886  {
2887  using boost::numeric_cast;
2888 
2892 
2893  const int M ( numeric_cast<int>( A.rows() ) );
2894  const int N ( numeric_cast<int>( B.columns() ) );
2895  const int K ( numeric_cast<int>( A.columns() ) );
2896  const int lda( numeric_cast<int>( A.spacing() ) );
2897  const int ldb( numeric_cast<int>( B.spacing() ) );
2898  const int ldc( numeric_cast<int>( C.spacing() ) );
2899 
2900  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2901  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2902  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2903  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2904  }
2905 #endif
2906  //**********************************************************************************************
2907 
2908  //**BLAS-based assignment to dense matrices (double precision)**********************************
2909 #if BLAZE_BLAS_MODE
2910 
2923  template< typename MT3 // Type of the left-hand side target matrix
2924  , typename MT4 // Type of the left-hand side matrix operand
2925  , typename MT5 // Type of the right-hand side matrix operand
2926  , typename ST2 > // Type of the scalar value
2927  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2928  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2929  {
2930  using boost::numeric_cast;
2931 
2935 
2936  const int M ( numeric_cast<int>( A.rows() ) );
2937  const int N ( numeric_cast<int>( B.columns() ) );
2938  const int K ( numeric_cast<int>( A.columns() ) );
2939  const int lda( numeric_cast<int>( A.spacing() ) );
2940  const int ldb( numeric_cast<int>( B.spacing() ) );
2941  const int ldc( numeric_cast<int>( C.spacing() ) );
2942 
2943  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2944  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2945  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2946  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2947  }
2948 #endif
2949  //**********************************************************************************************
2950 
2951  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2952 #if BLAZE_BLAS_MODE
2953 
2966  template< typename MT3 // Type of the left-hand side target matrix
2967  , typename MT4 // Type of the left-hand side matrix operand
2968  , typename MT5 // Type of the right-hand side matrix operand
2969  , typename ST2 > // Type of the scalar value
2970  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2971  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2972  {
2973  using boost::numeric_cast;
2974 
2978  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2979  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2980  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2981 
2982  const int M ( numeric_cast<int>( A.rows() ) );
2983  const int N ( numeric_cast<int>( B.columns() ) );
2984  const int K ( numeric_cast<int>( A.columns() ) );
2985  const int lda( numeric_cast<int>( A.spacing() ) );
2986  const int ldb( numeric_cast<int>( B.spacing() ) );
2987  const int ldc( numeric_cast<int>( C.spacing() ) );
2988  const complex<float> alpha( scalar );
2989  const complex<float> beta ( 0.0F, 0.0F );
2990 
2991  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2992  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2993  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2994  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2995  }
2996 #endif
2997  //**********************************************************************************************
2998 
2999  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3000 #if BLAZE_BLAS_MODE
3001 
3014  template< typename MT3 // Type of the left-hand side target matrix
3015  , typename MT4 // Type of the left-hand side matrix operand
3016  , typename MT5 // Type of the right-hand side matrix operand
3017  , typename ST2 > // Type of the scalar
3018  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3019  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3020  {
3021  using boost::numeric_cast;
3022 
3026  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3027  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3028  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3029 
3030  const int M ( numeric_cast<int>( A.rows() ) );
3031  const int N ( numeric_cast<int>( B.columns() ) );
3032  const int K ( numeric_cast<int>( A.columns() ) );
3033  const int lda( numeric_cast<int>( A.spacing() ) );
3034  const int ldb( numeric_cast<int>( B.spacing() ) );
3035  const int ldc( numeric_cast<int>( C.spacing() ) );
3036  const complex<double> alpha( scalar );
3037  const complex<double> beta ( 0.0, 0.0 );
3038 
3039  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3040  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3041  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3042  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3043  }
3044 #endif
3045  //**********************************************************************************************
3046 
3047  //**Assignment to sparse matrices***************************************************************
3058  template< typename MT // Type of the target sparse matrix
3059  , bool SO > // Storage order of the target sparse matrix
3060  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3061  {
3063 
3064  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3065 
3072 
3073  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3074  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3075 
3076  const TmpType tmp( serial( rhs ) );
3077  assign( ~lhs, tmp );
3078  }
3079  //**********************************************************************************************
3080 
3081  //**Addition assignment to dense matrices*******************************************************
3093  template< typename MT // Type of the target dense matrix
3094  , bool SO > // Storage order of the target dense matrix
3095  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3096  {
3098 
3099  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3100  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3101 
3102  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3103  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3104 
3105  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3106  return;
3107  }
3108 
3109  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3110  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3111 
3112  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3113  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3114  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3115  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3116  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3117  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3118 
3119  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3120  }
3121  //**********************************************************************************************
3122 
3123  //**Addition assignment to dense matrices (kernel selection)************************************
3134  template< typename MT3 // Type of the left-hand side target matrix
3135  , typename MT4 // Type of the left-hand side matrix operand
3136  , typename MT5 // Type of the right-hand side matrix operand
3137  , typename ST2 > // Type of the scalar value
3138  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3139  {
3140  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3141  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3142  else
3143  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3144  }
3145  //**********************************************************************************************
3146 
3147  //**Default addition assignment to dense matrices***********************************************
3161  template< typename MT3 // Type of the left-hand side target matrix
3162  , typename MT4 // Type of the left-hand side matrix operand
3163  , typename MT5 // Type of the right-hand side matrix operand
3164  , typename ST2 > // Type of the scalar value
3165  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3166  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3167  {
3168  const ResultType tmp( serial( A * B * scalar ) );
3169  addAssign( C, tmp );
3170  }
3171  //**********************************************************************************************
3172 
3173  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment of a scaled dense matrix-dense matrix
// multiplication to a row-major dense matrix: adds (A*B)*scalar to C.
//
// Parameters:
//   C      - the target left-hand side dense matrix (row-major)
//   A      - the left-hand side multiplication operand
//   B      - the right-hand side multiplication operand
//   scalar - the scaling factor
//
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 chunk(s) of IT::size
// elements. Within a block the accumulators sum a(i,k)*b(k,j..) over k; the scaling by
// 'factor' is applied once, when the accumulator is added to the loaded content of C.
//
// NOTE(review): the loop bounds only test the start of the last chunk against N, so a chunk
// may extend beyond column N - presumably the matrices are padded to a multiple of IT::size;
// confirm.
// NOTE(review): the xmm accumulators are declared without initializer; this assumes that a
// default-constructed IntrinsicType is zero - confirm.
3187  template< typename MT3 // Type of the left-hand side target matrix
3188  , typename MT4 // Type of the left-hand side matrix operand
3189  , typename MT5 // Type of the right-hand side matrix operand
3190  , typename ST2 > // Type of the scalar value
3191  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3192  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3193  {
3194  typedef IntrinsicTrait<ElementType> IT;
3195 
3196  const size_t M( A.rows() );
3197  const size_t N( B.columns() );
3198  const size_t K( A.columns() );
3199 
// The scalar is broadcast once and reused for every store
3200  const IntrinsicType factor( set( scalar ) );
3201 
// Column index; shared by all block loops below so each loop continues where the previous stopped
3202  size_t j( 0UL );
3203 
// Blocks of 8 chunks per row: one row of A against eight chunks of B
3204  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3205  for( size_t i=0UL; i<M; ++i ) {
3206  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3207  for( size_t k=0UL; k<K; ++k ) {
3208  const IntrinsicType a1( set( A(i,k) ) );
3209  xmm1 = xmm1 + a1 * B.load(k,j );
3210  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3211  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3212  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3213  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3214  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3215  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3216  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3217  }
3218  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3219  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3220  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3221  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3222  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3223  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3224  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3225  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
3226  }
3227  }
// Blocks of 4 chunks: two rows of A at a time (xmm1-4 for row i, xmm5-8 for row i+1)
3228  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3229  size_t i( 0UL );
3230  for( ; (i+2UL) <= M; i+=2UL ) {
3231  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3232  for( size_t k=0UL; k<K; ++k ) {
3233  const IntrinsicType a1( set( A(i ,k) ) );
3234  const IntrinsicType a2( set( A(i+1UL,k) ) );
3235  const IntrinsicType b1( B.load(k,j ) );
3236  const IntrinsicType b2( B.load(k,j+IT::size ) );
3237  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3238  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3239  xmm1 = xmm1 + a1 * b1;
3240  xmm2 = xmm2 + a1 * b2;
3241  xmm3 = xmm3 + a1 * b3;
3242  xmm4 = xmm4 + a1 * b4;
3243  xmm5 = xmm5 + a2 * b1;
3244  xmm6 = xmm6 + a2 * b2;
3245  xmm7 = xmm7 + a2 * b3;
3246  xmm8 = xmm8 + a2 * b4;
3247  }
3248  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3249  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3250  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3251  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3252  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3253  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3254  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3255  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
3256  }
// Remaining single row in case M is odd
3257  if( i < M ) {
3258  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3259  for( size_t k=0UL; k<K; ++k ) {
3260  const IntrinsicType a1( set( A(i,k) ) );
3261  xmm1 = xmm1 + a1 * B.load(k,j );
3262  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3263  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3264  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3265  }
3266  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3267  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3268  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3269  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3270  }
3271  }
// Blocks of 2 chunks: two rows of A at a time (xmm1-2 for row i, xmm3-4 for row i+1)
3272  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3273  size_t i( 0UL );
3274  for( ; (i+2UL) <= M; i+=2UL ) {
3275  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3276  for( size_t k=0UL; k<K; ++k ) {
3277  const IntrinsicType a1( set( A(i ,k) ) );
3278  const IntrinsicType a2( set( A(i+1UL,k) ) );
3279  const IntrinsicType b1( B.load(k,j ) );
3280  const IntrinsicType b2( B.load(k,j+IT::size) );
3281  xmm1 = xmm1 + a1 * b1;
3282  xmm2 = xmm2 + a1 * b2;
3283  xmm3 = xmm3 + a2 * b1;
3284  xmm4 = xmm4 + a2 * b2;
3285  }
3286  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3287  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3288  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3289  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
3290  }
// Remaining single row in case M is odd
3291  if( i < M ) {
3292  IntrinsicType xmm1, xmm2;
3293  for( size_t k=0UL; k<K; ++k ) {
3294  const IntrinsicType a1( set( A(i,k) ) );
3295  xmm1 = xmm1 + a1 * B.load(k,j );
3296  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3297  }
3298  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3299  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
3300  }
3301  }
// At most one chunk of columns is left at this point
3302  if( j < N ) {
3303  size_t i( 0UL );
3304  for( ; (i+2UL) <= M; i+=2UL ) {
3305  IntrinsicType xmm1, xmm2;
3306  for( size_t k=0UL; k<K; ++k ) {
3307  const IntrinsicType b1( B.load(k,j) );
3308  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3309  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3310  }
3311  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3312  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
3313  }
// Remaining single row in case M is odd
3314  if( i < M ) {
3315  IntrinsicType xmm1;
3316  for( size_t k=0UL; k<K; ++k ) {
3317  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3318  }
3319  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3320  }
3321  }
3322  }
3323  //**********************************************************************************************
3324 
3325  //**Vectorized default addition assignment to column-major dense matrices***********************
3339  template< typename MT3 // Type of the left-hand side target matrix
3340  , typename MT4 // Type of the left-hand side matrix operand
3341  , typename MT5 // Type of the right-hand side matrix operand
3342  , typename ST2 > // Type of the scalar value
3343  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3344  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3345  {
3348 
3349  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3350  const typename MT4::OppositeType tmp( serial( A ) );
3351  addAssign( ~C, tmp * B * scalar );
3352  }
3353  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3354  const typename MT5::OppositeType tmp( serial( B ) );
3355  addAssign( ~C, A * tmp * scalar );
3356  }
3357  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3358  const typename MT4::OppositeType tmp( serial( A ) );
3359  addAssign( ~C, tmp * B * scalar );
3360  }
3361  else {
3362  const typename MT5::OppositeType tmp( serial( B ) );
3363  addAssign( ~C, A * tmp * scalar );
3364  }
3365  }
3366  //**********************************************************************************************
3367 
3368  //**BLAS-based addition assignment to dense matrices (default)**********************************
3382  template< typename MT3 // Type of the left-hand side target matrix
3383  , typename MT4 // Type of the left-hand side matrix operand
3384  , typename MT5 // Type of the right-hand side matrix operand
3385  , typename ST2 > // Type of the scalar value
3386  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3387  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3388  {
3389  selectDefaultAddAssignKernel( C, A, B, scalar );
3390  }
3391  //**********************************************************************************************
3392 
3393  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3394 #if BLAZE_BLAS_MODE
3395 
3408  template< typename MT3 // Type of the left-hand side target matrix
3409  , typename MT4 // Type of the left-hand side matrix operand
3410  , typename MT5 // Type of the right-hand side matrix operand
3411  , typename ST2 > // Type of the scalar value
3412  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3413  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3414  {
3415  using boost::numeric_cast;
3416 
3420 
3421  const int M ( numeric_cast<int>( A.rows() ) );
3422  const int N ( numeric_cast<int>( B.columns() ) );
3423  const int K ( numeric_cast<int>( A.columns() ) );
3424  const int lda( numeric_cast<int>( A.spacing() ) );
3425  const int ldb( numeric_cast<int>( B.spacing() ) );
3426  const int ldc( numeric_cast<int>( C.spacing() ) );
3427 
3428  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3429  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3430  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3431  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3432  }
3433 #endif
3434  //**********************************************************************************************
3435 
3436  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3437 #if BLAZE_BLAS_MODE
3438 
3451  template< typename MT3 // Type of the left-hand side target matrix
3452  , typename MT4 // Type of the left-hand side matrix operand
3453  , typename MT5 // Type of the right-hand side matrix operand
3454  , typename ST2 > // Type of the scalar value
3455  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3456  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3457  {
3458  using boost::numeric_cast;
3459 
3463 
3464  const int M ( numeric_cast<int>( A.rows() ) );
3465  const int N ( numeric_cast<int>( B.columns() ) );
3466  const int K ( numeric_cast<int>( A.columns() ) );
3467  const int lda( numeric_cast<int>( A.spacing() ) );
3468  const int ldb( numeric_cast<int>( B.spacing() ) );
3469  const int ldc( numeric_cast<int>( C.spacing() ) );
3470 
3471  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3472  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3473  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3474  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3475  }
3476 #endif
3477  //**********************************************************************************************
3478 
3479  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3480 #if BLAZE_BLAS_MODE
3481 
3494  template< typename MT3 // Type of the left-hand side target matrix
3495  , typename MT4 // Type of the left-hand side matrix operand
3496  , typename MT5 // Type of the right-hand side matrix operand
3497  , typename ST2 > // Type of the scalar value
3498  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3499  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3500  {
3501  using boost::numeric_cast;
3502 
3506  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3507  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3508  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3509 
3510  const int M ( numeric_cast<int>( A.rows() ) );
3511  const int N ( numeric_cast<int>( B.columns() ) );
3512  const int K ( numeric_cast<int>( A.columns() ) );
3513  const int lda( numeric_cast<int>( A.spacing() ) );
3514  const int ldb( numeric_cast<int>( B.spacing() ) );
3515  const int ldc( numeric_cast<int>( C.spacing() ) );
3516  const complex<float> alpha( scalar );
3517  const complex<float> beta ( 1.0F, 0.0F );
3518 
3519  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3520  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3521  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3522  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3523  }
3524 #endif
3525  //**********************************************************************************************
3526 
3527  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3528 #if BLAZE_BLAS_MODE
3529 
3542  template< typename MT3 // Type of the left-hand side target matrix
3543  , typename MT4 // Type of the left-hand side matrix operand
3544  , typename MT5 // Type of the right-hand side matrix operand
3545  , typename ST2 > // Type of the scalar value
3546  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3547  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3548  {
3549  using boost::numeric_cast;
3550 
3554  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3555  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3556  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3557 
3558  const int M ( numeric_cast<int>( A.rows() ) );
3559  const int N ( numeric_cast<int>( B.columns() ) );
3560  const int K ( numeric_cast<int>( A.columns() ) );
3561  const int lda( numeric_cast<int>( A.spacing() ) );
3562  const int ldb( numeric_cast<int>( B.spacing() ) );
3563  const int ldc( numeric_cast<int>( C.spacing() ) );
3564  const complex<double> alpha( scalar );
3565  const complex<double> beta ( 1.0, 0.0 );
3566 
3567  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3568  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3569  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3570  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3571  }
3572 #endif
3573  //**********************************************************************************************
3574 
3575  //**Addition assignment to sparse matrices******************************************************
3576  // No special implementation for the addition assignment to sparse matrices.
3577  //**********************************************************************************************
3578 
3579  //**Subtraction assignment to dense matrices****************************************************
3591  template< typename MT // Type of the target dense matrix
3592  , bool SO > // Storage order of the target dense matrix
3593  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3594  {
3596 
3597  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3598  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3599 
3600  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3601  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3602 
3603  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3604  return;
3605  }
3606 
3607  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3608  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3609 
3610  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3611  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3612  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3613  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3614  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3615  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3616 
3617  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3618  }
3619  //**********************************************************************************************
3620 
3621  //**Subtraction assignment to dense matrices (kernel selection)*********************************
3632  template< typename MT3 // Type of the left-hand side target matrix
3633  , typename MT4 // Type of the left-hand side matrix operand
3634  , typename MT5 // Type of the right-hand side matrix operand
3635  , typename ST2 > // Type of the scalar value
3636  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3637  {
3638  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3639  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3640  else
3641  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3642  }
3643  //**********************************************************************************************
3644 
3645  //**Default subtraction assignment to dense matrices********************************************
3659  template< typename MT3 // Type of the left-hand side target matrix
3660  , typename MT4 // Type of the left-hand side matrix operand
3661  , typename MT5 // Type of the right-hand side matrix operand
3662  , typename ST2 > // Type of the scalar value
3663  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3664  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3665  {
3666  const ResultType tmp( serial( A * B * scalar ) );
3667  subAssign( C, tmp );
3668  }
3669  //**********************************************************************************************
3670 
3671  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment of a scaled dense matrix-dense matrix
// multiplication to a row-major dense matrix: subtracts (A*B)*scalar from C.
//
// Parameters:
//   C      - the target left-hand side dense matrix (row-major)
//   A      - the left-hand side multiplication operand
//   B      - the right-hand side multiplication operand
//   scalar - the scaling factor
//
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 chunk(s) of IT::size
// elements. Within a block the accumulators sum a(i,k)*b(k,j..) over k; the scaling by
// 'factor' is applied once, when the accumulator is subtracted from the loaded content of C.
//
// NOTE(review): the loop bounds only test the start of the last chunk against N, so a chunk
// may extend beyond column N - presumably the matrices are padded to a multiple of IT::size;
// confirm.
// NOTE(review): the xmm accumulators are declared without initializer; this assumes that a
// default-constructed IntrinsicType is zero - confirm.
3685  template< typename MT3 // Type of the left-hand side target matrix
3686  , typename MT4 // Type of the left-hand side matrix operand
3687  , typename MT5 // Type of the right-hand side matrix operand
3688  , typename ST2 > // Type of the scalar value
3689  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3690  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3691  {
3692  typedef IntrinsicTrait<ElementType> IT;
3693 
3694  const size_t M( A.rows() );
3695  const size_t N( B.columns() );
3696  const size_t K( A.columns() );
3697 
// The scalar is broadcast once and reused for every store
3698  const IntrinsicType factor( set( scalar ) );
3699 
// Column index; shared by all block loops below so each loop continues where the previous stopped
3700  size_t j( 0UL );
3701 
// Blocks of 8 chunks per row: one row of A against eight chunks of B
3702  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3703  for( size_t i=0UL; i<M; ++i ) {
3704  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3705  for( size_t k=0UL; k<K; ++k ) {
3706  const IntrinsicType a1( set( A(i,k) ) );
3707  xmm1 = xmm1 + a1 * B.load(k,j );
3708  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3709  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3710  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3711  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3712  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3713  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3714  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3715  }
3716  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3717  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3718  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3719  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3720  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
3721  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
3722  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
3723  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
3724  }
3725  }
// Blocks of 4 chunks: two rows of A at a time (xmm1-4 for row i, xmm5-8 for row i+1)
3726  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3727  size_t i( 0UL );
3728  for( ; (i+2UL) <= M; i+=2UL ) {
3729  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3730  for( size_t k=0UL; k<K; ++k ) {
3731  const IntrinsicType a1( set( A(i ,k) ) );
3732  const IntrinsicType a2( set( A(i+1UL,k) ) );
3733  const IntrinsicType b1( B.load(k,j ) );
3734  const IntrinsicType b2( B.load(k,j+IT::size ) );
3735  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3736  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3737  xmm1 = xmm1 + a1 * b1;
3738  xmm2 = xmm2 + a1 * b2;
3739  xmm3 = xmm3 + a1 * b3;
3740  xmm4 = xmm4 + a1 * b4;
3741  xmm5 = xmm5 + a2 * b1;
3742  xmm6 = xmm6 + a2 * b2;
3743  xmm7 = xmm7 + a2 * b3;
3744  xmm8 = xmm8 + a2 * b4;
3745  }
3746  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3747  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
3748  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
3749  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
3750  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
3751  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
3752  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
3753  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
3754  }
// Remaining single row in case M is odd
3755  if( i < M ) {
3756  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3757  for( size_t k=0UL; k<K; ++k ) {
3758  const IntrinsicType a1( set( A(i,k) ) );
3759  xmm1 = xmm1 + a1 * B.load(k,j );
3760  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3761  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3762  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3763  }
3764  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3765  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3766  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3767  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3768  }
3769  }
// Blocks of 2 chunks: two rows of A at a time (xmm1-2 for row i, xmm3-4 for row i+1)
3770  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3771  size_t i( 0UL );
3772  for( ; (i+2UL) <= M; i+=2UL ) {
3773  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3774  for( size_t k=0UL; k<K; ++k ) {
3775  const IntrinsicType a1( set( A(i ,k) ) );
3776  const IntrinsicType a2( set( A(i+1UL,k) ) );
3777  const IntrinsicType b1( B.load(k,j ) );
3778  const IntrinsicType b2( B.load(k,j+IT::size) );
3779  xmm1 = xmm1 + a1 * b1;
3780  xmm2 = xmm2 + a1 * b2;
3781  xmm3 = xmm3 + a2 * b1;
3782  xmm4 = xmm4 + a2 * b2;
3783  }
3784  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3785  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
3786  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
3787  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
3788  }
// Remaining single row in case M is odd
3789  if( i < M ) {
3790  IntrinsicType xmm1, xmm2;
3791  for( size_t k=0UL; k<K; ++k ) {
3792  const IntrinsicType a1( set( A(i,k) ) );
3793  xmm1 = xmm1 + a1 * B.load(k,j );
3794  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3795  }
3796  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3797  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
3798  }
3799  }
// At most one chunk of columns is left at this point
3800  if( j < N ) {
3801  size_t i( 0UL );
3802  for( ; (i+2UL) <= M; i+=2UL ) {
3803  IntrinsicType xmm1, xmm2;
3804  for( size_t k=0UL; k<K; ++k ) {
3805  const IntrinsicType b1( B.load(k,j) );
3806  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3807  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3808  }
3809  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3810  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
3811  }
// Remaining single row in case M is odd
3812  if( i < M ) {
3813  IntrinsicType xmm1;
3814  for( size_t k=0UL; k<K; ++k ) {
3815  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3816  }
3817  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3818  }
3819  }
3820  }
3821  //**********************************************************************************************
3822 
3823  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default kernel for the scaled subtraction assignment C -= A * B * scalar
// when the target is a DenseMatrix<MT3,true> (the column-major case named in the section
// banner). Enabled only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
// The function does no arithmetic itself: it copies ONE of the two operands into a
// temporary of that operand's OppositeType (evaluated via serial()) and re-dispatches
// the subtraction assignment with the converted operand.
//
// \param C       Target matrix that the scaled product is subtracted from.
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor applied to the product.
3837  template< typename MT3 // Type of the left-hand side target matrix
3838  , typename MT4 // Type of the left-hand side matrix operand
3839  , typename MT5 // Type of the right-hand side matrix operand
3840  , typename ST2 > // Type of the scalar value
3841  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3842  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3843  {
3846 
// Only B's type is resizable: convert A.
3847  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3848  const typename MT4::OppositeType tmp( serial( A ) );
3849  subAssign( ~C, tmp * B * scalar );
3850  }
// Only A's type is resizable: convert B.
3851  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3852  const typename MT5::OppositeType tmp( serial( B ) );
3853  subAssign( ~C, A * tmp * scalar );
3854  }
// Both or neither resizable: convert the operand with fewer elements (A wins ties).
3855  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3856  const typename MT4::OppositeType tmp( serial( A ) );
3857  subAssign( ~C, tmp * B * scalar );
3858  }
3859  else {
3860  const typename MT5::OppositeType tmp( serial( B ) );
3861  subAssign( ~C, A * tmp * scalar );
3862  }
3863  }
3864  //**********************************************************************************************
3865 
3866  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS kernel selection for the scaled subtraction assignment.
// Enabled when UseDefaultKernel<MT3,MT4,MT5,ST2> holds; it simply forwards all arguments
// to selectDefaultSubAssignKernel(), i.e. no BLAS routine is called on this path.
//
// \param C       Target matrix that the scaled product is subtracted from.
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor applied to the product.
3880  template< typename MT3 // Type of the left-hand side target matrix
3881  , typename MT4 // Type of the left-hand side matrix operand
3882  , typename MT5 // Type of the right-hand side matrix operand
3883  , typename ST2 > // Type of the scalar value
3884  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3885  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3886  {
3887  selectDefaultSubAssignKernel( C, A, B, scalar );
3888  }
3889  //**********************************************************************************************
3890 
3891  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3892 #if BLAZE_BLAS_MODE
3893 
// BLAS kernel for the scaled subtraction assignment, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. Performs the update through a single
// cblas_sgemm() call with alpha = -scalar and beta = 1.0F, i.e. C = C - scalar * A * B.
//
// \param C       Target matrix; read and overwritten in place through C.data().
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor; negated before being passed as alpha.
3906  template< typename MT3 // Type of the left-hand side target matrix
3907  , typename MT4 // Type of the left-hand side matrix operand
3908  , typename MT5 // Type of the right-hand side matrix operand
3909  , typename ST2 > // Type of the scalar value
3910  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3911  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3912  {
3913  using boost::numeric_cast;
3914 
3918 
// CBLAS takes int dimensions; numeric_cast range-checks each size_t -> int conversion.
3919  const int M ( numeric_cast<int>( A.rows() ) );
3920  const int N ( numeric_cast<int>( B.columns() ) );
3921  const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
3922  const int lda( numeric_cast<int>( A.spacing() ) );
3923  const int ldb( numeric_cast<int>( B.spacing() ) );
3924  const int ldc( numeric_cast<int>( C.spacing() ) );
3925 
// The layout follows the storage order of the target: for a row-major C the operands are
// passed untransposed, otherwise column-major layout is used with both operands flagged as
// transposed. NOTE(review): this presumes A and B are stored row-major - confirm against
// the kernel selection conditions.
3926  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3927  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3928  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3929  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3930  }
3931 #endif
3932  //**********************************************************************************************
3933 
3934  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3935 #if BLAZE_BLAS_MODE
3936 
// BLAS kernel for the scaled subtraction assignment, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. Performs the update through a single
// cblas_dgemm() call with alpha = -scalar and beta = 1.0, i.e. C = C - scalar * A * B.
//
// \param C       Target matrix; read and overwritten in place through C.data().
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor; negated before being passed as alpha.
3949  template< typename MT3 // Type of the left-hand side target matrix
3950  , typename MT4 // Type of the left-hand side matrix operand
3951  , typename MT5 // Type of the right-hand side matrix operand
3952  , typename ST2 > // Type of the scalar value
3953  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3954  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3955  {
3956  using boost::numeric_cast;
3957 
3961 
// CBLAS takes int dimensions; numeric_cast range-checks each size_t -> int conversion.
3962  const int M ( numeric_cast<int>( A.rows() ) );
3963  const int N ( numeric_cast<int>( B.columns() ) );
3964  const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
3965  const int lda( numeric_cast<int>( A.spacing() ) );
3966  const int ldb( numeric_cast<int>( B.spacing() ) );
3967  const int ldc( numeric_cast<int>( C.spacing() ) );
3968 
// The layout follows the storage order of the target: for a row-major C the operands are
// passed untransposed, otherwise column-major layout is used with both operands flagged as
// transposed. NOTE(review): this presumes A and B are stored row-major - confirm against
// the kernel selection conditions.
3969  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3970  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3971  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3972  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3973  }
3974 #endif
3975  //**********************************************************************************************
3976 
3977  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3978 #if BLAZE_BLAS_MODE
3979 
// BLAS kernel for the scaled subtraction assignment, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Performs the update through a single
// cblas_cgemm() call with alpha = -scalar and beta = (1,0), i.e. C = C - scalar * A * B.
//
// \param C       Target matrix; read and overwritten in place through C.data().
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor; negated and converted to complex<float> for alpha.
3992  template< typename MT3 // Type of the left-hand side target matrix
3993  , typename MT4 // Type of the left-hand side matrix operand
3994  , typename MT5 // Type of the right-hand side matrix operand
3995  , typename ST2 > // Type of the scalar value
3996  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3997  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3998  {
3999  using boost::numeric_cast;
4000 
// Compile-time checks: the value_type of each matrix's element type must be float.
4004  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
4005  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
4006  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
4007 
// CBLAS takes int dimensions; numeric_cast range-checks each size_t -> int conversion.
4008  const int M ( numeric_cast<int>( A.rows() ) );
4009  const int N ( numeric_cast<int>( B.columns() ) );
4010  const int K ( numeric_cast<int>( A.columns() ) );
4011  const int lda( numeric_cast<int>( A.spacing() ) );
4012  const int ldb( numeric_cast<int>( B.spacing() ) );
4013  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take alpha and beta by address, hence the named locals.
4014  const complex<float> alpha( -scalar );
4015  const complex<float> beta ( 1.0F, 0.0F );
4016 
// The layout follows the storage order of the target: for a row-major C the operands are
// passed untransposed, otherwise column-major layout is used with both operands flagged as
// transposed. NOTE(review): this presumes A and B are stored row-major - confirm against
// the kernel selection conditions.
4017  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4018  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4019  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4020  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4021  }
4022 #endif
4023  //**********************************************************************************************
4024 
4025  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4026 #if BLAZE_BLAS_MODE
4027 
// BLAS kernel for the scaled subtraction assignment, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Performs the update through a single
// cblas_zgemm() call with alpha = -scalar and beta = (1,0), i.e. C = C - scalar * A * B.
//
// \param C       Target matrix; read and overwritten in place through C.data().
// \param A       Left-hand side operand of the product.
// \param B       Right-hand side operand of the product.
// \param scalar  Scaling factor; negated and converted to complex<double> for alpha.
4040  template< typename MT3 // Type of the left-hand side target matrix
4041  , typename MT4 // Type of the left-hand side matrix operand
4042  , typename MT5 // Type of the right-hand side matrix operand
4043  , typename ST2 > // Type of the scalar value
4044  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4045  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4046  {
4047  using boost::numeric_cast;
4048 
// Compile-time checks: the value_type of each matrix's element type must be double.
4052  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
4053  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
4054  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
4055 
// CBLAS takes int dimensions; numeric_cast range-checks each size_t -> int conversion.
4056  const int M ( numeric_cast<int>( A.rows() ) );
4057  const int N ( numeric_cast<int>( B.columns() ) );
4058  const int K ( numeric_cast<int>( A.columns() ) );
4059  const int lda( numeric_cast<int>( A.spacing() ) );
4060  const int ldb( numeric_cast<int>( B.spacing() ) );
4061  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routines take alpha and beta by address, hence the named locals.
4062  const complex<double> alpha( -scalar );
4063  const complex<double> beta ( 1.0, 0.0 );
4064 
// The layout follows the storage order of the target: for a row-major C the operands are
// passed untransposed, otherwise column-major layout is used with both operands flagged as
// transposed. NOTE(review): this presumes A and B are stored row-major - confirm against
// the kernel selection conditions.
4065  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4066  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4067  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4068  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4069  }
4070 #endif
4071  //**********************************************************************************************
4072 
4073  //**Subtraction assignment to sparse matrices***************************************************
4074  // No special implementation for the subtraction assignment to sparse matrices.
4075  //**********************************************************************************************
4076 
4077  //**Multiplication assignment to dense matrices*************************************************
4078  // No special implementation for the multiplication assignment to dense matrices.
4079  //**********************************************************************************************
4080 
4081  //**Multiplication assignment to sparse matrices************************************************
4082  // No special implementation for the multiplication assignment to sparse matrices.
4083  //**********************************************************************************************
4084 
4085  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled dense matrix product (rhs) to a dense matrix (lhs).
// Enabled only if UseSMPAssign<MT> holds. The two product operands are evaluated into
// the types LT and RT first, and the assignment is then re-dispatched to smpAssign() on
// the already evaluated operands.
//
// \param lhs  Target dense matrix.
// \param rhs  Scaled multiplication expression to be assigned.
4099  template< typename MT // Type of the target dense matrix
4100  , bool SO > // Storage order of the target dense matrix
4101  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4102  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4103  {
4105 
4106  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4107  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4108 
4109  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4110  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4111 
// Empty target: nothing to assign.
4112  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4113  return;
4114  }
// Inner dimension is zero: the target is reset instead of computing a product.
4115  else if( left.columns() == 0UL ) {
4116  reset( ~lhs );
4117  return;
4118  }
4119 
4120  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4121  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4122 
4123  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4124  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4125  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4126  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4127  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4128  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4129 
// Rebuild the scaled product from the evaluated operands and assign it.
4130  smpAssign( ~lhs, A * B * rhs.scalar_ );
4131  }
4132  //**********************************************************************************************
4133 
4134  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a scaled dense matrix product (rhs) to a sparse matrix (lhs).
// Enabled only if UseSMPAssign<MT> holds. The expression is first evaluated into a
// temporary and the temporary is then assigned to the target via smpAssign().
//
// \param lhs  Target sparse matrix.
// \param rhs  Scaled multiplication expression to be assigned.
4147  template< typename MT // Type of the target sparse matrix
4148  , bool SO > // Storage order of the target sparse matrix
4149  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4150  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4151  {
4153 
// Temporary type: OppositeType when SO is true, ResultType otherwise.
4154  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
4155 
4162 
4163  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4164  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4165 
4166  const TmpType tmp( rhs );
4167  smpAssign( ~lhs, tmp );
4168  }
4169  //**********************************************************************************************
4170 
4171  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled dense matrix product (rhs) to a dense matrix (lhs).
// Enabled only if UseSMPAssign<MT> holds. The two product operands are evaluated into
// the types LT and RT first, and the addition assignment is then re-dispatched to
// smpAddAssign() on the already evaluated operands.
//
// \param lhs  Target dense matrix.
// \param rhs  Scaled multiplication expression to be added.
4185  template< typename MT // Type of the target dense matrix
4186  , bool SO > // Storage order of the target dense matrix
4187  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4188  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4189  {
4191 
4192  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4193  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4194 
4195  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4196  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4197 
// Empty target or zero inner dimension: the target is left untouched.
4198  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4199  return;
4200  }
4201 
4202  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4203  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4204 
4205  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4206  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4207  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4208  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4209  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4210  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4211 
// Rebuild the scaled product from the evaluated operands and add it to the target.
4212  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4213  }
4214  //**********************************************************************************************
4215 
4216  //**SMP addition assignment to sparse matrices**************************************************
4217  // No special implementation for the SMP addition assignment to sparse matrices.
4218  //**********************************************************************************************
4219 
4220  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled dense matrix product (rhs) to a dense matrix
// (lhs). Enabled only if UseSMPAssign<MT> holds. The two product operands are evaluated
// into the types LT and RT first, and the subtraction assignment is then re-dispatched to
// smpSubAssign() on the already evaluated operands.
//
// \param lhs  Target dense matrix.
// \param rhs  Scaled multiplication expression to be subtracted.
4234  template< typename MT // Type of the target dense matrix
4235  , bool SO > // Storage order of the target dense matrix
4236  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4237  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4238  {
4240 
4241  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4242  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4243 
4244  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4245  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4246 
// Empty target or zero inner dimension: the target is left untouched.
4247  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4248  return;
4249  }
4250 
4251  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4252  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4253 
4254  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4255  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4256  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4257  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4258  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4259  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4260 
// Rebuild the scaled product from the evaluated operands and subtract it from the target.
4261  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4262  }
4263  //**********************************************************************************************
4264 
4265  //**SMP subtraction assignment to sparse matrices***********************************************
4266  // No special implementation for the SMP subtraction assignment to sparse matrices.
4267  //**********************************************************************************************
4268 
4269  //**SMP multiplication assignment to dense matrices*********************************************
4270  // No special implementation for the SMP multiplication assignment to dense matrices.
4271  //**********************************************************************************************
4272 
4273  //**SMP multiplication assignment to sparse matrices********************************************
4274  // No special implementation for the SMP multiplication assignment to sparse matrices.
4275  //**********************************************************************************************
4276 
4277  //**Compile time checks*************************************************************************
4286  //**********************************************************************************************
4287 };
4289 //*************************************************************************************************
4290 
4291 
4292 
4293 
4294 //=================================================================================================
4295 //
4296 // GLOBAL BINARY ARITHMETIC OPERATORS
4297 //
4298 //=================================================================================================
4299 
4300 //*************************************************************************************************
// Multiplication operator for two dense matrices whose storage-order flag is 'false'
// (A = B * C). Returns a DMatDMatMultExpr expression object built from the two operands;
// the product itself is not computed here.
//
// \param lhs  The left-hand side dense matrix of the multiplication.
// \param rhs  The right-hand side dense matrix of the multiplication.
// \return     Expression object representing the product lhs * rhs.
// \exception  std::invalid_argument  Matrix sizes do not match, i.e. the number of columns
//             of lhs differs from the number of rows of rhs.
//
// NOTE: the declarator on listing line 4329 was missing from this listing and has been
// restored; without it the function has no name and no parameters.
4326 template< typename T1 // Type of the left-hand side dense matrix
4327  , typename T2 > // Type of the right-hand side dense matrix
4328 inline const DMatDMatMultExpr<T1,T2>
4329  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,false>& rhs )
4330 {
4332 
// The inner dimensions must agree for the product to be defined.
4333  if( (~lhs).columns() != (~rhs).rows() )
4334  throw std::invalid_argument( "Matrix sizes do not match" );
4335 
4336  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4337 }
4338 //*************************************************************************************************
4339 
4340 
4341 
4342 
4343 //=================================================================================================
4344 //
4345 // EXPRESSION TRAIT SPECIALIZATIONS
4346 //
4347 //=================================================================================================
4348 
4349 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for (MT1 * MT2) * VT, where the matrix operand is
// a DMatDMatMultExpr. If MT1 and MT2 are row-major dense matrices and VT is a dense column
// vector, Type is the result type of MT1 * ( MT2 * VT ), i.e. the matrix/vector product is
// formed first. Otherwise Type is INVALID_TYPE.
4351 template< typename MT1, typename MT2, typename VT >
4352 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4353 {
4354  public:
4355  //**********************************************************************************************
4356  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4357  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4358  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4359  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4360  , INVALID_TYPE >::Type Type;
4361  //**********************************************************************************************
4362 };
4364 //*************************************************************************************************
4365 
4366 
4367 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for (MT1 * MT2) * VT, where the matrix operand is
// a DMatDMatMultExpr. If MT1 and MT2 are row-major dense matrices and VT is a sparse column
// vector, Type is the result type of MT1 * ( MT2 * VT ): the inner product uses
// DMatSVecMultExprTrait and the outer one DMatDVecMultExprTrait. Otherwise Type is
// INVALID_TYPE.
4369 template< typename MT1, typename MT2, typename VT >
4370 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4371 {
4372  public:
4373  //**********************************************************************************************
4374  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4375  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4376  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4377  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4378  , INVALID_TYPE >::Type Type;
4379  //**********************************************************************************************
4380 };
4382 //*************************************************************************************************
4383 
4384 
4385 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for VT * (MT1 * MT2), where the matrix operand is
// a DMatDMatMultExpr. If VT is a dense row vector and MT1 and MT2 are row-major dense
// matrices, Type is the result type of ( VT * MT1 ) * MT2, i.e. the vector/matrix product is
// formed first. Otherwise Type is INVALID_TYPE.
4387 template< typename VT, typename MT1, typename MT2 >
4388 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4389 {
4390  public:
4391  //**********************************************************************************************
4392  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4393  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4394  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4395  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4396  , INVALID_TYPE >::Type Type;
4397  //**********************************************************************************************
4398 };
4400 //*************************************************************************************************
4401 
4402 
4403 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for VT * (MT1 * MT2), where the matrix operand is
// a DMatDMatMultExpr. If VT is a sparse row vector and MT1 and MT2 are row-major dense
// matrices, Type is the result type of ( VT * MT1 ) * MT2: the inner product uses
// TSVecDMatMultExprTrait and the outer one TDVecDMatMultExprTrait. Otherwise Type is
// INVALID_TYPE.
4405 template< typename VT, typename MT1, typename MT2 >
4406 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4407 {
4408  public:
4409  //**********************************************************************************************
4410  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4411  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4412  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4413  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4414  , INVALID_TYPE >::Type Type;
4415  //**********************************************************************************************
4416 };
4418 //*************************************************************************************************
4419 
4420 
4421 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for DMatDMatMultExpr. Type is the result type of
// multiplying the submatrix expression types of the two (const-qualified) operands, both
// taken with the same AF flag.
4423 template< typename MT1, typename MT2, bool AF >
4424 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2>, AF >
4425 {
4426  public:
4427  //**********************************************************************************************
4428  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4429  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4430  //**********************************************************************************************
4431 };
4433 //*************************************************************************************************
4434 
4435 
4436 //*************************************************************************************************
// Specialization of RowExprTrait for DMatDMatMultExpr. Type is the result type of
// multiplying the row expression type of the (const-qualified) left operand MT1 with the
// complete right operand MT2.
4438 template< typename MT1, typename MT2 >
4439 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
4440 {
4441  public:
4442  //**********************************************************************************************
4443  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4444  //**********************************************************************************************
4445 };
4447 //*************************************************************************************************
4448 
4449 
4450 //*************************************************************************************************
// Specialization of ColumnExprTrait for DMatDMatMultExpr. Type is the result type of
// multiplying the complete left operand MT1 with the column expression type of the
// (const-qualified) right operand MT2.
4452 template< typename MT1, typename MT2 >
4453 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
4454 {
4455  public:
4456  //**********************************************************************************************
4457  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4458  //**********************************************************************************************
4459 };
4461 //*************************************************************************************************
4462 
4463 } // namespace blaze
4464 
4465 #endif
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:126
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:223
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4329
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:123
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:152
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:259
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:331
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:246
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:250
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:249
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:256
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications.The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:115
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2404
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:124
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:351
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:251
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:395
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold.This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold.This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type.In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:286
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:405
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:248
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:373
Header file for the serial shim.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:361
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:301
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:414
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:247
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:253
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:341
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:249
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:265
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:262
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:121
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:125
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:122
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:415
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:385
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.