All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
76 #include <blaze/system/BLAS.h>
78 #include <blaze/util/Assert.h>
79 #include <blaze/util/Complex.h>
86 #include <blaze/util/DisableIf.h>
87 #include <blaze/util/EnableIf.h>
88 #include <blaze/util/InvalidType.h>
90 #include <blaze/util/SelectType.h>
91 #include <blaze/util/Types.h>
97 
98 
99 namespace blaze {
100 
101 //=================================================================================================
102 //
103 // CLASS DMATDMATMULTEXPR
104 //
105 //=================================================================================================
106 
107 //*************************************************************************************************
114 template< typename MT1 // Type of the left-hand side dense matrix
115  , typename MT2 > // Type of the right-hand side dense matrix
116 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
117  , private MatMatMultExpr
118  , private Computation
119 {
120  private:
121  //**Type definitions****************************************************************************
// Private shorthand aliases for the two operand types:
//   RT1/RT2 - result types of the left/right operand
//   ET1/ET2 - element types of those result types
//   CT1/CT2 - composite types of the left/right operand
122  typedef typename MT1::ResultType RT1;
123  typedef typename MT2::ResultType RT2;
124  typedef typename RT1::ElementType ET1;
125  typedef typename RT2::ElementType ET2;
126  typedef typename MT1::CompositeType CT1;
127  typedef typename MT2::CompositeType CT2;
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
133  //**********************************************************************************************
134 
135  //**********************************************************************************************
// Compile-time switch: non-zero when the right-hand operand type MT2 is itself a
// computation or is flagged by RequiresEvaluation.
137  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
// Helper trait used to pick between the two selectAssignKernel / selectAddAssignKernel
// overloads. The template arguments T1, T2, T3 are not inspected: the value is non-zero
// whenever evaluateLeft or evaluateRight is set.
145  template< typename T1, typename T2, typename T3 >
146  struct UseSMPAssignKernel {
147  enum { value = evaluateLeft || evaluateRight };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
// Helper trait: non-zero only if the element types of all three matrix types
// (T1, T2, T3) satisfy IsFloat. Used to enable the cblas_sgemm based kernel.
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = IsFloat<typename T1::ElementType>::value &&
160  IsFloat<typename T2::ElementType>::value &&
161  IsFloat<typename T3::ElementType>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
// Helper trait: non-zero only if the element types of all three matrix types
// (T1, T2, T3) satisfy IsDouble. Used to enable the cblas_dgemm based kernel.
171  template< typename T1, typename T2, typename T3 >
172  struct UseDoublePrecisionKernel {
173  enum { value = IsDouble<typename T1::ElementType>::value &&
174  IsDouble<typename T2::ElementType>::value &&
175  IsDouble<typename T3::ElementType>::value };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
// Helper trait: non-zero only if the element type of each of T1, T2 and T3 is exactly
// complex<float>. Used to enable the cblas_cgemm based kernel.
186  template< typename T1, typename T2, typename T3 >
187  struct UseSinglePrecisionComplexKernel {
188  typedef complex<float> Type;
189  enum { value = IsSame<typename T1::ElementType,Type>::value &&
190  IsSame<typename T2::ElementType,Type>::value &&
191  IsSame<typename T3::ElementType,Type>::value };
192  };
194  //**********************************************************************************************
195 
196  //**********************************************************************************************
198 
// Helper trait: non-zero only if the element type of each of T1, T2 and T3 is exactly
// complex<double>. Used to enable the cblas_zgemm based kernel.
202  template< typename T1, typename T2, typename T3 >
203  struct UseDoublePrecisionComplexKernel {
204  typedef complex<double> Type;
205  enum { value = IsSame<typename T1::ElementType,Type>::value &&
206  IsSame<typename T2::ElementType,Type>::value &&
207  IsSame<typename T3::ElementType,Type>::value };
208  };
210  //**********************************************************************************************
211 
212  //**********************************************************************************************
214 
// Helper trait: non-zero when no BLAS kernel applies, i.e. either BLAZE_BLAS_MODE is
// zero or none of the four precision-specific traits above matches (T1, T2, T3).
// Used to enable the fallback overload of selectBlasAssignKernel.
217  template< typename T1, typename T2, typename T3 >
218  struct UseDefaultKernel {
219  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
// Helper trait: non-zero when the intrinsics-based default kernels may be used.
// Requires that all three matrix types are flagged vectorizable, that they share one
// element type, and that IntrinsicTrait reports addition, subtraction and multiplication
// for that element type.
232  template< typename T1, typename T2, typename T3 >
233  struct UseVectorizedDefaultKernel {
234  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237  IntrinsicTrait<typename T1::ElementType>::addition &&
238  IntrinsicTrait<typename T1::ElementType>::subtraction &&
239  IntrinsicTrait<typename T1::ElementType>::multiplication };
240  };
242  //**********************************************************************************************
243 
244  public:
245  //**Type definitions****************************************************************************
// ReturnType: value type returned by operator() (a const ElementType, i.e. by value).
// CompositeType: type used when this expression is an operand of another expression.
252  typedef const ElementType ReturnType;
253  typedef const ResultType CompositeType;
254 
// Left operand is held by value if MT1 is an expression, otherwise by const reference.
256  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
257 
// Right operand is held by value if MT2 is an expression, otherwise by const reference.
259  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
260 
263 
266  //**********************************************************************************************
267 
268  //**Compilation flags***************************************************************************
270  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
274 
// Compilation flag: the expression is SMP-assignable only if neither operand is
// flagged for evaluation (evaluateLeft and evaluateRight are both zero).
276  enum { smpAssignable = !evaluateLeft && !evaluateRight };
277  //**********************************************************************************************
278 
279  //**Constructor*********************************************************************************
// Constructor: stores the two operands of the multiplication expression.
//   lhs - left-hand side dense matrix operand
//   rhs - right-hand side dense matrix operand
// The inner dimensions must agree (lhs.columns() == rhs.rows()); this is only
// verified through BLAZE_INTERNAL_ASSERT.
285  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
286  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
287  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
288  {
289  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
290  }
291  //**********************************************************************************************
292 
293  //**Access operator*****************************************************************************
// 2D element access: computes element (i,j) of the product as the sum over k of
// lhs_(i,k) * rhs_(k,j).
//   i - row index, expected to be smaller than lhs_.rows()
//   j - column index, expected to be smaller than rhs_.columns()
// Both bounds are only checked through BLAZE_INTERNAL_ASSERT.
300  inline ReturnType operator()( size_t i, size_t j ) const {
301  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
302  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
303 
304  ElementType tmp;
305 
306  if( lhs_.columns() != 0UL ) {
      // 'end' equals columns() when columns() is odd and columns()-1 when it is even,
      // so the range [1,end) always holds an even number of terms for the pairwise loop.
307  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
      // The k=0 term initializes the accumulator, avoiding a separate zero-initialization.
308  tmp = lhs_(i,0UL) * rhs_(0UL,j);
309  for( size_t k=1UL; k<end; k+=2UL ) {
310  tmp += lhs_(i,k ) * rhs_(k ,j);
311  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
312  }
      // One term is left over at k=end when columns() is even.
313  if( end < lhs_.columns() ) {
314  tmp += lhs_(i,end) * rhs_(end,j);
315  }
316  }
317  else {
      // Empty inner dimension: no products to sum, the accumulator is passed to reset().
318  reset( tmp );
319  }
320 
321  return tmp;
322  }
323  //**********************************************************************************************
324 
325  //**Rows function*******************************************************************************
// Returns the number of rows of the product, which is the row count of the left operand.
330  inline size_t rows() const {
331  return lhs_.rows();
332  }
333  //**********************************************************************************************
334 
335  //**Columns function****************************************************************************
// Returns the number of columns of the product, which is the column count of the
// right operand.
340  inline size_t columns() const {
341  return rhs_.columns();
342  }
343  //**********************************************************************************************
344 
345  //**Left operand access*************************************************************************
// Returns the stored left-hand side dense matrix operand.
350  inline LeftOperand leftOperand() const {
351  return lhs_;
352  }
353  //**********************************************************************************************
354 
355  //**Right operand access************************************************************************
// Returns the stored right-hand side dense matrix operand.
360  inline RightOperand rightOperand() const {
361  return rhs_;
362  }
363  //**********************************************************************************************
364 
365  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
//   alias - the address to be checked
// True if either operand reports canAlias() for that address.
371  template< typename T >
372  inline bool canAlias( const T* alias ) const {
373  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
374  }
375  //**********************************************************************************************
376 
377  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//   alias - the address to be checked
// True if either operand reports isAliased() for that address.
383  template< typename T >
384  inline bool isAliased( const T* alias ) const {
385  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
386  }
387  //**********************************************************************************************
388 
389  //**********************************************************************************************
// Returns true only if both operands report isAligned().
394  inline bool isAligned() const {
395  return lhs_.isAligned() && rhs_.isAligned();
396  }
397  //**********************************************************************************************
398 
399  //**********************************************************************************************
404  inline bool canSMPAssign() const {
405  return ( !BLAZE_BLAS_IS_PARALLEL ||
406  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
408  }
409  //**********************************************************************************************
410 
411  private:
412  //**Member variables****************************************************************************
415  //**********************************************************************************************
416 
417  //**Assignment to dense matrices****************************************************************
// Assignment of a dense matrix-dense matrix multiplication to a dense matrix (C = A*B).
//   lhs - the target dense matrix; must already have the dimensions of the expression
//         (checked via BLAZE_INTERNAL_ASSERT only)
//   rhs - the multiplication expression to be assigned
// Degenerate sizes are handled up front; otherwise both operands are bound to
// LT/RT objects and the work is forwarded to selectAssignKernel.
427  template< typename MT3 // Type of the target dense matrix
428  , bool SO > // Storage order of the target dense matrix
429  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
430  {
432 
433  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
434  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
435 
      // Empty target: nothing to compute.
436  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
437  return;
438  }
      // Empty inner dimension: there are no products to sum, so the target is reset.
439  else if( rhs.lhs_.columns() == 0UL ) {
440  reset( ~lhs );
441  return;
442  }
443 
444  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
445  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
446 
447  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
448  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
449  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
450  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
451  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
452  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
453 
454  DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
455  }
457  //**********************************************************************************************
458 
459  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for C = A*B when UseSMPAssignKernel is not set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Targets with fewer than DMATDMATMULT_THRESHOLD elements use the default kernel,
// larger ones go through selectBlasAssignKernel.
470  template< typename MT3 // Type of the left-hand side target matrix
471  , typename MT4 // Type of the left-hand side matrix operand
472  , typename MT5 > // Type of the right-hand side matrix operand
473  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
474  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
475  {
476  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
477  DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
478  else
479  DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
480  }
482  //**********************************************************************************************
483 
484  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for C = A*B when UseSMPAssignKernel is set (at least one operand of
// this expression is flagged for evaluation). The product of the already bound
// operands A and B is handed to smpAssign.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
495  template< typename MT3 // Type of the left-hand side target matrix
496  , typename MT4 // Type of the left-hand side matrix operand
497  , typename MT5 > // Type of the right-hand side matrix operand
498  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
499  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
500  {
501  smpAssign( C, A * B );
502  }
504  //**********************************************************************************************
505 
506  //**Default assignment to dense matrices********************************************************
// Default (scalar) kernel for C = A*B, enabled when UseVectorizedDefaultKernel is not set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Precondition: A.columns() > 0, since A(i,0) and B(0,j) are read unconditionally.
// The visible caller, assign(), returns early for an empty inner dimension.
519  template< typename MT3 // Type of the left-hand side target matrix
520  , typename MT4 // Type of the left-hand side matrix operand
521  , typename MT5 > // Type of the right-hand side matrix operand
522  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
523  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
524  {
525  const size_t M( A.rows() );
526  const size_t N( B.columns() );
527  const size_t K( A.columns() );
528 
529  for( size_t i=0UL; i<M; ++i ) {
      // The k=0 products overwrite row i of C, so C needs no prior zeroing.
530  for( size_t j=0UL; j<N; ++j ) {
531  C(i,j) = A(i,0UL) * B(0UL,j);
532  }
      // Remaining products are accumulated; j is the innermost loop so that C and B
      // are both traversed along a row.
533  for( size_t k=1UL; k<K; ++k ) {
534  for( size_t j=0UL; j<N; ++j ) {
535  C(i,j) += A(i,k) * B(k,j);
536  }
537  }
538  }
539  }
541  //**********************************************************************************************
542 
543  //**Vectorized default assignment to row-major dense matrices***********************************
// Vectorized default kernel for C = A*B with a row-major target matrix, enabled when
// UseVectorizedDefaultKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// The columns of C are processed in tiles of 8, 4, 2 and finally 1 intrinsic vectors
// (IT::size elements each). Inside a tile the products are accumulated over k in
// IntrinsicType variables and stored to C once per tile.
// NOTE(review): the accumulators are default-constructed and never explicitly cleared,
// so this presumably relies on IntrinsicType default-constructing to zero - confirm.
// NOTE(review): the loop conditions only require the FIRST element of the last vector of
// a tile to lie below N, so load/store may reach past column N; presumably the matrices
// are padded to a multiple of IT::size - confirm.
557  template< typename MT3 // Type of the left-hand side target matrix
558  , typename MT4 // Type of the left-hand side matrix operand
559  , typename MT5 > // Type of the right-hand side matrix operand
560  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
561  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
562  {
563  typedef IntrinsicTrait<ElementType> IT;
564 
565  const size_t M( A.rows() );
566  const size_t N( B.columns() );
567  const size_t K( A.columns() );
568 
569  size_t j( 0UL );
570 
      // Tiles of 8 vectors x 1 row.
571  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
572  for( size_t i=0UL; i<M; ++i ) {
573  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
574  for( size_t k=0UL; k<K; ++k ) {
575  const IntrinsicType a1( set( A(i,k) ) );
576  xmm1 = xmm1 + a1 * B.load(k,j );
577  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
578  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
579  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
580  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
581  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
582  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
583  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
584  }
585  (~C).store( i, j , xmm1 );
586  (~C).store( i, j+IT::size , xmm2 );
587  (~C).store( i, j+IT::size*2UL, xmm3 );
588  (~C).store( i, j+IT::size*3UL, xmm4 );
589  (~C).store( i, j+IT::size*4UL, xmm5 );
590  (~C).store( i, j+IT::size*5UL, xmm6 );
591  (~C).store( i, j+IT::size*6UL, xmm7 );
592  (~C).store( i, j+IT::size*7UL, xmm8 );
593  }
594  }
      // Tiles of 4 vectors x 2 rows; each loaded vector of B is reused for both rows.
595  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
596  size_t i( 0UL );
597  for( ; (i+2UL) <= M; i+=2UL ) {
598  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
599  for( size_t k=0UL; k<K; ++k ) {
600  const IntrinsicType a1( set( A(i ,k) ) );
601  const IntrinsicType a2( set( A(i+1UL,k) ) );
602  const IntrinsicType b1( B.load(k,j ) );
603  const IntrinsicType b2( B.load(k,j+IT::size ) );
604  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
605  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
606  xmm1 = xmm1 + a1 * b1;
607  xmm2 = xmm2 + a1 * b2;
608  xmm3 = xmm3 + a1 * b3;
609  xmm4 = xmm4 + a1 * b4;
610  xmm5 = xmm5 + a2 * b1;
611  xmm6 = xmm6 + a2 * b2;
612  xmm7 = xmm7 + a2 * b3;
613  xmm8 = xmm8 + a2 * b4;
614  }
615  (~C).store( i , j , xmm1 );
616  (~C).store( i , j+IT::size , xmm2 );
617  (~C).store( i , j+IT::size*2UL, xmm3 );
618  (~C).store( i , j+IT::size*3UL, xmm4 );
619  (~C).store( i+1UL, j , xmm5 );
620  (~C).store( i+1UL, j+IT::size , xmm6 );
621  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
622  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
623  }
      // Leftover row when M is odd.
624  if( i < M ) {
625  IntrinsicType xmm1, xmm2, xmm3, xmm4;
626  for( size_t k=0UL; k<K; ++k ) {
627  const IntrinsicType a1( set( A(i,k) ) );
628  xmm1 = xmm1 + a1 * B.load(k,j );
629  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
630  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
631  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
632  }
633  (~C).store( i, j , xmm1 );
634  (~C).store( i, j+IT::size , xmm2 );
635  (~C).store( i, j+IT::size*2UL, xmm3 );
636  (~C).store( i, j+IT::size*3UL, xmm4 );
637  }
638  }
      // Tiles of 2 vectors x 2 rows.
639  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
640  size_t i( 0UL );
641  for( ; (i+2UL) <= M; i+=2UL ) {
642  IntrinsicType xmm1, xmm2, xmm3, xmm4;
643  for( size_t k=0UL; k<K; ++k ) {
644  const IntrinsicType a1( set( A(i ,k) ) );
645  const IntrinsicType a2( set( A(i+1UL,k) ) );
646  const IntrinsicType b1( B.load(k,j ) );
647  const IntrinsicType b2( B.load(k,j+IT::size) );
648  xmm1 = xmm1 + a1 * b1;
649  xmm2 = xmm2 + a1 * b2;
650  xmm3 = xmm3 + a2 * b1;
651  xmm4 = xmm4 + a2 * b2;
652  }
653  (~C).store( i , j , xmm1 );
654  (~C).store( i , j+IT::size, xmm2 );
655  (~C).store( i+1UL, j , xmm3 );
656  (~C).store( i+1UL, j+IT::size, xmm4 );
657  }
      // Leftover row when M is odd.
658  if( i < M ) {
659  IntrinsicType xmm1, xmm2;
660  for( size_t k=0UL; k<K; ++k ) {
661  const IntrinsicType a1( set( A(i,k) ) );
662  xmm1 = xmm1 + a1 * B.load(k,j );
663  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
664  }
665  (~C).store( i, j , xmm1 );
666  (~C).store( i, j+IT::size, xmm2 );
667  }
668  }
      // At most one vector-wide column strip remains at this point.
669  if( j < N ) {
670  size_t i( 0UL );
671  for( ; (i+2UL) <= M; i+=2UL ) {
672  IntrinsicType xmm1, xmm2;
673  for( size_t k=0UL; k<K; ++k ) {
674  const IntrinsicType b1( B.load(k,j) );
675  xmm1 = xmm1 + set( A(i ,k) ) * b1;
676  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
677  }
678  (~C).store( i , j, xmm1 );
679  (~C).store( i+1UL, j, xmm2 );
680  }
      // Leftover row when M is odd.
681  if( i < M ) {
682  IntrinsicType xmm1;
683  for( size_t k=0UL; k<K; ++k ) {
684  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
685  }
686  (~C).store( i, j, xmm1 );
687  }
688  }
689  }
691  //**********************************************************************************************
692 
693  //**Vectorized default assignment to column-major dense matrices********************************
// Vectorized default kernel for C = A*B with a column-major target matrix, enabled when
// UseVectorizedDefaultKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// No multiplication loop is run here: one operand is copied into its OppositeType and
// the resulting mixed product is handed to smpAssign.
// Choice of the operand to convert:
//   - only MT5 is resizable  -> convert A
//   - only MT4 is resizable  -> convert B
//   - otherwise              -> convert the operand with fewer elements (A on a tie)
707  template< typename MT3 // Type of the left-hand side target matrix
708  , typename MT4 // Type of the left-hand side matrix operand
709  , typename MT5 > // Type of the right-hand side matrix operand
710  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
711  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
712  {
715 
716  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
717  const typename MT4::OppositeType tmp( A );
718  smpAssign( ~C, tmp * B );
719  }
720  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
721  const typename MT5::OppositeType tmp( B );
722  smpAssign( ~C, A * tmp );
723  }
724  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
725  const typename MT4::OppositeType tmp( A );
726  smpAssign( ~C, tmp * B );
727  }
728  else {
729  const typename MT5::OppositeType tmp( B );
730  smpAssign( ~C, A * tmp );
731  }
732  }
734  //**********************************************************************************************
735 
736  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback for the BLAS-based assignment, enabled when UseDefaultKernel is set (no BLAS
// kernel applies to the given types). Simply forwards to selectDefaultAssignKernel.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
749  template< typename MT3 // Type of the left-hand side target matrix
750  , typename MT4 // Type of the left-hand side matrix operand
751  , typename MT5 > // Type of the right-hand side matrix operand
752  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
753  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
754  {
755  selectDefaultAssignKernel( C, A, B );
756  }
758  //**********************************************************************************************
759 
760  //**BLAS-based assignment to dense matrices (single precision)**********************************
761 #if BLAZE_BLAS_MODE
762 
// BLAS-based kernel for C = A*B on single precision matrices (cblas_sgemm), enabled
// when UseSinglePrecisionKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Dimensions and leading dimensions (spacing()) are converted to int via
// boost::numeric_cast. The call uses alpha = 1 and beta = 0, i.e. C is overwritten.
775  template< typename MT3 // Type of the left-hand side target matrix
776  , typename MT4 // Type of the left-hand side matrix operand
777  , typename MT5 > // Type of the right-hand side matrix operand
778  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
779  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
780  {
781  using boost::numeric_cast;
782 
786 
787  const int M ( numeric_cast<int>( A.rows() ) );
788  const int N ( numeric_cast<int>( B.columns() ) );
789  const int K ( numeric_cast<int>( A.columns() ) );
790  const int lda( numeric_cast<int>( A.spacing() ) );
791  const int ldb( numeric_cast<int>( B.spacing() ) );
792  const int ldc( numeric_cast<int>( C.spacing() ) );
793 
      // The storage order of the target decides the layout flag. For a column-major
      // target both operands are passed as transposed.
794  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
795  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
796  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
797  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
798  }
800 #endif
801  //**********************************************************************************************
802 
803  //**BLAS-based assignment to dense matrices (double precision)**********************************
804 #if BLAZE_BLAS_MODE
805 
// BLAS-based kernel for C = A*B on double precision matrices (cblas_dgemm), enabled
// when UseDoublePrecisionKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Dimensions and leading dimensions (spacing()) are converted to int via
// boost::numeric_cast. The call uses alpha = 1 and beta = 0, i.e. C is overwritten.
818  template< typename MT3 // Type of the left-hand side target matrix
819  , typename MT4 // Type of the left-hand side matrix operand
820  , typename MT5 > // Type of the right-hand side matrix operand
821  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
822  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
823  {
824  using boost::numeric_cast;
825 
829 
830  const int M ( numeric_cast<int>( A.rows() ) );
831  const int N ( numeric_cast<int>( B.columns() ) );
832  const int K ( numeric_cast<int>( A.columns() ) );
833  const int lda( numeric_cast<int>( A.spacing() ) );
834  const int ldb( numeric_cast<int>( B.spacing() ) );
835  const int ldc( numeric_cast<int>( C.spacing() ) );
836 
      // The storage order of the target decides the layout flag. For a column-major
      // target both operands are passed as transposed.
837  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
838  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
839  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
840  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
841  }
843 #endif
844  //**********************************************************************************************
845 
846  //**BLAS-based assignment to dense matrices (single precision complex)**************************
847 #if BLAZE_BLAS_MODE
848 
// BLAS-based kernel for C = A*B on single precision complex matrices (cblas_cgemm),
// enabled when UseSinglePrecisionComplexKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Dimensions and leading dimensions (spacing()) are converted to int via
// boost::numeric_cast. alpha = (1,0) and beta = (0,0) are passed by address, so C is
// overwritten with the product.
861  template< typename MT3 // Type of the left-hand side target matrix
862  , typename MT4 // Type of the left-hand side matrix operand
863  , typename MT5 > // Type of the right-hand side matrix operand
864  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
865  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
866  {
867  using boost::numeric_cast;
868 
      // Compile-time check that the underlying value type of every complex element is float.
872  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
873  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
874  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
875 
876  const int M ( numeric_cast<int>( A.rows() ) );
877  const int N ( numeric_cast<int>( B.columns() ) );
878  const int K ( numeric_cast<int>( A.columns() ) );
879  const int lda( numeric_cast<int>( A.spacing() ) );
880  const int ldb( numeric_cast<int>( B.spacing() ) );
881  const int ldc( numeric_cast<int>( C.spacing() ) );
882  const complex<float> alpha( 1.0F, 0.0F );
883  const complex<float> beta ( 0.0F, 0.0F );
884 
      // The storage order of the target decides the layout flag. For a column-major
      // target both operands are passed as transposed.
885  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
886  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
887  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
888  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
889  }
891 #endif
892  //**********************************************************************************************
893 
894  //**BLAS-based assignment to dense matrices (double precision complex)**************************
895 #if BLAZE_BLAS_MODE
896 
// BLAS-based kernel for C = A*B on double precision complex matrices (cblas_zgemm),
// enabled when UseDoublePrecisionComplexKernel is set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Dimensions and leading dimensions (spacing()) are converted to int via
// boost::numeric_cast. alpha = (1,0) and beta = (0,0) are passed by address, so C is
// overwritten with the product.
909  template< typename MT3 // Type of the left-hand side target matrix
910  , typename MT4 // Type of the left-hand side matrix operand
911  , typename MT5 > // Type of the right-hand side matrix operand
912  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
913  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
914  {
915  using boost::numeric_cast;
916 
      // Compile-time check that the underlying value type of every complex element is double.
920  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
921  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
922  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
923 
924  const int M ( numeric_cast<int>( A.rows() ) );
925  const int N ( numeric_cast<int>( B.columns() ) );
926  const int K ( numeric_cast<int>( A.columns() ) );
927  const int lda( numeric_cast<int>( A.spacing() ) );
928  const int ldb( numeric_cast<int>( B.spacing() ) );
929  const int ldc( numeric_cast<int>( C.spacing() ) );
930  const complex<double> alpha( 1.0, 0.0 );
931  const complex<double> beta ( 0.0, 0.0 );
932 
      // The storage order of the target decides the layout flag. For a column-major
      // target both operands are passed as transposed.
933  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
934  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
935  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
936  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
937  }
939 #endif
940  //**********************************************************************************************
941 
942  //**Assignment to sparse matrices***************************************************************
// Assignment of a dense matrix-dense matrix multiplication to a sparse matrix.
//   lhs - the target sparse matrix; must already have the dimensions of the expression
//         (checked via BLAZE_INTERNAL_ASSERT only)
//   rhs - the multiplication expression to be assigned
// The expression is first evaluated into a temporary, which is then handed to smpAssign.
954  template< typename MT // Type of the target sparse matrix
955  , bool SO > // Storage order of the target sparse matrix
956  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
957  {
959 
      // Temporary type follows the target's storage order flag: OppositeType when SO is
      // true, ResultType otherwise.
960  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
961 
968 
969  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
970  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
971 
972  const TmpType tmp( rhs );
973  smpAssign( ~lhs, tmp );
974  }
976  //**********************************************************************************************
977 
978  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of a dense matrix-dense matrix multiplication to a dense matrix
// (C += A*B).
//   lhs - the target dense matrix; must already have the dimensions of the expression
//         (checked via BLAZE_INTERNAL_ASSERT only)
//   rhs - the multiplication expression to be added
// Both operands are bound to LT/RT objects and the work is forwarded to
// selectAddAssignKernel.
991  template< typename MT3 // Type of the target dense matrix
992  , bool SO > // Storage order of the target dense matrix
993  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
994  {
996 
997  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
998  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
999 
      // Any zero dimension (including the inner one) means there is nothing to add, so
      // unlike in assign() the target is left untouched.
1000  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1001  return;
1002  }
1003 
1004  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1005  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1006 
1007  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1008  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1009  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1010  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1011  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1012  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1013 
1014  DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1015  }
1017  //**********************************************************************************************
1018 
1019  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for C += A*B when UseSMPAssignKernel is not set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// Targets with fewer than DMATDMATMULT_THRESHOLD elements use the default kernel,
// larger ones go through selectBlasAddAssignKernel.
1030  template< typename MT3 // Type of the left-hand side target matrix
1031  , typename MT4 // Type of the left-hand side matrix operand
1032  , typename MT5 > // Type of the right-hand side matrix operand
1033  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1034  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1035  {
1036  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1037  DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1038  else
1039  DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1040  }
1042  //**********************************************************************************************
1043 
1044  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for C += A*B when UseSMPAssignKernel is set (at least one operand of
// this expression is flagged for evaluation). The product of the already bound
// operands A and B is handed to smpAddAssign.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
1055  template< typename MT3 // Type of the left-hand side target matrix
1056  , typename MT4 // Type of the left-hand side matrix operand
1057  , typename MT5 > // Type of the right-hand side matrix operand
1058  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1059  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1060  {
1061  smpAddAssign( C, A * B );
1062  }
1064  //**********************************************************************************************
1065 
1066  //**Default addition assignment to dense matrices***********************************************
// Default (scalar) kernel for C += A*B, enabled when UseVectorizedDefaultKernel is not set.
//   C - the target matrix
//   A - the left-hand side operand
//   B - the right-hand side operand
// The innermost loop over the columns of C is unrolled by two.
1080  template< typename MT3 // Type of the left-hand side target matrix
1081  , typename MT4 // Type of the left-hand side matrix operand
1082  , typename MT5 > // Type of the right-hand side matrix operand
1083  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1085  {
1086  const size_t M( A.rows() );
1087  const size_t N( B.columns() );
1088  const size_t K( A.columns() );
1089 
      // 'end' is N rounded down to an even number; clearing the lowest bit is
      // asserted to be equivalent to N - (N % 2).
1090  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1091  const size_t end( N & size_t(-2) );
1092 
1093  for( size_t i=0UL; i<M; ++i ) {
1094  for( size_t k=0UL; k<K; ++k ) {
1095  for( size_t j=0UL; j<end; j+=2UL ) {
1096  C(i,j ) += A(i,k) * B(k,j );
1097  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1098  }
      // Trailing column when N is odd.
1099  if( end < N ) {
1100  C(i,end) += A(i,k) * B(k,end);
1101  }
1102  }
1103  }
1104  }
1106  //**********************************************************************************************
1107 
1108  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment kernel for a row-major target
// (DenseMatrix<MT3,false>); enabled (via EnableIf) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
// The columns of C are processed in panels of 8, 4, 2 and finally 1 intrinsic
// vectors (IT::size elements each). For each panel the current values of C are
// loaded, A(i,k) * B(k,panel) is added for all k, and the result is stored back.
// NOTE(review): set(), IntrinsicType and load()/store() are defined outside this
// section; set( A(i,k) ) presumably broadcasts the scalar to all vector lanes.
// NOTE(review): every panel condition only requires the first element of the
// last vector to lie below N, so the last vector may reach up to IT::size-1
// elements past column N-1; presumably the row storage is padded -- confirm.
1122  template< typename MT3 // Type of the left-hand side target matrix
1123  , typename MT4 // Type of the left-hand side matrix operand
1124  , typename MT5 > // Type of the right-hand side matrix operand
1125  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1126  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1127  {
1128  typedef IntrinsicTrait<ElementType> IT;
1129 
1130  const size_t M( A.rows() );
1131  const size_t N( B.columns() );
1132  const size_t K( A.columns() );
1133 
      // Column index shared by all panel loops: each loop continues where the
      // previous, wider one stopped.
1134  size_t j( 0UL );
1135 
      // Panels of eight vectors, one row of C at a time.
1136  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1137  for( size_t i=0UL; i<M; ++i ) {
1138  IntrinsicType xmm1( (~C).load(i,j ) );
1139  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1140  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1141  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1142  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1143  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1144  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1145  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1146  for( size_t k=0UL; k<K; ++k ) {
1147  const IntrinsicType a1( set( A(i,k) ) );
1148  xmm1 = xmm1 + a1 * B.load(k,j );
1149  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1150  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1151  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1152  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1153  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1154  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1155  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
1156  }
1157  (~C).store( i, j , xmm1 );
1158  (~C).store( i, j+IT::size , xmm2 );
1159  (~C).store( i, j+IT::size*2UL, xmm3 );
1160  (~C).store( i, j+IT::size*3UL, xmm4 );
1161  (~C).store( i, j+IT::size*4UL, xmm5 );
1162  (~C).store( i, j+IT::size*5UL, xmm6 );
1163  (~C).store( i, j+IT::size*6UL, xmm7 );
1164  (~C).store( i, j+IT::size*7UL, xmm8 );
1165  }
1166  }
      // Panels of four vectors. Rows are processed in pairs so that every vector
      // loaded from B is used for two rows of C.
1167  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1168  size_t i( 0UL );
1169  for( ; (i+2UL) <= M; i+=2UL ) {
1170  IntrinsicType xmm1( (~C).load(i ,j ) );
1171  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1172  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1173  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1174  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1175  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1176  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1177  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1178  for( size_t k=0UL; k<K; ++k ) {
1179  const IntrinsicType a1( set( A(i ,k) ) );
1180  const IntrinsicType a2( set( A(i+1UL,k) ) );
1181  const IntrinsicType b1( B.load(k,j ) );
1182  const IntrinsicType b2( B.load(k,j+IT::size ) );
1183  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1184  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1185  xmm1 = xmm1 + a1 * b1;
1186  xmm2 = xmm2 + a1 * b2;
1187  xmm3 = xmm3 + a1 * b3;
1188  xmm4 = xmm4 + a1 * b4;
1189  xmm5 = xmm5 + a2 * b1;
1190  xmm6 = xmm6 + a2 * b2;
1191  xmm7 = xmm7 + a2 * b3;
1192  xmm8 = xmm8 + a2 * b4;
1193  }
1194  (~C).store( i , j , xmm1 );
1195  (~C).store( i , j+IT::size , xmm2 );
1196  (~C).store( i , j+IT::size*2UL, xmm3 );
1197  (~C).store( i , j+IT::size*3UL, xmm4 );
1198  (~C).store( i+1UL, j , xmm5 );
1199  (~C).store( i+1UL, j+IT::size , xmm6 );
1200  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1201  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1202  }
      // Remaining single row (only reached when M is odd).
1203  if( i < M ) {
1204  IntrinsicType xmm1( (~C).load(i,j ) );
1205  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1206  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1207  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1208  for( size_t k=0UL; k<K; ++k ) {
1209  const IntrinsicType a1( set( A(i,k) ) );
1210  xmm1 = xmm1 + a1 * B.load(k,j );
1211  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1212  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1213  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1214  }
1215  (~C).store( i, j , xmm1 );
1216  (~C).store( i, j+IT::size , xmm2 );
1217  (~C).store( i, j+IT::size*2UL, xmm3 );
1218  (~C).store( i, j+IT::size*3UL, xmm4 );
1219  }
1220  }
      // Panels of two vectors, again two rows at a time plus a remainder row.
1221  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1222  size_t i( 0UL );
1223  for( ; (i+2UL) <= M; i+=2UL ) {
1224  IntrinsicType xmm1( (~C).load(i ,j ) );
1225  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1226  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1227  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1228  for( size_t k=0UL; k<K; ++k ) {
1229  const IntrinsicType a1( set( A(i ,k) ) );
1230  const IntrinsicType a2( set( A(i+1UL,k) ) );
1231  const IntrinsicType b1( B.load(k,j ) );
1232  const IntrinsicType b2( B.load(k,j+IT::size) );
1233  xmm1 = xmm1 + a1 * b1;
1234  xmm2 = xmm2 + a1 * b2;
1235  xmm3 = xmm3 + a2 * b1;
1236  xmm4 = xmm4 + a2 * b2;
1237  }
1238  (~C).store( i , j , xmm1 );
1239  (~C).store( i , j+IT::size, xmm2 );
1240  (~C).store( i+1UL, j , xmm3 );
1241  (~C).store( i+1UL, j+IT::size, xmm4 );
1242  }
1243  if( i < M ) {
1244  IntrinsicType xmm1( (~C).load(i,j ) );
1245  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1246  for( size_t k=0UL; k<K; ++k ) {
1247  const IntrinsicType a1( set( A(i,k) ) );
1248  xmm1 = xmm1 + a1 * B.load(k,j );
1249  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1250  }
1251  (~C).store( i, j , xmm1 );
1252  (~C).store( i, j+IT::size, xmm2 );
1253  }
1254  }
      // Final panel of a single vector starting at column j (if any columns remain).
1255  if( j < N ) {
1256  size_t i( 0UL );
1257  for( ; (i+2UL) <= M; i+=2UL ) {
1258  IntrinsicType xmm1( (~C).load(i ,j) );
1259  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1260  for( size_t k=0UL; k<K; ++k ) {
1261  const IntrinsicType b1( B.load(k,j) );
1262  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1263  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1264  }
1265  (~C).store( i , j, xmm1 );
1266  (~C).store( i+1UL, j, xmm2 );
1267  }
1268  if( i < M ) {
1269  IntrinsicType xmm1( (~C).load(i,j) );
1270  for( size_t k=0UL; k<K; ++k ) {
1271  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1272  }
1273  (~C).store( i, j, xmm1 );
1274  }
1275  }
1276  }
1278  //**********************************************************************************************
1279 
1280  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment kernel for a column-major target
// (DenseMatrix<MT3,true>); enabled (via EnableIf) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
// No arithmetic is done here: one of the two operands is copied into a
// temporary of its OppositeType and the addition assignment is dispatched
// again through addAssign() on the resulting product expression.
1294  template< typename MT3 // Type of the left-hand side target matrix
1295  , typename MT4 // Type of the left-hand side matrix operand
1296  , typename MT5 > // Type of the right-hand side matrix operand
1297  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1298  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1299  {
1302 
      // If exactly one operand type is not resizable, that operand is the one
      // that gets converted.
1303  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1304  const typename MT4::OppositeType tmp( A );
1305  addAssign( ~C, tmp * B );
1306  }
1307  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1308  const typename MT5::OppositeType tmp( B );
1309  addAssign( ~C, A * tmp );
1310  }
      // Otherwise the operand with fewer elements is converted (A on a tie).
1311  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1312  const typename MT4::OppositeType tmp( A );
1313  addAssign( ~C, tmp * B );
1314  }
1315  else {
1316  const typename MT5::OppositeType tmp( B );
1317  addAssign( ~C, A * tmp );
1318  }
1319  }
1321  //**********************************************************************************************
1322 
1323  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Default overload of the BLAS kernel selection for the addition assignment;
// enabled (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5> applies.
// It performs no BLAS call and simply forwards to selectDefaultAddAssignKernel().
// NOTE(review): UseDefaultKernel is defined outside this section; presumably it
// covers the type combinations no BLAS routine is available for -- confirm.
1337  template< typename MT3 // Type of the left-hand side target matrix
1338  , typename MT4 // Type of the left-hand side matrix operand
1339  , typename MT5 > // Type of the right-hand side matrix operand
1340  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1341  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1342  {
1343  selectDefaultAddAssignKernel( C, A, B );
1344  }
1346  //**********************************************************************************************
1347 
1348  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1349 #if BLAZE_BLAS_MODE
1350 
// BLAS-based addition assignment via cblas_sgemm; enabled (via EnableIf) when
// UseSinglePrecisionKernel<MT3,MT4,MT5> applies.
// Both scaling factors passed to sgemm are 1.0F, i.e. the product of A and B is
// added to the existing content of C.
1363  template< typename MT3 // Type of the left-hand side target matrix
1364  , typename MT4 // Type of the left-hand side matrix operand
1365  , typename MT5 > // Type of the right-hand side matrix operand
1366  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1367  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1368  {
1369  using boost::numeric_cast;
1370 
1374 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1375  const int M ( numeric_cast<int>( A.rows() ) );
1376  const int N ( numeric_cast<int>( B.columns() ) );
1377  const int K ( numeric_cast<int>( A.columns() ) );
1378  const int lda( numeric_cast<int>( A.spacing() ) );
1379  const int ldb( numeric_cast<int>( B.spacing() ) );
1380  const int ldc( numeric_cast<int>( C.spacing() ) );
1381 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1382  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1383  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1384  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1385  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1386  }
1388 #endif
1389  //**********************************************************************************************
1390 
1391  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1392 #if BLAZE_BLAS_MODE
1393 
// BLAS-based addition assignment via cblas_dgemm; enabled (via EnableIf) when
// UseDoublePrecisionKernel<MT3,MT4,MT5> applies.
// Both scaling factors passed to dgemm are 1.0, i.e. the product of A and B is
// added to the existing content of C.
1406  template< typename MT3 // Type of the left-hand side target matrix
1407  , typename MT4 // Type of the left-hand side matrix operand
1408  , typename MT5 > // Type of the right-hand side matrix operand
1409  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1410  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1411  {
1412  using boost::numeric_cast;
1413 
1417 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1418  const int M ( numeric_cast<int>( A.rows() ) );
1419  const int N ( numeric_cast<int>( B.columns() ) );
1420  const int K ( numeric_cast<int>( A.columns() ) );
1421  const int lda( numeric_cast<int>( A.spacing() ) );
1422  const int ldb( numeric_cast<int>( B.spacing() ) );
1423  const int ldc( numeric_cast<int>( C.spacing() ) );
1424 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1425  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1426  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1427  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1428  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1429  }
1431 #endif
1432  //**********************************************************************************************
1433 
1434  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1435 #if BLAZE_BLAS_MODE
1436 
// BLAS-based addition assignment via cblas_cgemm; enabled (via EnableIf) when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies.
// alpha and beta are both (1,0), i.e. the product of A and B is added to the
// existing content of C.
1449  template< typename MT3 // Type of the left-hand side target matrix
1450  , typename MT4 // Type of the left-hand side matrix operand
1451  , typename MT5 > // Type of the right-hand side matrix operand
1452  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1453  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1454  {
1455  using boost::numeric_cast;
1456 
      // Compile-time checks on the value_type of the element types of all three
      // matrices.
1460  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1461  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1462  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1463 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1464  const int M ( numeric_cast<int>( A.rows() ) );
1465  const int N ( numeric_cast<int>( B.columns() ) );
1466  const int K ( numeric_cast<int>( A.columns() ) );
1467  const int lda( numeric_cast<int>( A.spacing() ) );
1468  const int ldb( numeric_cast<int>( B.spacing() ) );
1469  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cgemm by address.
1470  const complex<float> alpha( 1.0F, 0.0F );
1471  const complex<float> beta ( 1.0F, 0.0F );
1472 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1473  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1474  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1475  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1476  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1477  }
1479 #endif
1480  //**********************************************************************************************
1481 
1482  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1483 #if BLAZE_BLAS_MODE
1484 
// BLAS-based addition assignment via cblas_zgemm; enabled (via EnableIf) when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies.
// alpha and beta are both (1,0), i.e. the product of A and B is added to the
// existing content of C.
1497  template< typename MT3 // Type of the left-hand side target matrix
1498  , typename MT4 // Type of the left-hand side matrix operand
1499  , typename MT5 > // Type of the right-hand side matrix operand
1500  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1501  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1502  {
1503  using boost::numeric_cast;
1504 
      // Compile-time checks on the value_type of the element types of all three
      // matrices.
1508  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1509  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1510  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1511 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1512  const int M ( numeric_cast<int>( A.rows() ) );
1513  const int N ( numeric_cast<int>( B.columns() ) );
1514  const int K ( numeric_cast<int>( A.columns() ) );
1515  const int lda( numeric_cast<int>( A.spacing() ) );
1516  const int ldb( numeric_cast<int>( B.spacing() ) );
1517  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to zgemm by address.
1518  const complex<double> alpha( 1.0, 0.0 );
1519  const complex<double> beta ( 1.0, 0.0 );
1520 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1521  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1522  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1523  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1524  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1525  }
1527 #endif
1528  //**********************************************************************************************
1529 
1530  //**Addition assignment to sparse matrices******************************************************
1531  // No special implementation for the addition assignment to sparse matrices.
1532  //**********************************************************************************************
1533 
1534  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a dense matrix-dense matrix product to a dense
// matrix of either storage order.
//   lhs : target dense matrix that is updated in place
//   rhs : the product expression whose value is subtracted from lhs
// The two operands of rhs are evaluated into A and B and the work is then
// handed to selectSubAssignKernel().
1547  template< typename MT3 // Type of the target dense matrix
1548  , bool SO > // Storage order of the target dense matrix
1549  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatDMatMultExpr& rhs )
1550  {
1552 
1553  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1554  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1555 
      // Nothing to do for an empty target or an inner dimension of zero; in
      // that case the operands are not evaluated at all.
1556  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1557  return;
1558  }
1559 
1560  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1561  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1562 
      // Sanity checks: the evaluated operands must have the dimensions of the
      // original operands and must fit the target.
1563  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1564  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1565  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1566  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1567  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1568  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1569 
1570  DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1571  }
1573  //**********************************************************************************************
1574 
1575  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment of the product A * B from C.
// This overload is the one left enabled (via DisableIf) when
// UseSMPAssignKernel<MT3,MT4,MT5> does not apply.
//   C : target matrix that is updated in place
//   A : left-hand side operand of the product
//   B : right-hand side operand of the product
1586  template< typename MT3 // Type of the left-hand side target matrix
1587  , typename MT4 // Type of the left-hand side matrix operand
1588  , typename MT5 > // Type of the right-hand side matrix operand
1589  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1590  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1591  {
      // The decision is based on the number of elements of the target: targets
      // below DMATDMATMULT_THRESHOLD use the default kernel, all others go
      // through the BLAS kernel selection.
1592  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
1593  DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1594  else
1595  DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1596  }
1598  //**********************************************************************************************
1599 
1600  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment of the product A * B from C.
// This overload is enabled (via EnableIf) when UseSMPAssignKernel<MT3,MT4,MT5>
// applies. It does not compute anything itself: the product expression A * B
// is rebuilt and handed to smpSubAssign().
// NOTE(review): smpSubAssign is defined outside this section; presumably it is
// the shared-memory parallel counterpart of subAssign -- confirm.
1611  template< typename MT3 // Type of the left-hand side target matrix
1612  , typename MT4 // Type of the left-hand side matrix operand
1613  , typename MT5 > // Type of the right-hand side matrix operand
1614  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1615  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1616  {
1617  smpSubAssign( C, A * B );
1618  }
1620  //**********************************************************************************************
1621 
1622  //**Default subtraction assignment to dense matrices********************************************
// Default (scalar) subtraction assignment kernel: for every element of the
// target, C(i,j) -= A(i,k) * B(k,j) is applied for all k.
// This overload is the one left enabled (via DisableIf) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not apply.
//   C : target matrix that is updated in place
//   A : left-hand side operand (M x K)
//   B : right-hand side operand (K x N)
1636  template< typename MT3 // Type of the left-hand side target matrix
1637  , typename MT4 // Type of the left-hand side matrix operand
1638  , typename MT5 > // Type of the right-hand side matrix operand
1639  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1640  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1641  {
1642  const size_t M( A.rows() );
1643  const size_t N( B.columns() );
1644  const size_t K( A.columns() );
1645 
      // 'end' is N rounded down to the next even number: size_t(-2) has every bit
      // set except the lowest one, so the mask clears the lowest bit of N.
1646  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1647  const size_t end( N & size_t(-2) );
1648 
1649  for( size_t i=0UL; i<M; ++i ) {
1650  for( size_t k=0UL; k<K; ++k ) {
      // The column loop is unrolled by two ...
1651  for( size_t j=0UL; j<end; j+=2UL ) {
1652  C(i,j ) -= A(i,k) * B(k,j );
1653  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1654  }
      // ... and the single remaining column is handled here when N is odd.
1655  if( end < N ) {
1656  C(i,end) -= A(i,k) * B(k,end);
1657  }
1658  }
1659  }
1660  }
1662  //**********************************************************************************************
1663 
1664  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>); enabled (via EnableIf) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
// The columns of C are processed in panels of 8, 4, 2 and finally 1 intrinsic
// vectors (IT::size elements each). For each panel the current values of C are
// loaded, A(i,k) * B(k,panel) is subtracted for all k, and the result is stored
// back.
// NOTE(review): set(), IntrinsicType and load()/store() are defined outside this
// section; set( A(i,k) ) presumably broadcasts the scalar to all vector lanes.
// NOTE(review): every panel condition only requires the first element of the
// last vector to lie below N, so the last vector may reach up to IT::size-1
// elements past column N-1; presumably the row storage is padded -- confirm.
1678  template< typename MT3 // Type of the left-hand side target matrix
1679  , typename MT4 // Type of the left-hand side matrix operand
1680  , typename MT5 > // Type of the right-hand side matrix operand
1681  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1682  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1683  {
1684  typedef IntrinsicTrait<ElementType> IT;
1685 
1686  const size_t M( A.rows() );
1687  const size_t N( B.columns() );
1688  const size_t K( A.columns() );
1689 
      // Column index shared by all panel loops: each loop continues where the
      // previous, wider one stopped.
1690  size_t j( 0UL );
1691 
      // Panels of eight vectors, one row of C at a time.
1692  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1693  for( size_t i=0UL; i<M; ++i ) {
1694  IntrinsicType xmm1( (~C).load(i,j ) );
1695  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1696  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1697  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1698  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
1699  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
1700  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
1701  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
1702  for( size_t k=0UL; k<K; ++k ) {
1703  const IntrinsicType a1( set( A(i,k) ) );
1704  xmm1 = xmm1 - a1 * B.load(k,j );
1705  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1706  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1707  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1708  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1709  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1710  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1711  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
1712  }
1713  (~C).store( i, j , xmm1 );
1714  (~C).store( i, j+IT::size , xmm2 );
1715  (~C).store( i, j+IT::size*2UL, xmm3 );
1716  (~C).store( i, j+IT::size*3UL, xmm4 );
1717  (~C).store( i, j+IT::size*4UL, xmm5 );
1718  (~C).store( i, j+IT::size*5UL, xmm6 );
1719  (~C).store( i, j+IT::size*6UL, xmm7 );
1720  (~C).store( i, j+IT::size*7UL, xmm8 );
1721  }
1722  }
      // Panels of four vectors. Rows are processed in pairs so that every vector
      // loaded from B is used for two rows of C.
1723  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1724  size_t i( 0UL );
1725  for( ; (i+2UL) <= M; i+=2UL ) {
1726  IntrinsicType xmm1( (~C).load(i ,j ) );
1727  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
1728  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
1729  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
1730  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1731  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
1732  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
1733  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
1734  for( size_t k=0UL; k<K; ++k ) {
1735  const IntrinsicType a1( set( A(i ,k) ) );
1736  const IntrinsicType a2( set( A(i+1UL,k) ) );
1737  const IntrinsicType b1( B.load(k,j ) );
1738  const IntrinsicType b2( B.load(k,j+IT::size ) );
1739  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
1740  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
1741  xmm1 = xmm1 - a1 * b1;
1742  xmm2 = xmm2 - a1 * b2;
1743  xmm3 = xmm3 - a1 * b3;
1744  xmm4 = xmm4 - a1 * b4;
1745  xmm5 = xmm5 - a2 * b1;
1746  xmm6 = xmm6 - a2 * b2;
1747  xmm7 = xmm7 - a2 * b3;
1748  xmm8 = xmm8 - a2 * b4;
1749  }
1750  (~C).store( i , j , xmm1 );
1751  (~C).store( i , j+IT::size , xmm2 );
1752  (~C).store( i , j+IT::size*2UL, xmm3 );
1753  (~C).store( i , j+IT::size*3UL, xmm4 );
1754  (~C).store( i+1UL, j , xmm5 );
1755  (~C).store( i+1UL, j+IT::size , xmm6 );
1756  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
1757  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
1758  }
      // Remaining single row (only reached when M is odd).
1759  if( i < M ) {
1760  IntrinsicType xmm1( (~C).load(i,j ) );
1761  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
1762  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
1763  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
1764  for( size_t k=0UL; k<K; ++k ) {
1765  const IntrinsicType a1( set( A(i,k) ) );
1766  xmm1 = xmm1 - a1 * B.load(k,j );
1767  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1768  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1769  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1770  }
1771  (~C).store( i, j , xmm1 );
1772  (~C).store( i, j+IT::size , xmm2 );
1773  (~C).store( i, j+IT::size*2UL, xmm3 );
1774  (~C).store( i, j+IT::size*3UL, xmm4 );
1775  }
1776  }
      // Panels of two vectors, again two rows at a time plus a remainder row.
1777  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1778  size_t i( 0UL );
1779  for( ; (i+2UL) <= M; i+=2UL ) {
1780  IntrinsicType xmm1( (~C).load(i ,j ) );
1781  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
1782  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1783  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
1784  for( size_t k=0UL; k<K; ++k ) {
1785  const IntrinsicType a1( set( A(i ,k) ) );
1786  const IntrinsicType a2( set( A(i+1UL,k) ) );
1787  const IntrinsicType b1( B.load(k,j ) );
1788  const IntrinsicType b2( B.load(k,j+IT::size) );
1789  xmm1 = xmm1 - a1 * b1;
1790  xmm2 = xmm2 - a1 * b2;
1791  xmm3 = xmm3 - a2 * b1;
1792  xmm4 = xmm4 - a2 * b2;
1793  }
1794  (~C).store( i , j , xmm1 );
1795  (~C).store( i , j+IT::size, xmm2 );
1796  (~C).store( i+1UL, j , xmm3 );
1797  (~C).store( i+1UL, j+IT::size, xmm4 );
1798  }
1799  if( i < M ) {
1800  IntrinsicType xmm1( (~C).load(i,j ) );
1801  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
1802  for( size_t k=0UL; k<K; ++k ) {
1803  const IntrinsicType a1( set( A(i,k) ) );
1804  xmm1 = xmm1 - a1 * B.load(k,j );
1805  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1806  }
1807  (~C).store( i, j , xmm1 );
1808  (~C).store( i, j+IT::size, xmm2 );
1809  }
1810  }
      // Final panel of a single vector starting at column j (if any columns remain).
1811  if( j < N ) {
1812  size_t i( 0UL );
1813  for( ; (i+2UL) <= M; i+=2UL ) {
1814  IntrinsicType xmm1( (~C).load(i ,j) );
1815  IntrinsicType xmm2( (~C).load(i+1UL,j) );
1816  for( size_t k=0UL; k<K; ++k ) {
1817  const IntrinsicType b1( B.load(k,j) );
1818  xmm1 = xmm1 - set( A(i ,k) ) * b1;
1819  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
1820  }
1821  (~C).store( i , j, xmm1 );
1822  (~C).store( i+1UL, j, xmm2 );
1823  }
1824  if( i < M ) {
1825  IntrinsicType xmm1( (~C).load(i,j) );
1826  for( size_t k=0UL; k<K; ++k ) {
1827  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
1828  }
1829  (~C).store( i, j, xmm1 );
1830  }
1831  }
1832  }
1834  //**********************************************************************************************
1835 
1836  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>); enabled (via EnableIf) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> applies.
// No arithmetic is done here: one of the two operands is copied into a
// temporary of its OppositeType and the subtraction assignment is dispatched
// again through subAssign() on the resulting product expression.
1850  template< typename MT3 // Type of the left-hand side target matrix
1851  , typename MT4 // Type of the left-hand side matrix operand
1852  , typename MT5 > // Type of the right-hand side matrix operand
1853  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1854  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1855  {
1858 
      // If exactly one operand type is not resizable, that operand is the one
      // that gets converted.
1859  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1860  const typename MT4::OppositeType tmp( A );
1861  subAssign( ~C, tmp * B );
1862  }
1863  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1864  const typename MT5::OppositeType tmp( B );
1865  subAssign( ~C, A * tmp );
1866  }
      // Otherwise the operand with fewer elements is converted (A on a tie).
1867  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1868  const typename MT4::OppositeType tmp( A );
1869  subAssign( ~C, tmp * B );
1870  }
1871  else {
1872  const typename MT5::OppositeType tmp( B );
1873  subAssign( ~C, A * tmp );
1874  }
1875  }
1877  //**********************************************************************************************
1878 
1879  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Default overload of the BLAS kernel selection for the subtraction assignment;
// enabled (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5> applies.
// It performs no BLAS call and simply forwards to selectDefaultSubAssignKernel().
// NOTE(review): UseDefaultKernel is defined outside this section; presumably it
// covers the type combinations no BLAS routine is available for -- confirm.
1893  template< typename MT3 // Type of the left-hand side target matrix
1894  , typename MT4 // Type of the left-hand side matrix operand
1895  , typename MT5 > // Type of the right-hand side matrix operand
1896  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1897  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1898  {
1899  selectDefaultSubAssignKernel( C, A, B );
1900  }
1902  //**********************************************************************************************
1903 
1904  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1905 #if BLAZE_BLAS_MODE
1906 
// BLAS-based subtraction assignment via cblas_sgemm; enabled (via EnableIf)
// when UseSinglePrecisionKernel<MT3,MT4,MT5> applies.
// The factor on the product is -1.0F and the factor on C is 1.0F, i.e. the
// product of A and B is subtracted from the existing content of C.
1919  template< typename MT3 // Type of the left-hand side target matrix
1920  , typename MT4 // Type of the left-hand side matrix operand
1921  , typename MT5 > // Type of the right-hand side matrix operand
1922  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1923  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1924  {
1925  using boost::numeric_cast;
1926 
1930 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1931  const int M ( numeric_cast<int>( A.rows() ) );
1932  const int N ( numeric_cast<int>( B.columns() ) );
1933  const int K ( numeric_cast<int>( A.columns() ) );
1934  const int lda( numeric_cast<int>( A.spacing() ) );
1935  const int ldb( numeric_cast<int>( B.spacing() ) );
1936  const int ldc( numeric_cast<int>( C.spacing() ) );
1937 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1938  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1939  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1940  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1941  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1942  }
1944 #endif
1945  //**********************************************************************************************
1946 
1947  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1948 #if BLAZE_BLAS_MODE
1949 
// BLAS-based subtraction assignment via cblas_dgemm; enabled (via EnableIf)
// when UseDoublePrecisionKernel<MT3,MT4,MT5> applies.
// The factor on the product is -1.0 and the factor on C is 1.0, i.e. the
// product of A and B is subtracted from the existing content of C.
1962  template< typename MT3 // Type of the left-hand side target matrix
1963  , typename MT4 // Type of the left-hand side matrix operand
1964  , typename MT5 > // Type of the right-hand side matrix operand
1965  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1966  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1967  {
1968  using boost::numeric_cast;
1969 
1973 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
1974  const int M ( numeric_cast<int>( A.rows() ) );
1975  const int N ( numeric_cast<int>( B.columns() ) );
1976  const int K ( numeric_cast<int>( A.columns() ) );
1977  const int lda( numeric_cast<int>( A.spacing() ) );
1978  const int ldb( numeric_cast<int>( B.spacing() ) );
1979  const int ldc( numeric_cast<int>( C.spacing() ) );
1980 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
1981  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1982  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1983  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1984  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1985  }
1987 #endif
1988  //**********************************************************************************************
1989 
1990  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1991 #if BLAZE_BLAS_MODE
1992 
// BLAS-based subtraction assignment via cblas_cgemm; enabled (via EnableIf)
// when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies.
// alpha is (-1,0) and beta is (1,0), i.e. the product of A and B is subtracted
// from the existing content of C.
2005  template< typename MT3 // Type of the left-hand side target matrix
2006  , typename MT4 // Type of the left-hand side matrix operand
2007  , typename MT5 > // Type of the right-hand side matrix operand
2008  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2009  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2010  {
2011  using boost::numeric_cast;
2012 
      // Compile-time checks on the value_type of the element types of all three
      // matrices.
2016  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2017  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2018  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2019 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
2020  const int M ( numeric_cast<int>( A.rows() ) );
2021  const int N ( numeric_cast<int>( B.columns() ) );
2022  const int K ( numeric_cast<int>( A.columns() ) );
2023  const int lda( numeric_cast<int>( A.spacing() ) );
2024  const int ldb( numeric_cast<int>( B.spacing() ) );
2025  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cgemm by address.
2026  const complex<float> alpha( -1.0F, 0.0F );
2027  const complex<float> beta ( 1.0F, 0.0F );
2028 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
2029  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2030  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2031  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2032  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2033  }
2035 #endif
2036  //**********************************************************************************************
2037 
2038  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2039 #if BLAZE_BLAS_MODE
2040 
// BLAS-based subtraction assignment via cblas_zgemm; enabled (via EnableIf)
// when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies.
// alpha is (-1,0) and beta is (1,0), i.e. the product of A and B is subtracted
// from the existing content of C.
2053  template< typename MT3 // Type of the left-hand side target matrix
2054  , typename MT4 // Type of the left-hand side matrix operand
2055  , typename MT5 > // Type of the right-hand side matrix operand
2056  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2057  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2058  {
2059  using boost::numeric_cast;
2060 
      // Compile-time checks on the value_type of the element types of all three
      // matrices.
2064  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2065  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2066  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2067 
      // The CBLAS interface takes int arguments; the size_t dimensions and
      // spacings are converted with boost::numeric_cast.
2068  const int M ( numeric_cast<int>( A.rows() ) );
2069  const int N ( numeric_cast<int>( B.columns() ) );
2070  const int K ( numeric_cast<int>( A.columns() ) );
2071  const int lda( numeric_cast<int>( A.spacing() ) );
2072  const int ldb( numeric_cast<int>( B.spacing() ) );
2073  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to zgemm by address.
2074  const complex<double> alpha( -1.0, 0.0 );
2075  const complex<double> beta ( 1.0, 0.0 );
2076 
      // The layout argument follows the storage order of the target C. For a
      // target that is not row-major both operands are flagged as transposed.
      // NOTE(review): presumably because A and B are row-major here -- confirm.
2077  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2078  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2079  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2080  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2081  }
2083 #endif
2084  //**********************************************************************************************
2085 
2086  //**Subtraction assignment to sparse matrices***************************************************
2087  // No special implementation for the subtraction assignment to sparse matrices.
2088  //**********************************************************************************************
2089 
2090  //**Multiplication assignment to dense matrices*************************************************
2091  // No special implementation for the multiplication assignment to dense matrices.
2092  //**********************************************************************************************
2093 
2094  //**Multiplication assignment to sparse matrices************************************************
2095  // No special implementation for the multiplication assignment to sparse matrices.
2096  //**********************************************************************************************
2097 
2098  //**Compile time checks*************************************************************************
2105  //**********************************************************************************************
2106 };
2107 //*************************************************************************************************
2108 
2109 
2110 
2111 
2112 //=================================================================================================
2113 //
2114 // DMATSCALARMULTEXPR SPECIALIZATION
2115 //
2116 //=================================================================================================
2117 
2118 //*************************************************************************************************
2126 template< typename MT1 // Type of the left-hand side dense matrix
2127  , typename MT2 // Type of the right-hand side dense matrix
2128  , typename ST > // Type of the right-hand side scalar value
2129 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
2130  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2131  , private MatScalarMultExpr
2132  , private Computation
2133 {
2134  private:
2135  //**Type definitions****************************************************************************
// Private shorthand types for the wrapped matrix product and its two operands.
2136  typedef DMatDMatMultExpr<MT1,MT2> MMM;  //!< Type of the dense matrix multiplication expression.
2137  typedef typename MMM::ResultType RES;  //!< Result type of the dense matrix multiplication expression.
2138  typedef typename MT1::ResultType RT1;  //!< Result type of the left-hand side dense matrix expression.
2139  typedef typename MT2::ResultType RT2;  //!< Result type of the right-hand side dense matrix expression.
2140  typedef typename RT1::ElementType ET1;  //!< Element type of the left-hand side result type.
2141  typedef typename RT2::ElementType ET2;  //!< Element type of the right-hand side result type.
2142  typedef typename MT1::CompositeType CT1;  //!< Composite type of the left-hand side dense matrix expression.
2143  typedef typename MT2::CompositeType CT2;  //!< Composite type of the right-hand side dense matrix expression.
2144  //**********************************************************************************************
2145 
2146  //**********************************************************************************************
2148  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2149  //**********************************************************************************************
2150 
2151  //**********************************************************************************************
2153  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2154  //**********************************************************************************************
2155 
2156  //**********************************************************************************************
2158 
2160  template< typename T1, typename T2, typename T3, typename T4 >
2161  struct UseSMPAssignKernel {
2162  enum { value = evaluateLeft || evaluateRight };
2163  };
2164  //**********************************************************************************************
2165 
2166  //**********************************************************************************************
2168 
2171  template< typename T1, typename T2, typename T3, typename T4 >
2172  struct UseSinglePrecisionKernel {
2173  enum { value = IsFloat<typename T1::ElementType>::value &&
2174  IsFloat<typename T2::ElementType>::value &&
2175  IsFloat<typename T3::ElementType>::value &&
2176  !IsComplex<T4>::value };
2177  };
2178  //**********************************************************************************************
2179 
2180  //**********************************************************************************************
2182 
2185  template< typename T1, typename T2, typename T3, typename T4 >
2186  struct UseDoublePrecisionKernel {
2187  enum { value = IsDouble<typename T1::ElementType>::value &&
2188  IsDouble<typename T2::ElementType>::value &&
2189  IsDouble<typename T3::ElementType>::value &&
2190  !IsComplex<T4>::value };
2191  };
2192  //**********************************************************************************************
2193 
2194  //**********************************************************************************************
2196 
2199  template< typename T1, typename T2, typename T3 >
2200  struct UseSinglePrecisionComplexKernel {
2201  typedef complex<float> Type;
2202  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2203  IsSame<typename T2::ElementType,Type>::value &&
2204  IsSame<typename T3::ElementType,Type>::value };
2205  };
2206  //**********************************************************************************************
2207 
2208  //**********************************************************************************************
2210 
2213  template< typename T1, typename T2, typename T3 >
2214  struct UseDoublePrecisionComplexKernel {
2215  typedef complex<double> Type;
2216  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2217  IsSame<typename T2::ElementType,Type>::value &&
2218  IsSame<typename T3::ElementType,Type>::value };
2219  };
2220  //**********************************************************************************************
2221 
2222  //**********************************************************************************************
2224 
2226  template< typename T1, typename T2, typename T3, typename T4 >
2227  struct UseDefaultKernel {
2228  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2229  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2230  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2231  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2232  };
2233  //**********************************************************************************************
2234 
2235  //**********************************************************************************************
2237 
2239  template< typename T1, typename T2, typename T3, typename T4 >
2240  struct UseVectorizedDefaultKernel {
2241  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2242  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2243  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2244  IsSame<typename T1::ElementType,T4>::value &&
2245  IntrinsicTrait<typename T1::ElementType>::addition &&
2246  IntrinsicTrait<typename T1::ElementType>::subtraction &&
2247  IntrinsicTrait<typename T1::ElementType>::multiplication };
2248  };
2249  //**********************************************************************************************
2250 
2251  public:
2252  //**Type definitions****************************************************************************
// Public type interface of the scaled matrix product expression.
2253  typedef DMatScalarMultExpr<MMM,ST,false> This;  //!< Type of this DMatScalarMultExpr specialization.
2254  typedef typename MultTrait<RES,ST>::Type ResultType;  //!< Result type: MultTrait of the product's result type and the scalar type.
2255  typedef typename ResultType::OppositeType OppositeType;  //!< Opposite type as declared by ResultType.
2256  typedef typename ResultType::TransposeType TransposeType;  //!< Transpose type as declared by ResultType.
2257  typedef typename ResultType::ElementType ElementType;  //!< Element type of ResultType.
2258  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  //!< Intrinsic (SIMD) type associated with ElementType.
2259  typedef const ElementType ReturnType;  //!< Return type of the access operator (returned by value).
2260  typedef const ResultType CompositeType;  //!< Composite type of this expression: a constant ResultType.
2261 
2263  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;  //!< Type of the left-hand side operand (the matrix product).
2264 
2266  typedef ST RightOperand;  //!< Type of the right-hand side operand (the scalar).
2267 
2269  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;  //!< Type used for the left matrix operand inside the kernels; selected via evaluateLeft.
2270 
2272  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;  //!< Type used for the right matrix operand inside the kernels; selected via evaluateRight.
2273  //**********************************************************************************************
2274 
2275  //**Compilation flags***************************************************************************
2277  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2278  IsSame<ET1,ET2>::value &&
2279  IsSame<ET1,ST>::value &&
2280  IntrinsicTrait<ET1>::addition &&
2281  IntrinsicTrait<ET1>::multiplication };
2282 
2284  enum { smpAssignable = !evaluateLeft && !evaluateRight };
2285  //**********************************************************************************************
2286 
2287  //**Constructor*********************************************************************************
2293  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2294  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2295  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2296  {}
2297  //**********************************************************************************************
2298 
2299  //**Access operator*****************************************************************************
2306  inline ReturnType operator()( size_t i, size_t j ) const {
2307  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2308  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2309  return matrix_(i,j) * scalar_;
2310  }
2311  //**********************************************************************************************
2312 
2313  //**Rows function*******************************************************************************
2318  inline size_t rows() const {
2319  return matrix_.rows();
2320  }
2321  //**********************************************************************************************
2322 
2323  //**Columns function****************************************************************************
2328  inline size_t columns() const {
2329  return matrix_.columns();
2330  }
2331  //**********************************************************************************************
2332 
2333  //**Left operand access*************************************************************************
2338  inline LeftOperand leftOperand() const {
2339  return matrix_;
2340  }
2341  //**********************************************************************************************
2342 
2343  //**Right operand access************************************************************************
2348  inline RightOperand rightOperand() const {
2349  return scalar_;
2350  }
2351  //**********************************************************************************************
2352 
2353  //**********************************************************************************************
2359  template< typename T >
2360  inline bool canAlias( const T* alias ) const {
2361  return matrix_.canAlias( alias );
2362  }
2363  //**********************************************************************************************
2364 
2365  //**********************************************************************************************
2371  template< typename T >
2372  inline bool isAliased( const T* alias ) const {
2373  return matrix_.isAliased( alias );
2374  }
2375  //**********************************************************************************************
2376 
2377  //**********************************************************************************************
2382  inline bool isAligned() const {
2383  return matrix_.isAligned();
2384  }
2385  //**********************************************************************************************
2386 
2387  //**********************************************************************************************
2392  inline bool canSMPAssign() const {
2393  typename MMM::LeftOperand A( matrix_.leftOperand() );
2394  return ( !BLAZE_BLAS_IS_PARALLEL ||
2395  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
2396  ( A.rows() > SMP_DMATDMATMULT_THRESHOLD );
2397  }
2398  //**********************************************************************************************
2399 
2400  private:
2401  //**Member variables****************************************************************************
2402  LeftOperand matrix_;  //!< Left-hand side operand: the dense matrix multiplication expression.
2403  RightOperand scalar_;  //!< Right-hand side operand: the scalar of the multiplication expression.
2404  //**********************************************************************************************
2405 
2406  //**Assignment to dense matrices****************************************************************
2415  template< typename MT3 // Type of the target dense matrix
2416  , bool SO > // Storage order of the target dense matrix
2417  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2418  {
2420 
2421  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2422  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2423 
2424  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2425  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2426 
2427  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2428  return;
2429  }
2430  else if( left.columns() == 0UL ) {
2431  reset( ~lhs );
2432  return;
2433  }
2434 
2435  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2436  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2437 
2438  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2439  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2440  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2441  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2442  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2443  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2444 
2445  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2446  }
2447  //**********************************************************************************************
2448 
2449  //**Assignment to dense matrices (kernel selection)*********************************************
2460  template< typename MT3 // Type of the left-hand side target matrix
2461  , typename MT4 // Type of the left-hand side matrix operand
2462  , typename MT5 // Type of the right-hand side matrix operand
2463  , typename ST2 > // Type of the scalar value
2464  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2465  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2466  {
2467  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
2468  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2469  else
2470  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2471  }
2472  //**********************************************************************************************
2473 
2474  //**Assignment to dense matrices (kernel selection)*********************************************
2485  template< typename MT3 // Type of the left-hand side target matrix
2486  , typename MT4 // Type of the left-hand side matrix operand
2487  , typename MT5 // Type of the right-hand side matrix operand
2488  , typename ST2 > // Type of the scalar value
2489  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2490  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2491  {
2492  smpAssign( C, A * B * scalar );
2493  }
2494  //**********************************************************************************************
2495 
2496  //**Default assignment to dense matrices********************************************************
2510  template< typename MT3 // Type of the left-hand side target matrix
2511  , typename MT4 // Type of the left-hand side matrix operand
2512  , typename MT5 // Type of the right-hand side matrix operand
2513  , typename ST2 > // Type of the scalar value
2514  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2515  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2516  {
2517  const size_t M( A.rows() );
2518  const size_t N( B.columns() );
2519  const size_t K( A.columns() );
2520 
2521  for( size_t i=0UL; i<M; ++i ) {
2522  for( size_t j=0UL; j<N; ++j ) {
2523  C(i,j) = A(i,0UL) * B(0UL,j);
2524  }
2525  for( size_t k=1UL; k<K; ++k ) {
2526  for( size_t j=0UL; j<N; ++j ) {
2527  C(i,j) += A(i,k) * B(k,j);
2528  }
2529  }
2530  for( size_t j=0UL; j<N; ++j ) {
2531  C(i,j) *= scalar;
2532  }
2533  }
2534  }
2535  //**********************************************************************************************
2536 
2537  //**Vectorized default assignment to row-major dense matrices***********************************
2551  template< typename MT3 // Type of the left-hand side target matrix
2552  , typename MT4 // Type of the left-hand side matrix operand
2553  , typename MT5 // Type of the right-hand side matrix operand
2554  , typename ST2 > // Type of the scalar value
2555  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2556  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2557  {
2558  typedef IntrinsicTrait<ElementType> IT;
2559 
2560  const size_t M( A.rows() );
2561  const size_t N( B.columns() );
2562  const size_t K( A.columns() );
2563 
2564  const IntrinsicType factor( set( scalar ) );
2565 
2566  size_t j( 0UL );
2567 
2568  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2569  for( size_t i=0UL; i<M; ++i ) {
2570  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2571  for( size_t k=0UL; k<K; ++k ) {
2572  const IntrinsicType a1( set( A(i,k) ) );
2573  xmm1 = xmm1 + a1 * B.load(k,j );
2574  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2575  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2576  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2577  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2578  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2579  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2580  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2581  }
2582  (~C).store( i, j , xmm1 * factor );
2583  (~C).store( i, j+IT::size , xmm2 * factor );
2584  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2585  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2586  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
2587  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
2588  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
2589  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
2590  }
2591  }
2592  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2593  size_t i( 0UL );
2594  for( ; (i+2UL) <= M; i+=2UL ) {
2595  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2596  for( size_t k=0UL; k<K; ++k ) {
2597  const IntrinsicType a1( set( A(i ,k) ) );
2598  const IntrinsicType a2( set( A(i+1UL,k) ) );
2599  const IntrinsicType b1( B.load(k,j ) );
2600  const IntrinsicType b2( B.load(k,j+IT::size ) );
2601  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2602  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2603  xmm1 = xmm1 + a1 * b1;
2604  xmm2 = xmm2 + a1 * b2;
2605  xmm3 = xmm3 + a1 * b3;
2606  xmm4 = xmm4 + a1 * b4;
2607  xmm5 = xmm5 + a2 * b1;
2608  xmm6 = xmm6 + a2 * b2;
2609  xmm7 = xmm7 + a2 * b3;
2610  xmm8 = xmm8 + a2 * b4;
2611  }
2612  (~C).store( i , j , xmm1 * factor );
2613  (~C).store( i , j+IT::size , xmm2 * factor );
2614  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
2615  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
2616  (~C).store( i+1UL, j , xmm5 * factor );
2617  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
2618  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
2619  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
2620  }
2621  if( i < M ) {
2622  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2623  for( size_t k=0UL; k<K; ++k ) {
2624  const IntrinsicType a1( set( A(i,k) ) );
2625  xmm1 = xmm1 + a1 * B.load(k,j );
2626  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2627  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2628  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2629  }
2630  (~C).store( i, j , xmm1 * factor );
2631  (~C).store( i, j+IT::size , xmm2 * factor );
2632  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
2633  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
2634  }
2635  }
2636  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2637  size_t i( 0UL );
2638  for( ; (i+2UL) <= M; i+=2UL ) {
2639  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2640  for( size_t k=0UL; k<K; ++k ) {
2641  const IntrinsicType a1( set( A(i ,k) ) );
2642  const IntrinsicType a2( set( A(i+1UL,k) ) );
2643  const IntrinsicType b1( B.load(k,j ) );
2644  const IntrinsicType b2( B.load(k,j+IT::size) );
2645  xmm1 = xmm1 + a1 * b1;
2646  xmm2 = xmm2 + a1 * b2;
2647  xmm3 = xmm3 + a2 * b1;
2648  xmm4 = xmm4 + a2 * b2;
2649  }
2650  (~C).store( i , j , xmm1 * factor );
2651  (~C).store( i , j+IT::size, xmm2 * factor );
2652  (~C).store( i+1UL, j , xmm3 * factor );
2653  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
2654  }
2655  if( i < M ) {
2656  IntrinsicType xmm1, xmm2;
2657  for( size_t k=0UL; k<K; ++k ) {
2658  const IntrinsicType a1( set( A(i,k) ) );
2659  xmm1 = xmm1 + a1 * B.load(k,j );
2660  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2661  }
2662  (~C).store( i, j , xmm1 * factor );
2663  (~C).store( i, j+IT::size, xmm2 * factor );
2664  }
2665  }
2666  if( j < N ) {
2667  size_t i( 0UL );
2668  for( ; (i+2UL) <= M; i+=2UL ) {
2669  IntrinsicType xmm1, xmm2;
2670  for( size_t k=0UL; k<K; ++k ) {
2671  const IntrinsicType b1( B.load(k,j) );
2672  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2673  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2674  }
2675  (~C).store( i , j, xmm1 * factor );
2676  (~C).store( i+1UL, j, xmm2 * factor );
2677  }
2678  if( i < M ) {
2679  IntrinsicType xmm1;
2680  for( size_t k=0UL; k<K; ++k ) {
2681  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
2682  }
2683  (~C).store( i, j, xmm1 * factor );
2684  }
2685  }
2686  }
2687  //**********************************************************************************************
2688 
2689  //**Vectorized default assignment to column-major dense matrices********************************
2703  template< typename MT3 // Type of the left-hand side target matrix
2704  , typename MT4 // Type of the left-hand side matrix operand
2705  , typename MT5 // Type of the right-hand side matrix operand
2706  , typename ST2 > // Type of the scalar value
2707  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2708  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2709  {
2712 
2713  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2714  const typename MT4::OppositeType tmp( A );
2715  smpAssign( ~C, tmp * B * scalar );
2716  }
2717  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2718  const typename MT5::OppositeType tmp( B );
2719  smpAssign( ~C, A * tmp * scalar );
2720  }
2721  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2722  const typename MT4::OppositeType tmp( A );
2723  smpAssign( ~C, tmp * B * scalar );
2724  }
2725  else {
2726  const typename MT5::OppositeType tmp( B );
2727  smpAssign( ~C, A * tmp * scalar );
2728  }
2729  }
2730  //**********************************************************************************************
2731 
2732  //**BLAS-based assignment to dense matrices (default)*******************************************
2746  template< typename MT3 // Type of the left-hand side target matrix
2747  , typename MT4 // Type of the left-hand side matrix operand
2748  , typename MT5 // Type of the right-hand side matrix operand
2749  , typename ST2 > // Type of the scalar value
2750  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2751  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2752  {
2753  selectDefaultAssignKernel( C, A, B, scalar );
2754  }
2755  //**********************************************************************************************
2756 
//**BLAS-based assignment to dense matrices (single precision)**********************************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based assignment of a scaled dense matrix-dense matrix multiplication for
//        single precision matrices (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is forwarded to \c cblas_sgemm with alpha = \a scalar and beta = 0. All
// extents and strides are converted to \c int via \c boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   // Extents of the product and strides of the three matrices in the int-based BLAS interface
   const int rowsOfA( boost::numeric_cast<int>( A.rows()    ) );
   const int colsOfB( boost::numeric_cast<int>( B.columns() ) );
   const int inner  ( boost::numeric_cast<int>( A.columns() ) );
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // The storage order of the target selects both the BLAS layout and the transposition flags.
   // NOTE(review): this presumes that A and B are stored row-major - confirm.
   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_sgemm( ( rowMajorTarget )?( CblasRowMajor ):( CblasColMajor ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                rowsOfA, colsOfB, inner, scalar,
                A.data(), strideA, B.data(), strideB, 0.0F, C.data(), strideC );
}
#endif
2798  //**********************************************************************************************
2799 
//**BLAS-based assignment to dense matrices (double precision)**********************************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based assignment of a scaled dense matrix-dense matrix multiplication for
//        double precision matrices (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is forwarded to \c cblas_dgemm with alpha = \a scalar and beta = 0. All
// extents and strides are converted to \c int via \c boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   // Extents of the product and strides of the three matrices in the int-based BLAS interface
   const int rowsOfA( boost::numeric_cast<int>( A.rows()    ) );
   const int colsOfB( boost::numeric_cast<int>( B.columns() ) );
   const int inner  ( boost::numeric_cast<int>( A.columns() ) );
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   // The storage order of the target selects both the BLAS layout and the transposition flags.
   // NOTE(review): this presumes that A and B are stored row-major - confirm.
   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_dgemm( ( rowMajorTarget )?( CblasRowMajor ):( CblasColMajor ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                rowsOfA, colsOfB, inner, scalar,
                A.data(), strideA, B.data(), strideB, 0.0, C.data(), strideC );
}
#endif
2841  //**********************************************************************************************
2842 
//**BLAS-based assignment to dense matrices (single precision complex)**************************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based assignment of a scaled dense matrix-dense matrix multiplication for
//        single precision complex matrices (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is forwarded to \c cblas_cgemm with alpha constructed from \a scalar and
// beta = (0,0). All extents and strides are converted to \c int via \c boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE( typename MT5::ElementType::value_type );

   // Extents of the product and strides of the three matrices in the int-based BLAS interface
   const int rowsOfA( boost::numeric_cast<int>( A.rows()    ) );
   const int colsOfB( boost::numeric_cast<int>( B.columns() ) );
   const int inner  ( boost::numeric_cast<int>( A.columns() ) );
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const complex<float> scaling( scalar );      // alpha: the scaling factor
   const complex<float> zero   ( 0.0F, 0.0F );  // beta: previous content of C is discarded

   // The storage order of the target selects both the BLAS layout and the transposition flags.
   // NOTE(review): this presumes that A and B are stored row-major - confirm.
   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_cgemm( ( rowMajorTarget )?( CblasRowMajor ):( CblasColMajor ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                rowsOfA, colsOfB, inner, &scaling,
                A.data(), strideA, B.data(), strideB, &zero, C.data(), strideC );
}
#endif
2889  //**********************************************************************************************
2890 
//**BLAS-based assignment to dense matrices (double precision complex)**************************
#if BLAZE_BLAS_MODE
/*!\brief BLAS-based assignment of a scaled dense matrix-dense matrix multiplication for
//        double precision complex matrices (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The operation is forwarded to \c cblas_zgemm with alpha constructed from \a scalar and
// beta = (0,0). All extents and strides are converted to \c int via \c boost::numeric_cast.
*/
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT3::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT4::ElementType::value_type );
   BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE( typename MT5::ElementType::value_type );

   // Extents of the product and strides of the three matrices in the int-based BLAS interface
   const int rowsOfA( boost::numeric_cast<int>( A.rows()    ) );
   const int colsOfB( boost::numeric_cast<int>( B.columns() ) );
   const int inner  ( boost::numeric_cast<int>( A.columns() ) );
   const int strideA( boost::numeric_cast<int>( A.spacing() ) );
   const int strideB( boost::numeric_cast<int>( B.spacing() ) );
   const int strideC( boost::numeric_cast<int>( C.spacing() ) );

   const complex<double> scaling( scalar );    // alpha: the scaling factor
   const complex<double> zero   ( 0.0, 0.0 );  // beta: previous content of C is discarded

   // The storage order of the target selects both the BLAS layout and the transposition flags.
   // NOTE(review): this presumes that A and B are stored row-major - confirm.
   const bool rowMajorTarget( IsRowMajorMatrix<MT3>::value );

   cblas_zgemm( ( rowMajorTarget )?( CblasRowMajor ):( CblasColMajor ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                ( rowMajorTarget )?( CblasNoTrans  ):( CblasTrans    ),
                rowsOfA, colsOfB, inner, &scaling,
                A.data(), strideA, B.data(), strideB, &zero, C.data(), strideC );
}
#endif
2937  //**********************************************************************************************
2938 
2939  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled dense matrix-dense matrix multiplication to a sparse matrix.
//
// The expression is first evaluated into a temporary of type TmpType, which is then handed to
// smpAssign() together with the target.
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
2950  template< typename MT // Type of the target sparse matrix
2951  , bool SO > // Storage order of the target sparse matrix
2952  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2953  {
2955 
      // TmpType is chosen from OppositeType and ResultType depending on the storage order flag SO.
      // NOTE(review): presumably SelectType picks OppositeType when SO is true - confirm.
2956  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2957 
2964 
2965  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2966  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2967 
2968  const TmpType tmp( rhs ); // evaluate the expression into the temporary
2969  smpAssign( ~lhs, tmp );
2970  }
2971  //**********************************************************************************************
2972 
2973  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled dense matrix-dense matrix multiplication to a dense matrix.
//
// Extracts both operands of the wrapped multiplication, evaluates them into A and B and forwards
// them together with the scalar to selectAddAssignKernel(). Nothing is done when the target has
// no rows or no columns, or when the left operand has no columns.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be added.
2985  template< typename MT3 // Type of the target dense matrix
2986  , bool SO > // Storage order of the target dense matrix
2987  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2988  {
2990 
2991  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2992  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2993 
2994  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2995  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2996 
      // Early exit: with an empty target or a zero inner dimension no kernel is called.
2997  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2998  return;
2999  }
3000 
3001  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3002  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3003 
      // The evaluated operands must keep the sizes of the original operands and match the target.
3004  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3005  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3006  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3007  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3008  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3009  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3010 
3011  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3012  }
3013  //**********************************************************************************************
3014 
3015  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment (overload used when UseSMPAssignKernel does not
// apply, see the DisableIf in the return type).
//
// Targets with fewer than DMATDMATMULT_THRESHOLD elements (rows times columns) are handled by the
// default kernel, all other targets by the BLAS kernel selection.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3026  template< typename MT3 // Type of the left-hand side target matrix
3027  , typename MT4 // Type of the left-hand side matrix operand
3028  , typename MT5 // Type of the right-hand side matrix operand
3029  , typename ST2 > // Type of the scalar value
3030  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3031  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3032  {
3033  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3034  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3035  else
3036  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3037  }
3038  //**********************************************************************************************
3039 
3040  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment (overload used when UseSMPAssignKernel applies,
// see the EnableIf in the return type).
//
// Rebuilds the scaled product expression from the evaluated operands and passes it to
// smpAddAssign().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3051  template< typename MT3 // Type of the left-hand side target matrix
3052  , typename MT4 // Type of the left-hand side matrix operand
3053  , typename MT5 // Type of the right-hand side matrix operand
3054  , typename ST2 > // Type of the scalar value
3055  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3056  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3057  {
3058  smpAddAssign( C, A * B * scalar );
3059  }
3060  //**********************************************************************************************
3061 
3062  //**Default addition assignment to dense matrices***********************************************
// Default (non-vectorized) addition assignment kernel, used when UseVectorizedDefaultKernel does
// not apply (see the DisableIf in the return type).
//
// Evaluates the scaled product into a temporary of type ResultType and adds that temporary to C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3076  template< typename MT3 // Type of the left-hand side target matrix
3077  , typename MT4 // Type of the left-hand side matrix operand
3078  , typename MT5 // Type of the right-hand side matrix operand
3079  , typename ST2 > // Type of the scalar value
3080  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3081  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3082  {
3083  const ResultType tmp( A * B * scalar );
3084  addAssign( C, tmp );
3085  }
3086  //**********************************************************************************************
3087 
3088  //**Vectorized default addition assignment to row-major dense matrices**************************
// Vectorized default addition assignment kernel for a row-major target (DenseMatrix<MT3,false>).
//
// Adds the scaled product of A and B to C using IntrinsicType values. The columns of C are
// processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements. In the
// 4-, 2- and 1-wide blocks the rows are processed in pairs with a single-row remainder. For every
// block the products of A(i,k) and the loaded parts of row k of B are accumulated over k; the
// accumulated values are then multiplied by 'factor' and added to the values loaded from C before
// being stored back.
//
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed before being summed
// into; this presumably relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the block conditions only compare the first column of the last vector against N,
// so load()/store() can reach past column N-1; presumably the rows are padded - confirm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3102  template< typename MT3 // Type of the left-hand side target matrix
3103  , typename MT4 // Type of the left-hand side matrix operand
3104  , typename MT5 // Type of the right-hand side matrix operand
3105  , typename ST2 > // Type of the scalar value
3106  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3107  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3108  {
3109  typedef IntrinsicTrait<ElementType> IT;
3110 
3111  const size_t M( A.rows() );
3112  const size_t N( B.columns() );
3113  const size_t K( A.columns() );
3114 
      // NOTE(review): set( scalar ) presumably broadcasts the scalar to all vector lanes - confirm.
3115  const IntrinsicType factor( set( scalar ) );
3116 
      // Column index; shared by all block loops below so each loop continues where the last stopped.
3117  size_t j( 0UL );
3118 
      // Blocks of 8 vectors per row, one row at a time.
3119  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3120  for( size_t i=0UL; i<M; ++i ) {
3121  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3122  for( size_t k=0UL; k<K; ++k ) {
3123  const IntrinsicType a1( set( A(i,k) ) );
3124  xmm1 = xmm1 + a1 * B.load(k,j );
3125  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3126  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3127  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3128  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3129  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3130  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3131  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3132  }
3133  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3134  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3135  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3136  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3137  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3138  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3139  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3140  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
3141  }
3142  }
      // Blocks of 4 vectors per row, two rows at a time (xmm1-4: row i, xmm5-8: row i+1).
3143  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3144  size_t i( 0UL );
3145  for( ; (i+2UL) <= M; i+=2UL ) {
3146  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3147  for( size_t k=0UL; k<K; ++k ) {
3148  const IntrinsicType a1( set( A(i ,k) ) );
3149  const IntrinsicType a2( set( A(i+1UL,k) ) );
3150  const IntrinsicType b1( B.load(k,j ) );
3151  const IntrinsicType b2( B.load(k,j+IT::size ) );
3152  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3153  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3154  xmm1 = xmm1 + a1 * b1;
3155  xmm2 = xmm2 + a1 * b2;
3156  xmm3 = xmm3 + a1 * b3;
3157  xmm4 = xmm4 + a1 * b4;
3158  xmm5 = xmm5 + a2 * b1;
3159  xmm6 = xmm6 + a2 * b2;
3160  xmm7 = xmm7 + a2 * b3;
3161  xmm8 = xmm8 + a2 * b4;
3162  }
3163  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3164  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3165  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3166  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3167  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3168  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3169  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3170  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
3171  }
      // Remaining single row when M is odd.
3172  if( i < M ) {
3173  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3174  for( size_t k=0UL; k<K; ++k ) {
3175  const IntrinsicType a1( set( A(i,k) ) );
3176  xmm1 = xmm1 + a1 * B.load(k,j );
3177  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3178  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3179  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3180  }
3181  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3182  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3183  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3184  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3185  }
3186  }
      // Blocks of 2 vectors per row, two rows at a time (xmm1-2: row i, xmm3-4: row i+1).
3187  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3188  size_t i( 0UL );
3189  for( ; (i+2UL) <= M; i+=2UL ) {
3190  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3191  for( size_t k=0UL; k<K; ++k ) {
3192  const IntrinsicType a1( set( A(i ,k) ) );
3193  const IntrinsicType a2( set( A(i+1UL,k) ) );
3194  const IntrinsicType b1( B.load(k,j ) );
3195  const IntrinsicType b2( B.load(k,j+IT::size) );
3196  xmm1 = xmm1 + a1 * b1;
3197  xmm2 = xmm2 + a1 * b2;
3198  xmm3 = xmm3 + a2 * b1;
3199  xmm4 = xmm4 + a2 * b2;
3200  }
3201  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3202  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3203  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3204  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
3205  }
      // Remaining single row when M is odd.
3206  if( i < M ) {
3207  IntrinsicType xmm1, xmm2;
3208  for( size_t k=0UL; k<K; ++k ) {
3209  const IntrinsicType a1( set( A(i,k) ) );
3210  xmm1 = xmm1 + a1 * B.load(k,j );
3211  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3212  }
3213  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
3214  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
3215  }
3216  }
      // Final single vector of columns, two rows at a time plus a single-row remainder.
3217  if( j < N ) {
3218  size_t i( 0UL );
3219  for( ; (i+2UL) <= M; i+=2UL ) {
3220  IntrinsicType xmm1, xmm2;
3221  for( size_t k=0UL; k<K; ++k ) {
3222  const IntrinsicType b1( B.load(k,j) );
3223  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3224  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3225  }
3226  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
3227  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
3228  }
3229  if( i < M ) {
3230  IntrinsicType xmm1;
3231  for( size_t k=0UL; k<K; ++k ) {
3232  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3233  }
3234  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
3235  }
3236  }
3237  }
3238  //**********************************************************************************************
3239 
3240  //**Vectorized default addition assignment to column-major dense matrices***********************
// Vectorized default addition assignment kernel for a column-major target (DenseMatrix<MT3,true>).
//
// Performs no arithmetic itself: one of the two operands is copied into a temporary of its
// OppositeType and the addition assignment is re-issued via addAssign() with that temporary in
// the scaled product. The operand to convert is chosen as follows:
//  - A, if A is not resizable and B is resizable;
//  - B, if A is resizable and B is not resizable;
//  - otherwise the operand with fewer elements (A on a tie).
// NOTE(review): OppositeType is presumably the same matrix type with the other storage order -
// confirm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3254  template< typename MT3 // Type of the left-hand side target matrix
3255  , typename MT4 // Type of the left-hand side matrix operand
3256  , typename MT5 // Type of the right-hand side matrix operand
3257  , typename ST2 > // Type of the scalar value
3258  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3259  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3260  {
3263 
3264  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3265  const typename MT4::OppositeType tmp( A );
3266  addAssign( ~C, tmp * B * scalar );
3267  }
3268  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3269  const typename MT5::OppositeType tmp( B );
3270  addAssign( ~C, A * tmp * scalar );
3271  }
      // Same resizability on both sides: convert the operand with fewer elements.
3272  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3273  const typename MT4::OppositeType tmp( A );
3274  addAssign( ~C, tmp * B * scalar );
3275  }
3276  else {
3277  const typename MT5::OppositeType tmp( B );
3278  addAssign( ~C, A * tmp * scalar );
3279  }
3280  }
3281  //**********************************************************************************************
3282 
3283  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment kernels, selected through EnableIf when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
// Makes no BLAS call; it simply forwards all arguments to selectDefaultAddAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3297  template< typename MT3 // Type of the left-hand side target matrix
3298  , typename MT4 // Type of the left-hand side matrix operand
3299  , typename MT5 // Type of the right-hand side matrix operand
3300  , typename ST2 > // Type of the scalar value
3301  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3302  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3303  {
3304  selectDefaultAddAssignKernel( C, A, B, scalar );
3305  }
3306  //**********************************************************************************************
3307 
3308  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3309 #if BLAZE_BLAS_MODE
3310 
// BLAS-based addition assignment kernel for single precision operands, selected through EnableIf
// when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//
// Calls cblas_sgemm with alpha = scalar and beta = 1.0F, so the scaled product is accumulated
// onto the existing contents of C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor, passed to the BLAS call as alpha.
3323  template< typename MT3 // Type of the left-hand side target matrix
3324  , typename MT4 // Type of the left-hand side matrix operand
3325  , typename MT5 // Type of the right-hand side matrix operand
3326  , typename ST2 > // Type of the scalar value
3327  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3328  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3329  {
3330  using boost::numeric_cast;
3331 
3335 
      // Sizes and leading dimensions are passed to the BLAS call as int, hence the numeric_cast.
3336  const int M ( numeric_cast<int>( A.rows() ) );
3337  const int N ( numeric_cast<int>( B.columns() ) );
3338  const int K ( numeric_cast<int>( A.columns() ) );
3339  const int lda( numeric_cast<int>( A.spacing() ) );
3340  const int ldb( numeric_cast<int>( B.spacing() ) );
3341  const int ldc( numeric_cast<int>( C.spacing() ) );
3342 
      // Storage order and both transpose flags are derived from the target type MT3 only.
3343  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3344  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3345  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3346  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3347  }
3348 #endif
3349  //**********************************************************************************************
3350 
3351  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3352 #if BLAZE_BLAS_MODE
3353 
// BLAS-based addition assignment kernel for double precision operands, selected through EnableIf
// when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//
// Calls cblas_dgemm with alpha = scalar and beta = 1.0, so the scaled product is accumulated onto
// the existing contents of C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor, passed to the BLAS call as alpha.
3366  template< typename MT3 // Type of the left-hand side target matrix
3367  , typename MT4 // Type of the left-hand side matrix operand
3368  , typename MT5 // Type of the right-hand side matrix operand
3369  , typename ST2 > // Type of the scalar value
3370  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3371  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3372  {
3373  using boost::numeric_cast;
3374 
3378 
      // Sizes and leading dimensions are passed to the BLAS call as int, hence the numeric_cast.
3379  const int M ( numeric_cast<int>( A.rows() ) );
3380  const int N ( numeric_cast<int>( B.columns() ) );
3381  const int K ( numeric_cast<int>( A.columns() ) );
3382  const int lda( numeric_cast<int>( A.spacing() ) );
3383  const int ldb( numeric_cast<int>( B.spacing() ) );
3384  const int ldc( numeric_cast<int>( C.spacing() ) );
3385 
      // Storage order and both transpose flags are derived from the target type MT3 only.
3386  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3387  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3388  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3389  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3390  }
3391 #endif
3392  //**********************************************************************************************
3393 
3394  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3395 #if BLAZE_BLAS_MODE
3396 
// BLAS-based addition assignment kernel for single precision complex operands, selected through
// EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
//
// Calls cblas_cgemm with alpha = scalar and beta = (1,0), so the scaled product is accumulated
// onto the existing contents of C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor, converted to complex<float> before the call.
3409  template< typename MT3 // Type of the left-hand side target matrix
3410  , typename MT4 // Type of the left-hand side matrix operand
3411  , typename MT5 // Type of the right-hand side matrix operand
3412  , typename ST2 > // Type of the scalar value
3413  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3414  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3415  {
3416  using boost::numeric_cast;
3417 
      // Compile-time checks: the value_type of all three element types must be float.
3421  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3422  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3423  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3424 
      // Sizes and leading dimensions are passed to the BLAS call as int, hence the numeric_cast.
3425  const int M ( numeric_cast<int>( A.rows() ) );
3426  const int N ( numeric_cast<int>( B.columns() ) );
3427  const int K ( numeric_cast<int>( A.columns() ) );
3428  const int lda( numeric_cast<int>( A.spacing() ) );
3429  const int ldb( numeric_cast<int>( B.spacing() ) );
3430  const int ldc( numeric_cast<int>( C.spacing() ) );
3431  const complex<float> alpha( scalar );
3432  const complex<float> beta ( 1.0F, 0.0F ); // unit beta: keep C and add the product
3433 
      // Storage order and both transpose flags are derived from the target type MT3 only.
3434  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3435  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3436  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3437  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3438  }
3439 #endif
3440  //**********************************************************************************************
3441 
3442  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3443 #if BLAZE_BLAS_MODE
3444 
// BLAS-based addition assignment kernel for double precision complex operands, selected through
// EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
//
// Calls cblas_zgemm with alpha = scalar and beta = (1,0), so the scaled product is accumulated
// onto the existing contents of C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor, converted to complex<double> before the call.
3457  template< typename MT3 // Type of the left-hand side target matrix
3458  , typename MT4 // Type of the left-hand side matrix operand
3459  , typename MT5 // Type of the right-hand side matrix operand
3460  , typename ST2 > // Type of the scalar value
3461  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3462  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3463  {
3464  using boost::numeric_cast;
3465 
      // Compile-time checks: the value_type of all three element types must be double.
3469  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3470  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3471  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3472 
      // Sizes and leading dimensions are passed to the BLAS call as int, hence the numeric_cast.
3473  const int M ( numeric_cast<int>( A.rows() ) );
3474  const int N ( numeric_cast<int>( B.columns() ) );
3475  const int K ( numeric_cast<int>( A.columns() ) );
3476  const int lda( numeric_cast<int>( A.spacing() ) );
3477  const int ldb( numeric_cast<int>( B.spacing() ) );
3478  const int ldc( numeric_cast<int>( C.spacing() ) );
3479  const complex<double> alpha( scalar );
3480  const complex<double> beta ( 1.0, 0.0 ); // unit beta: keep C and add the product
3481 
      // Storage order and both transpose flags are derived from the target type MT3 only.
3482  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3483  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3484  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3485  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3486  }
3487 #endif
3488  //**********************************************************************************************
3489 
3490  //**Addition assignment to sparse matrices******************************************************
3491  // No special implementation for the addition assignment to sparse matrices.
3492  //**********************************************************************************************
3493 
3494  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled dense matrix-dense matrix multiplication to a dense matrix.
//
// Extracts both operands of the wrapped multiplication, evaluates them into A and B and forwards
// them together with the scalar to selectSubAssignKernel(). Nothing is done when the target has
// no rows or no columns, or when the left operand has no columns.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
3506  template< typename MT3 // Type of the target dense matrix
3507  , bool SO > // Storage order of the target dense matrix
3508  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3509  {
3511 
3512  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3513  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3514 
3515  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3516  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3517 
      // Early exit: with an empty target or a zero inner dimension no kernel is called.
3518  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3519  return;
3520  }
3521 
3522  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3523  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3524 
      // The evaluated operands must keep the sizes of the original operands and match the target.
3525  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3526  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3527  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3528  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3529  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3530  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3531 
3532  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3533  }
3534  //**********************************************************************************************
3535 
3536  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment (overload used when UseSMPAssignKernel does not
// apply, see the DisableIf in the return type).
//
// Targets with fewer than DMATDMATMULT_THRESHOLD elements (rows times columns) are handled by the
// default kernel, all other targets by the BLAS kernel selection.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3547  template< typename MT3 // Type of the left-hand side target matrix
3548  , typename MT4 // Type of the left-hand side matrix operand
3549  , typename MT5 // Type of the right-hand side matrix operand
3550  , typename ST2 > // Type of the scalar value
3551  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3552  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3553  {
3554  if( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD )
3555  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3556  else
3557  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3558  }
3559  //**********************************************************************************************
3560 
3561  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment (overload used when UseSMPAssignKernel applies,
// see the EnableIf in the return type).
//
// Rebuilds the scaled product expression from the evaluated operands and passes it to
// smpSubAssign().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3572  template< typename MT3 // Type of the left-hand side target matrix
3573  , typename MT4 // Type of the left-hand side matrix operand
3574  , typename MT5 // Type of the right-hand side matrix operand
3575  , typename ST2 > // Type of the scalar value
3576  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3577  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3578  {
3579  smpSubAssign( C, A * B * scalar );
3580  }
3581  //**********************************************************************************************
3582 
3583  //**Default subtraction assignment to dense matrices********************************************
// Default (non-vectorized) subtraction assignment kernel, used when UseVectorizedDefaultKernel
// does not apply (see the DisableIf in the return type).
//
// Evaluates the scaled product into a temporary of type ResultType and subtracts that temporary
// from C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3597  template< typename MT3 // Type of the left-hand side target matrix
3598  , typename MT4 // Type of the left-hand side matrix operand
3599  , typename MT5 // Type of the right-hand side matrix operand
3600  , typename ST2 > // Type of the scalar value
3601  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3602  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3603  {
3604  const ResultType tmp( A * B * scalar );
3605  subAssign( C, tmp );
3606  }
3607  //**********************************************************************************************
3608 
3609  //**Vectorized default subtraction assignment to row-major dense matrices***********************
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>).
//
// Subtracts the scaled product of A and B from C using IntrinsicType values. The columns of C are
// processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements. In the
// 4-, 2- and 1-wide blocks the rows are processed in pairs with a single-row remainder. For every
// block the products of A(i,k) and the loaded parts of row k of B are accumulated over k; the
// accumulated values are then multiplied by 'factor' and subtracted from the values loaded from C
// before being stored back.
//
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed before being summed
// into; this presumably relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the block conditions only compare the first column of the last vector against N,
// so load()/store() can reach past column N-1; presumably the rows are padded - confirm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3623  template< typename MT3 // Type of the left-hand side target matrix
3624  , typename MT4 // Type of the left-hand side matrix operand
3625  , typename MT5 // Type of the right-hand side matrix operand
3626  , typename ST2 > // Type of the scalar value
3627  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3628  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3629  {
3630  typedef IntrinsicTrait<ElementType> IT;
3631 
3632  const size_t M( A.rows() );
3633  const size_t N( B.columns() );
3634  const size_t K( A.columns() );
3635 
      // NOTE(review): set( scalar ) presumably broadcasts the scalar to all vector lanes - confirm.
3636  const IntrinsicType factor( set( scalar ) );
3637 
      // Column index; shared by all block loops below so each loop continues where the last stopped.
3638  size_t j( 0UL );
3639 
      // Blocks of 8 vectors per row, one row at a time.
3640  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3641  for( size_t i=0UL; i<M; ++i ) {
3642  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3643  for( size_t k=0UL; k<K; ++k ) {
3644  const IntrinsicType a1( set( A(i,k) ) );
3645  xmm1 = xmm1 + a1 * B.load(k,j );
3646  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3647  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3648  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3649  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3650  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3651  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3652  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3653  }
3654  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3655  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3656  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3657  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3658  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
3659  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
3660  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
3661  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
3662  }
3663  }
      // Blocks of 4 vectors per row, two rows at a time (xmm1-4: row i, xmm5-8: row i+1).
3664  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3665  size_t i( 0UL );
3666  for( ; (i+2UL) <= M; i+=2UL ) {
3667  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3668  for( size_t k=0UL; k<K; ++k ) {
3669  const IntrinsicType a1( set( A(i ,k) ) );
3670  const IntrinsicType a2( set( A(i+1UL,k) ) );
3671  const IntrinsicType b1( B.load(k,j ) );
3672  const IntrinsicType b2( B.load(k,j+IT::size ) );
3673  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3674  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3675  xmm1 = xmm1 + a1 * b1;
3676  xmm2 = xmm2 + a1 * b2;
3677  xmm3 = xmm3 + a1 * b3;
3678  xmm4 = xmm4 + a1 * b4;
3679  xmm5 = xmm5 + a2 * b1;
3680  xmm6 = xmm6 + a2 * b2;
3681  xmm7 = xmm7 + a2 * b3;
3682  xmm8 = xmm8 + a2 * b4;
3683  }
3684  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3685  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
3686  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
3687  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
3688  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
3689  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
3690  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
3691  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
3692  }
      // Remaining single row when M is odd.
3693  if( i < M ) {
3694  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3695  for( size_t k=0UL; k<K; ++k ) {
3696  const IntrinsicType a1( set( A(i,k) ) );
3697  xmm1 = xmm1 + a1 * B.load(k,j );
3698  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3699  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3700  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3701  }
3702  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3703  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3704  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3705  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3706  }
3707  }
      // Blocks of 2 vectors per row, two rows at a time (xmm1-2: row i, xmm3-4: row i+1).
3708  for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3709  size_t i( 0UL );
3710  for( ; (i+2UL) <= M; i+=2UL ) {
3711  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3712  for( size_t k=0UL; k<K; ++k ) {
3713  const IntrinsicType a1( set( A(i ,k) ) );
3714  const IntrinsicType a2( set( A(i+1UL,k) ) );
3715  const IntrinsicType b1( B.load(k,j ) );
3716  const IntrinsicType b2( B.load(k,j+IT::size) );
3717  xmm1 = xmm1 + a1 * b1;
3718  xmm2 = xmm2 + a1 * b2;
3719  xmm3 = xmm3 + a2 * b1;
3720  xmm4 = xmm4 + a2 * b2;
3721  }
3722  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3723  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
3724  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
3725  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
3726  }
      // Remaining single row when M is odd.
3727  if( i < M ) {
3728  IntrinsicType xmm1, xmm2;
3729  for( size_t k=0UL; k<K; ++k ) {
3730  const IntrinsicType a1( set( A(i,k) ) );
3731  xmm1 = xmm1 + a1 * B.load(k,j );
3732  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3733  }
3734  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
3735  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
3736  }
3737  }
      // Final single vector of columns, two rows at a time plus a single-row remainder.
3738  if( j < N ) {
3739  size_t i( 0UL );
3740  for( ; (i+2UL) <= M; i+=2UL ) {
3741  IntrinsicType xmm1, xmm2;
3742  for( size_t k=0UL; k<K; ++k ) {
3743  const IntrinsicType b1( B.load(k,j) );
3744  xmm1 = xmm1 + set( A(i ,k) ) * b1;
3745  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
3746  }
3747  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
3748  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
3749  }
3750  if( i < M ) {
3751  IntrinsicType xmm1;
3752  for( size_t k=0UL; k<K; ++k ) {
3753  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
3754  }
3755  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
3756  }
3757  }
3758  }
3759  //**********************************************************************************************
3760 
3761  //**Vectorized default subtraction assignment to column-major dense matrices********************
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>).
//
// Performs no arithmetic itself: one of the two operands is copied into a temporary of its
// OppositeType and the subtraction assignment is re-issued via subAssign() with that temporary in
// the scaled product. The operand to convert is chosen as follows:
//  - A, if A is not resizable and B is resizable;
//  - B, if A is resizable and B is not resizable;
//  - otherwise the operand with fewer elements (A on a tie).
// NOTE(review): OppositeType is presumably the same matrix type with the other storage order -
// confirm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3775  template< typename MT3 // Type of the left-hand side target matrix
3776  , typename MT4 // Type of the left-hand side matrix operand
3777  , typename MT5 // Type of the right-hand side matrix operand
3778  , typename ST2 > // Type of the scalar value
3779  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3780  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3781  {
3784 
3785  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3786  const typename MT4::OppositeType tmp( A );
3787  subAssign( ~C, tmp * B * scalar );
3788  }
3789  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3790  const typename MT5::OppositeType tmp( B );
3791  subAssign( ~C, A * tmp * scalar );
3792  }
      // Same resizability on both sides: convert the operand with fewer elements.
3793  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3794  const typename MT4::OppositeType tmp( A );
3795  subAssign( ~C, tmp * B * scalar );
3796  }
3797  else {
3798  const typename MT5::OppositeType tmp( B );
3799  subAssign( ~C, A * tmp * scalar );
3800  }
3801  }
3802  //**********************************************************************************************
3803 
3804  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback of the BLAS-based subtraction assignment kernels, selected through EnableIf when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
//
// Makes no BLAS call; it simply forwards all arguments to selectDefaultSubAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
3818  template< typename MT3 // Type of the left-hand side target matrix
3819  , typename MT4 // Type of the left-hand side matrix operand
3820  , typename MT5 // Type of the right-hand side matrix operand
3821  , typename ST2 > // Type of the scalar value
3822  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3823  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3824  {
3825  selectDefaultSubAssignKernel( C, A, B, scalar );
3826  }
3827  //**********************************************************************************************
3828 
3829  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3830 #if BLAZE_BLAS_MODE
3831 
// BLAS-based subtraction assignment kernel for single precision operands, selected through
// EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//
// Calls cblas_sgemm with alpha = -scalar and beta = 1.0F, so the scaled product is subtracted
// from the existing contents of C.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor; its negation is passed to the BLAS call as alpha.
3844  template< typename MT3 // Type of the left-hand side target matrix
3845  , typename MT4 // Type of the left-hand side matrix operand
3846  , typename MT5 // Type of the right-hand side matrix operand
3847  , typename ST2 > // Type of the scalar value
3848  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3849  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3850  {
3851  using boost::numeric_cast;
3852 
3856 
      // Sizes and leading dimensions are passed to the BLAS call as int, hence the numeric_cast.
3857  const int M ( numeric_cast<int>( A.rows() ) );
3858  const int N ( numeric_cast<int>( B.columns() ) );
3859  const int K ( numeric_cast<int>( A.columns() ) );
3860  const int lda( numeric_cast<int>( A.spacing() ) );
3861  const int ldb( numeric_cast<int>( B.spacing() ) );
3862  const int ldc( numeric_cast<int>( C.spacing() ) );
3863 
      // Storage order and both transpose flags are derived from the target type MT3 only.
3864  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3865  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3866  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3867  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3868  }
3869 #endif
3870  //**********************************************************************************************
3871 
3872  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
// Double-precision BLAS kernel of the scaled subtraction assignment.
//
// Compiled only when BLAZE_BLAS_MODE evaluates to non-zero, and selected through EnableIf when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. The work is delegated to cblas_dgemm with
// alpha = -scalar and beta = 1.0; per the CBLAS gemm contract this subtracts the scaled
// product of A and B from the current content of C.
//
// C       Target matrix; accessed through C.data() with C.spacing() as leading dimension.
// A       Left-hand side matrix operand (A.data(), leading dimension A.spacing()).
// B       Right-hand side matrix operand (B.data(), leading dimension B.spacing()).
// scalar  Scaling factor of the product; handed to BLAS negated.
3873 #if BLAZE_BLAS_MODE
3874 
3887  template< typename MT3 // Type of the left-hand side target matrix
3888  , typename MT4 // Type of the left-hand side matrix operand
3889  , typename MT5 // Type of the right-hand side matrix operand
3890  , typename ST2 > // Type of the scalar value
3891  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3892  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3893  {
3894  using boost::numeric_cast;
3895 
// NOTE(review): this listing jumps from 3895 to 3899; the omitted lines are presumably
// compile-time element-type constraints like the ones visible in the complex kernels --
// confirm against the original header.
3899 
// CBLAS expects int arguments; numeric_cast converts the sizes and spacings with a range check.
3900  const int M ( numeric_cast<int>( A.rows() ) );
3901  const int N ( numeric_cast<int>( B.columns() ) );
3902  const int K ( numeric_cast<int>( A.columns() ) );
3903  const int lda( numeric_cast<int>( A.spacing() ) );
3904  const int ldb( numeric_cast<int>( B.spacing() ) );
3905  const int ldc( numeric_cast<int>( C.spacing() ) );
3906 
// The storage order of the target MT3 selects the layout flag. For a target that is not
// row-major both operands are flagged as transposed (presumably because A and B are stored
// row-major in this expression -- confirm).
3907  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3908  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3909  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3910  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3911  }
3912 #endif
3913  //**********************************************************************************************
3914 
3915  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
// Single-precision complex BLAS kernel of the scaled subtraction assignment.
//
// Compiled only when BLAZE_BLAS_MODE evaluates to non-zero, and selected through EnableIf when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// selection condition). The work is delegated to cblas_cgemm with alpha = -scalar and
// beta = (1,0); per the CBLAS gemm contract this subtracts the scaled product of A and B from
// the current content of C.
//
// C       Target matrix; accessed through C.data() with C.spacing() as leading dimension.
// A       Left-hand side matrix operand (A.data(), leading dimension A.spacing()).
// B       Right-hand side matrix operand (B.data(), leading dimension B.spacing()).
// scalar  Scaling factor of the product; negated and converted to complex<float>.
3916 #if BLAZE_BLAS_MODE
3917 
3930  template< typename MT3 // Type of the left-hand side target matrix
3931  , typename MT4 // Type of the left-hand side matrix operand
3932  , typename MT5 // Type of the right-hand side matrix operand
3933  , typename ST2 > // Type of the scalar value
3934  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3935  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3936  {
3937  using boost::numeric_cast;
3938 
// NOTE(review): this listing jumps from 3938 to 3942; the omitted lines presumably hold further
// compile-time constraints on the three element types -- confirm against the original header.
// The visible constraints require the value_type of every element type to be float.
3942  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3943  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3944  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3945 
// CBLAS expects int arguments; numeric_cast converts the sizes and spacings with a range check.
3946  const int M ( numeric_cast<int>( A.rows() ) );
3947  const int N ( numeric_cast<int>( B.columns() ) );
3948  const int K ( numeric_cast<int>( A.columns() ) );
3949  const int lda( numeric_cast<int>( A.spacing() ) );
3950  const int ldb( numeric_cast<int>( B.spacing() ) );
3951  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address, hence the named locals.
3952  const complex<float> alpha( -scalar );
3953  const complex<float> beta ( 1.0F, 0.0F );
3954 
// The storage order of the target MT3 selects the layout flag. For a target that is not
// row-major both operands are flagged as transposed (presumably because A and B are stored
// row-major in this expression -- confirm).
3955  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3956  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3957  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3958  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3959  }
3960 #endif
3961  //**********************************************************************************************
3962 
3963  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
// Double-precision complex BLAS kernel of the scaled subtraction assignment.
//
// Compiled only when BLAZE_BLAS_MODE evaluates to non-zero, and selected through EnableIf when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// selection condition). The work is delegated to cblas_zgemm with alpha = -scalar and
// beta = (1,0); per the CBLAS gemm contract this subtracts the scaled product of A and B from
// the current content of C.
//
// C       Target matrix; accessed through C.data() with C.spacing() as leading dimension.
// A       Left-hand side matrix operand (A.data(), leading dimension A.spacing()).
// B       Right-hand side matrix operand (B.data(), leading dimension B.spacing()).
// scalar  Scaling factor of the product; negated and converted to complex<double>.
3964 #if BLAZE_BLAS_MODE
3965 
3978  template< typename MT3 // Type of the left-hand side target matrix
3979  , typename MT4 // Type of the left-hand side matrix operand
3980  , typename MT5 // Type of the right-hand side matrix operand
3981  , typename ST2 > // Type of the scalar value
3982  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3983  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3984  {
3985  using boost::numeric_cast;
3986 
// NOTE(review): this listing jumps from 3986 to 3990; the omitted lines presumably hold further
// compile-time constraints on the three element types -- confirm against the original header.
// The visible constraints require the value_type of every element type to be double.
3990  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3991  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3992  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3993 
// CBLAS expects int arguments; numeric_cast converts the sizes and spacings with a range check.
3994  const int M ( numeric_cast<int>( A.rows() ) );
3995  const int N ( numeric_cast<int>( B.columns() ) );
3996  const int K ( numeric_cast<int>( A.columns() ) );
3997  const int lda( numeric_cast<int>( A.spacing() ) );
3998  const int ldb( numeric_cast<int>( B.spacing() ) );
3999  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address, hence the named locals.
4000  const complex<double> alpha( -scalar );
4001  const complex<double> beta ( 1.0, 0.0 );
4002 
// The storage order of the target MT3 selects the layout flag. For a target that is not
// row-major both operands are flagged as transposed (presumably because A and B are stored
// row-major in this expression -- confirm).
4003  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4004  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4005  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4006  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4007  }
4008 #endif
4009  //**********************************************************************************************
4010 
4011  //**Subtraction assignment to sparse matrices***************************************************
4012  // No special implementation for the subtraction assignment to sparse matrices.
4013  //**********************************************************************************************
4014 
4015  //**Multiplication assignment to dense matrices*************************************************
4016  // No special implementation for the multiplication assignment to dense matrices.
4017  //**********************************************************************************************
4018 
4019  //**Multiplication assignment to sparse matrices************************************************
4020  // No special implementation for the multiplication assignment to sparse matrices.
4021  //**********************************************************************************************
4022 
4023  //**Compile time checks*************************************************************************
4032  //**********************************************************************************************
4033 };
4035 //*************************************************************************************************
4036 
4037 
4038 
4039 
4040 //=================================================================================================
4041 //
4042 // GLOBAL BINARY ARITHMETIC OPERATORS
4043 //
4044 //=================================================================================================
4045 
4046 //*************************************************************************************************
// Multiplication operator for the multiplication of two row-major dense matrices (lhs * rhs).
//
// lhs      The left-hand side dense matrix of the multiplication.
// rhs      The right-hand side dense matrix of the multiplication.
// returns  A DMatDMatMultExpr<T1,T2> expression object built from both operands; this function
//          itself only checks the sizes and constructs the expression.
// throws   std::invalid_argument ("Matrix sizes do not match") if the number of columns of lhs
//          differs from the number of rows of rhs.
4072 template< typename T1 // Type of the left-hand side dense matrix
4073  , typename T2 > // Type of the right-hand side dense matrix
4074 inline const DMatDMatMultExpr<T1,T2>
4075  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,false>& rhs )
4076 {
// NOTE(review): this listing skips source line 4077 (presumably a function-trace macro
// invocation) -- confirm against the original header.
4078 
// The inner dimensions must agree before the expression object may be created.
4079  if( (~lhs).columns() != (~rhs).rows() )
4080  throw std::invalid_argument( "Matrix sizes do not match" );
4081 
4082  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4083 }
4084 //*************************************************************************************************
4085 
4086 
4087 
4088 
4089 //=================================================================================================
4090 //
4091 // EXPRESSION TRAIT SPECIALIZATIONS
4092 //
4093 //=================================================================================================
4094 
4095 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for a DMatDMatMultExpr matrix operand and a vector VT.
//
// If MT1 and MT2 are both row-major dense matrices and VT is a dense column vector, Type is
// obtained by re-associating (MT1*MT2)*VT as MT1*(MT2*VT): the inner DMatDVecMultExprTrait yields
// the type of MT2*VT, the outer one the type of MT1 times that result. For every other
// combination of types, Type is INVALID_TYPE.
4097 template< typename MT1, typename MT2, typename VT >
4098 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4099 {
4100  public:
4101  //**********************************************************************************************
4102  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4103  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4104  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4105  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4106  , INVALID_TYPE >::Type Type;
4107  //**********************************************************************************************
4108 };
4110 //*************************************************************************************************
4111 
4112 
4113 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for a DMatDMatMultExpr matrix operand and a vector VT.
//
// If MT1 and MT2 are both row-major dense matrices and VT is a sparse column vector, Type is
// obtained by re-associating (MT1*MT2)*VT as MT1*(MT2*VT): DMatSVecMultExprTrait yields the type
// of MT2*VT, and that result is then combined with MT1 through DMatDVecMultExprTrait (i.e. the
// inner result is treated as a dense vector). For every other combination of types, Type is
// INVALID_TYPE.
4115 template< typename MT1, typename MT2, typename VT >
4116 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
4117 {
4118  public:
4119  //**********************************************************************************************
4120  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4121  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4122  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4123  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4124  , INVALID_TYPE >::Type Type;
4125  //**********************************************************************************************
4126 };
4128 //*************************************************************************************************
4129 
4130 
4131 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for a vector VT and a DMatDMatMultExpr matrix operand.
//
// If VT is a dense row vector and MT1 and MT2 are both row-major dense matrices, Type is
// obtained by re-associating VT*(MT1*MT2) as (VT*MT1)*MT2: the inner TDVecDMatMultExprTrait
// yields the type of VT*MT1, the outer one the type of that result times MT2. For every other
// combination of types, Type is INVALID_TYPE.
4133 template< typename VT, typename MT1, typename MT2 >
4134 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4135 {
4136  public:
4137  //**********************************************************************************************
4138  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4139  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4140  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4141  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4142  , INVALID_TYPE >::Type Type;
4143  //**********************************************************************************************
4144 };
4146 //*************************************************************************************************
4147 
4148 
4149 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for a vector VT and a DMatDMatMultExpr matrix operand.
//
// If VT is a sparse row vector and MT1 and MT2 are both row-major dense matrices, Type is
// obtained by re-associating VT*(MT1*MT2) as (VT*MT1)*MT2: TSVecDMatMultExprTrait yields the
// type of VT*MT1, and that result is then combined with MT2 through TDVecDMatMultExprTrait
// (i.e. the inner result is treated as a dense vector). For every other combination of types,
// Type is INVALID_TYPE.
4151 template< typename VT, typename MT1, typename MT2 >
4152 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
4153 {
4154  public:
4155  //**********************************************************************************************
4156  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4157  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4158  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4159  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4160  , INVALID_TYPE >::Type Type;
4161  //**********************************************************************************************
4162 };
4164 //*************************************************************************************************
4165 
4166 
4167 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for DMatDMatMultExpr.
//
// Type is the multiplication expression type (MultExprTrait) of the submatrix types of the two
// operands: SubmatrixExprTrait is applied to const MT1 and const MT2 individually, each with the
// same flag AF, and the two results are multiplied.
// NOTE(review): AF is presumably the alignment flag of the submatrix view -- confirm.
4169 template< typename MT1, typename MT2, bool AF >
4170 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2>, AF >
4171 {
4172  public:
4173  //**********************************************************************************************
4174  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4175  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4176  //**********************************************************************************************
4177 };
4179 //*************************************************************************************************
4180 
4181 
4182 //*************************************************************************************************
// Specialization of RowExprTrait for DMatDMatMultExpr.
//
// Type is the multiplication expression type (MultExprTrait) of the row type of the left
// operand (RowExprTrait<const MT1>::Type) and the complete right operand MT2; only the left
// operand is reduced to a row.
4184 template< typename MT1, typename MT2 >
4185 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
4186 {
4187  public:
4188  //**********************************************************************************************
4189  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4190  //**********************************************************************************************
4191 };
4193 //*************************************************************************************************
4194 
4195 
4196 //*************************************************************************************************
// Specialization of ColumnExprTrait for DMatDMatMultExpr.
//
// Type is the multiplication expression type (MultExprTrait) of the complete left operand MT1
// and the column type of the right operand (ColumnExprTrait<const MT2>::Type); only the right
// operand is reduced to a column.
4198 template< typename MT1, typename MT2 >
4199 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
4200 {
4201  public:
4202  //**********************************************************************************************
4203  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4204  //**********************************************************************************************
4205 };
4207 //*************************************************************************************************
4208 
4209 } // namespace blaze
4210 
4211 #endif
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:127
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4075
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:124
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:151
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:259
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:330
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:246
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:250
Header file for the sparse matrix SMP implementation.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:247
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:256
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications.The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:116
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2380
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:125
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:350
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:251
Header file for the dense matrix SMP implementation.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:394
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold.This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type.In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:285
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:404
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:248
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:372
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:360
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:300
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:413
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:247
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:239
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:253
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:340
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:249
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:265
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:262
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:122
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold.This threshold represents ...
Definition: Thresholds.h:433
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:126
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:123
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:414
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:384
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.