All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
75 #include <blaze/system/BLAS.h>
77 #include <blaze/util/Assert.h>
78 #include <blaze/util/Complex.h>
84 #include <blaze/util/DisableIf.h>
85 #include <blaze/util/EnableIf.h>
86 #include <blaze/util/InvalidType.h>
88 #include <blaze/util/SelectType.h>
89 #include <blaze/util/Types.h>
95 
96 
97 namespace blaze {
98 
99 //=================================================================================================
100 //
101 // CLASS DMATTDMATMULTEXPR
102 //
103 //=================================================================================================
104 
105 //*************************************************************************************************
112 template< typename MT1 // Type of the left-hand side dense matrix
113  , typename MT2 > // Type of the right-hand side dense matrix
114 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
115  , private MatMatMultExpr
116  , private Computation
117 {
118  private:
119  //**Type definitions****************************************************************************
// Shorthand aliases for the ResultType and CompositeType exported by the two operand types.
120  typedef typename MT1::ResultType RT1;
121  typedef typename MT2::ResultType RT2;
122  typedef typename MT1::CompositeType CT1;
123  typedef typename MT2::CompositeType CT2;
124  //**********************************************************************************************
125 
126  //**********************************************************************************************
128 
// Compile-time predicate; its nested 'value' is read by UseDefaultKernel and it gates the
// cblas_sgemm overload of selectBlasAssignKernel.
// NOTE(review): the body of this struct is absent from this listing (source lines 133-135
// are missing), so the exact condition cannot be confirmed from here.
131  template< typename T1, typename T2, typename T3 >
132  struct UseSinglePrecisionKernel {
136  };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
// Compile-time predicate; its nested 'value' is read by UseDefaultKernel and it gates the
// cblas_dgemm overload of selectBlasAssignKernel.
// NOTE(review): the body of this struct is absent from this listing (source lines 147-149
// are missing), so the exact condition cannot be confirmed from here.
145  template< typename T1, typename T2, typename T3 >
146  struct UseDoublePrecisionKernel {
150  };
152  //**********************************************************************************************
153 
154  //**********************************************************************************************
156 
// Compile-time predicate: 'value' is non-zero only if the element types of T1, T2 and T3
// are all exactly complex<float>.
160  template< typename T1, typename T2, typename T3 >
161  struct UseSinglePrecisionComplexKernel {
162  typedef complex<float> Type;
163  enum { value = IsSame<typename T1::ElementType,Type>::value &&
164  IsSame<typename T2::ElementType,Type>::value &&
165  IsSame<typename T3::ElementType,Type>::value };
166  };
168  //**********************************************************************************************
169 
170  //**********************************************************************************************
172 
// Compile-time predicate: 'value' is non-zero only if the element types of T1, T2 and T3
// are all exactly complex<double>.
176  template< typename T1, typename T2, typename T3 >
177  struct UseDoublePrecisionComplexKernel {
178  typedef complex<double> Type;
179  enum { value = IsSame<typename T1::ElementType,Type>::value &&
180  IsSame<typename T2::ElementType,Type>::value &&
181  IsSame<typename T3::ElementType,Type>::value };
182  };
184  //**********************************************************************************************
185 
186  //**********************************************************************************************
188 
// Compile-time predicate selecting the non-BLAS fallback: 'value' is non-zero if
// BLAZE_BLAS_MODE evaluates to false, or if none of the four BLAS kernel predicates
// (single, double, single complex, double complex) holds for T1, T2 and T3.
191  template< typename T1, typename T2, typename T3 >
192  struct UseDefaultKernel {
193  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
194  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
195  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
196  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
197  };
199  //**********************************************************************************************
200 
201  //**********************************************************************************************
203 
// Compile-time predicate for the vectorized default kernels: 'value' is non-zero only if
// all three types report 'vectorizable', all three share the same ElementType, and
// IntrinsicTrait for that element type reports both 'addition' and 'multiplication'.
206  template< typename T1, typename T2, typename T3 >
207  struct UseVectorizedDefaultKernel {
208  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
210  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
211  IntrinsicTrait<typename T1::ElementType>::addition &&
212  IntrinsicTrait<typename T1::ElementType>::multiplication };
213  };
215  //**********************************************************************************************
216 
217  public:
218  //**Type definitions****************************************************************************
// Public type aliases of the expression.
// NOTE(review): ElementType, ResultType and OppositeType are used by this class but their
// declarations are absent from this listing (source lines 219-224 are missing).
// ReturnType: value type returned by operator(). CompositeType: const ResultType.
225  typedef const ElementType ReturnType;
226  typedef const ResultType CompositeType;
227 
// Type of the stored left operand, chosen between 'const MT1' and 'const MT1&' depending on
// IsExpression<MT1> (presumably by value for expressions -- confirm against SelectType).
229  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
230 
// Type of the stored right operand, chosen the same way for MT2.
232  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
233 
// Type used to evaluate the left operand inside the assign kernels: chosen between
// 'const RT1' and CT1 depending on IsComputation<MT1>.
235  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
236 
// Type used to evaluate the right operand inside the assign kernels: chosen between
// 'const RT2' and CT2 depending on IsComputation<MT2>.
238  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
239  //**********************************************************************************************
240 
241  //**Compilation flags***************************************************************************
// Compilation flag: this expression reports itself as not vectorizable (0).
243  enum { vectorizable = 0 };
244  //**********************************************************************************************
245 
246  //**Constructor*********************************************************************************
// Constructs the multiplication expression from its two operands.
// lhs: left-hand side operand, stored in lhs_. rhs: right-hand side operand, stored in rhs_.
// The inner dimensions (lhs.columns() and rhs.rows()) are only checked via an internal assert.
252  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
253  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
254  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
255  {
256  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
257  }
258  //**********************************************************************************************
259 
260  //**Access operator*****************************************************************************
// Computes element (i,j) of the product as the sum over k of lhs_(i,k) * rhs_(k,j).
// i: row index, asserted to be below lhs_.rows(). j: column index, asserted to be below
// rhs_.columns(). If lhs_ has no columns, the result is produced by reset().
267  inline ReturnType operator()( size_t i, size_t j ) const {
268  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
269  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
270 
271  ElementType tmp;
272 
273  if( lhs_.columns() != 0UL ) {
     // 'end' is the first index not covered by the pairwise loop below: it equals
     // columns() when columns() is odd and columns()-1 when columns() is even.
274  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
     // The k == 0 term initializes tmp, so tmp never has to be default-initialized here.
275  tmp = lhs_(i,0UL) * rhs_(0UL,j);
     // Remaining terms are accumulated two per iteration.
276  for( size_t k=1UL; k<end; k+=2UL ) {
277  tmp += lhs_(i,k ) * rhs_(k ,j);
278  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
279  }
     // One trailing term is left over when columns() is even.
280  if( end < lhs_.columns() ) {
281  tmp += lhs_(i,end) * rhs_(end,j);
282  }
283  }
284  else {
285  reset( tmp );
286  }
287 
288  return tmp;
289  }
290  //**********************************************************************************************
291 
292  //**Rows function*******************************************************************************
// Returns the number of rows of the product, i.e. the row count of the left operand.
297  inline size_t rows() const {
298  return lhs_.rows();
299  }
300  //**********************************************************************************************
301 
302  //**Columns function****************************************************************************
// Returns the number of columns of the product, i.e. the column count of the right operand.
307  inline size_t columns() const {
308  return rhs_.columns();
309  }
310  //**********************************************************************************************
311 
312  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand.
317  inline LeftOperand leftOperand() const {
318  return lhs_;
319  }
320  //**********************************************************************************************
321 
322  //**Right operand access************************************************************************
// Returns the stored right-hand side operand.
327  inline RightOperand rightOperand() const {
328  return rhs_;
329  }
330  //**********************************************************************************************
331 
332  //**********************************************************************************************
// Returns true if either operand reports being aliased with the given address.
// alias: address to check; it is forwarded to isAliased() of both operands.
338  template< typename T >
339  inline bool canAlias( const T* alias ) const {
340  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
341  }
342  //**********************************************************************************************
343 
344  //**********************************************************************************************
// Returns true if either operand reports being aliased with the given address.
// alias: address to check; it is forwarded to isAliased() of both operands.
350  template< typename T >
351  inline bool isAliased( const T* alias ) const {
352  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
353  }
354  //**********************************************************************************************
355 
356  private:
357  //**Member variables****************************************************************************
360  //**********************************************************************************************
361 
362  //**Assignment to dense matrices****************************************************************
// Assigns the product represented by 'rhs' to the dense matrix 'lhs'.
// lhs: target dense matrix; its dimensions are asserted to match those of rhs.
// rhs: multiplication expression to evaluate.
// Small targets go to the default kernel, larger ones to the BLAS kernel selection.
371  template< typename MT // Type of the target dense matrix
372  , bool SO > // Storage order of the target dense matrix
373  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
374  {
376 
377  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
378  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
379 
     // Nothing to do for an empty target; with an empty inner dimension the target is reset.
380  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
381  return;
382  }
383  else if( rhs.lhs_.columns() == 0UL ) {
384  reset( ~lhs );
385  return;
386  }
387 
388  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
389  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
390 
391  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
392  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
393  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
394  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
395  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
396  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
397 
     // Kernel choice is based on the number of target elements (rows * columns).
398  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
399  DMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
400  else
401  DMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
402  }
404  //**********************************************************************************************
405 
406  //**Default assignment to dense matrices********************************************************
// Scalar default kernel for C = A * B; enabled when UseVectorizedDefaultKernel does not hold.
// C: target matrix. A: left operand. B: right operand.
// The k == 0 term is read unconditionally, so A must have at least one column; the visible
// caller (assign) returns early when the inner dimension is zero.
420  template< typename MT3 // Type of the left-hand side target matrix
421  , typename MT4 // Type of the left-hand side matrix operand
422  , typename MT5 > // Type of the right-hand side matrix operand
423  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
424  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
425  {
426  const size_t M( A.rows() );
427  const size_t N( B.columns() );
428  const size_t K( A.columns() );
429 
430  for( size_t i=0UL; i<M; ++i ) {
     // Row i of C is initialized with the k == 0 term ...
431  for( size_t j=0UL; j<N; ++j ) {
432  C(i,j) = A(i,0UL) * B(0UL,j);
433  }
     // ... and the remaining terms are accumulated on top of it.
434  for( size_t k=1UL; k<K; ++k ) {
435  for( size_t j=0UL; j<N; ++j ) {
436  C(i,j) += A(i,k) * B(k,j);
437  }
438  }
439  }
440  }
442  //**********************************************************************************************
443 
444  //**Vectorized default assignment to row-major dense matrices***********************************
458  template< typename MT3 // Type of the left-hand side target matrix
459  , typename MT4 // Type of the left-hand side matrix operand
460  , typename MT5 > // Type of the right-hand side matrix operand
461  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
462  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
463  {
464  typedef IntrinsicTrait<ElementType> IT;
465 
466  const size_t M( A.rows() );
467  const size_t N( B.columns() );
468  const size_t K( A.columns() );
469 
470  size_t i( 0UL );
471 
472  for( ; (i+2UL) <= M; i+=2UL ) {
473  size_t j( 0UL );
474  for( ; (j+4UL) <= N; j+=4UL ) {
475  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
476  for( size_t k=0UL; k<K; k+=IT::size ) {
477  const IntrinsicType a1( A.load(i ,k) );
478  const IntrinsicType a2( A.load(i+1UL,k) );
479  const IntrinsicType b1( B.load(k,j ) );
480  const IntrinsicType b2( B.load(k,j+1UL) );
481  const IntrinsicType b3( B.load(k,j+2UL) );
482  const IntrinsicType b4( B.load(k,j+3UL) );
483  xmm1 = xmm1 + a1 * b1;
484  xmm2 = xmm2 + a1 * b2;
485  xmm3 = xmm3 + a1 * b3;
486  xmm4 = xmm4 + a1 * b4;
487  xmm5 = xmm5 + a2 * b1;
488  xmm6 = xmm6 + a2 * b2;
489  xmm7 = xmm7 + a2 * b3;
490  xmm8 = xmm8 + a2 * b4;
491  }
492  (~C)(i ,j ) = sum( xmm1 );
493  (~C)(i ,j+1UL) = sum( xmm2 );
494  (~C)(i ,j+2UL) = sum( xmm3 );
495  (~C)(i ,j+3UL) = sum( xmm4 );
496  (~C)(i+1UL,j ) = sum( xmm5 );
497  (~C)(i+1UL,j+1UL) = sum( xmm6 );
498  (~C)(i+1UL,j+2UL) = sum( xmm7 );
499  (~C)(i+1UL,j+3UL) = sum( xmm8 );
500  }
501  for( ; (j+2UL) <= N; j+=2UL ) {
502  IntrinsicType xmm1, xmm2, xmm3, xmm4;
503  for( size_t k=0UL; k<K; k+=IT::size ) {
504  const IntrinsicType a1( A.load(i ,k) );
505  const IntrinsicType a2( A.load(i+1UL,k) );
506  const IntrinsicType b1( B.load(k,j ) );
507  const IntrinsicType b2( B.load(k,j+1UL) );
508  xmm1 = xmm1 + a1 * b1;
509  xmm2 = xmm2 + a1 * b2;
510  xmm3 = xmm3 + a2 * b1;
511  xmm4 = xmm4 + a2 * b2;
512  }
513  (~C)(i ,j ) = sum( xmm1 );
514  (~C)(i ,j+1UL) = sum( xmm2 );
515  (~C)(i+1UL,j ) = sum( xmm3 );
516  (~C)(i+1UL,j+1UL) = sum( xmm4 );
517  }
518  if( j < N ) {
519  IntrinsicType xmm1, xmm2;
520  for( size_t k=0UL; k<K; k+=IT::size ) {
521  const IntrinsicType b1( B.load(k,j) );
522  xmm1 = xmm1 + A.load(i ,k) * b1;
523  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
524  }
525  (~C)(i ,j) = sum( xmm1 );
526  (~C)(i+1UL,j) = sum( xmm2 );
527  }
528  }
529  if( i < M ) {
530  size_t j( 0UL );
531  for( ; (j+4UL) <= N; j+=4UL ) {
532  IntrinsicType xmm1, xmm2, xmm3, xmm4;
533  for( size_t k=0UL; k<K; k+=IT::size ) {
534  const IntrinsicType a1( A.load(i,k) );
535  xmm1 = xmm1 + a1 * B.load(k,j );
536  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
537  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
538  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
539  }
540  (~C)(i,j ) = sum( xmm1 );
541  (~C)(i,j+1UL) = sum( xmm2 );
542  (~C)(i,j+2UL) = sum( xmm3 );
543  (~C)(i,j+3UL) = sum( xmm4 );
544  }
545  for( ; (j+2UL) <= N; j+=2UL ) {
546  IntrinsicType xmm1, xmm2;
547  for( size_t k=0UL; k<K; k+=IT::size ) {
548  const IntrinsicType a1( A.load(i,k) );
549  xmm1 = xmm1 + a1 * B.load(k,j );
550  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
551  }
552  (~C)(i,j ) = sum( xmm1 );
553  (~C)(i,j+1UL) = sum( xmm2 );
554  }
555  if( j < N ) {
556  IntrinsicType xmm1, xmm2;
557  for( size_t k=0UL; k<K; k+=IT::size ) {
558  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
559  }
560  (~C)(i,j) = sum( xmm1 );
561  }
562  }
563  }
565  //**********************************************************************************************
566 
567  //**Vectorized default assignment to column-major dense matrices********************************
581  template< typename MT3 // Type of the left-hand side target matrix
582  , typename MT4 // Type of the left-hand side matrix operand
583  , typename MT5 > // Type of the right-hand side matrix operand
584  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
585  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
586  {
587  typedef IntrinsicTrait<ElementType> IT;
588 
589  const size_t M( A.rows() );
590  const size_t N( B.columns() );
591  const size_t K( A.columns() );
592 
593  size_t i( 0UL );
594 
595  for( ; (i+4UL) <= M; i+=4UL ) {
596  size_t j( 0UL );
597  for( ; (j+2UL) <= N; j+=2UL ) {
598  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
599  for( size_t k=0UL; k<K; k+=IT::size ) {
600  const IntrinsicType a1( A.load(i ,k) );
601  const IntrinsicType a2( A.load(i+1UL,k) );
602  const IntrinsicType a3( A.load(i+2UL,k) );
603  const IntrinsicType a4( A.load(i+3UL,k) );
604  const IntrinsicType b1( B.load(k,j ) );
605  const IntrinsicType b2( B.load(k,j+1UL) );
606  xmm1 = xmm1 + a1 * b1;
607  xmm2 = xmm2 + a1 * b2;
608  xmm3 = xmm3 + a2 * b1;
609  xmm4 = xmm4 + a2 * b2;
610  xmm5 = xmm5 + a3 * b1;
611  xmm6 = xmm6 + a3 * b2;
612  xmm7 = xmm7 + a4 * b1;
613  xmm8 = xmm8 + a4 * b2;
614  }
615  (~C)(i ,j ) = sum( xmm1 );
616  (~C)(i ,j+1UL) = sum( xmm2 );
617  (~C)(i+1UL,j ) = sum( xmm3 );
618  (~C)(i+1UL,j+1UL) = sum( xmm4 );
619  (~C)(i+2UL,j ) = sum( xmm5 );
620  (~C)(i+2UL,j+1UL) = sum( xmm6 );
621  (~C)(i+3UL,j ) = sum( xmm7 );
622  (~C)(i+3UL,j+1UL) = sum( xmm8 );
623  }
624  if( j < N ) {
625  IntrinsicType xmm1, xmm2, xmm3, xmm4;
626  for( size_t k=0UL; k<K; k+=IT::size ) {
627  const IntrinsicType b1( B.load(k,j) );
628  xmm1 = xmm1 + A.load(i ,k) * b1;
629  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
630  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
631  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
632  }
633  (~C)(i ,j) = sum( xmm1 );
634  (~C)(i+1UL,j) = sum( xmm2 );
635  (~C)(i+2UL,j) = sum( xmm3 );
636  (~C)(i+3UL,j) = sum( xmm4 );
637  }
638  }
639  for( ; (i+2UL) <= M; i+=2UL ) {
640  size_t j( 0UL );
641  for( ; (j+2UL) <= N; j+=2UL ) {
642  IntrinsicType xmm1, xmm2, xmm3, xmm4;
643  for( size_t k=0UL; k<K; k+=IT::size ) {
644  const IntrinsicType a1( A.load(i ,k) );
645  const IntrinsicType a2( A.load(i+1UL,k) );
646  const IntrinsicType b1( B.load(k,j ) );
647  const IntrinsicType b2( B.load(k,j+1UL) );
648  xmm1 = xmm1 + a1 * b1;
649  xmm2 = xmm2 + a1 * b2;
650  xmm3 = xmm3 + a2 * b1;
651  xmm4 = xmm4 + a2 * b2;
652  }
653  (~C)(i ,j ) = sum( xmm1 );
654  (~C)(i ,j+1UL) = sum( xmm2 );
655  (~C)(i+1UL,j ) = sum( xmm3 );
656  (~C)(i+1UL,j+1UL) = sum( xmm4 );
657  }
658  if( j < N ) {
659  IntrinsicType xmm1, xmm2;
660  for( size_t k=0UL; k<K; k+=IT::size ) {
661  const IntrinsicType b1( B.load(k,j) );
662  xmm1 = xmm1 + A.load(i ,k) * b1;
663  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
664  }
665  (~C)(i ,j) = sum( xmm1 );
666  (~C)(i+1UL,j) = sum( xmm2 );
667  }
668  }
669  if( i < M ) {
670  size_t j( 0UL );
671  for( ; (j+2UL) <= N; j+=2UL ) {
672  IntrinsicType xmm1, xmm2;
673  for( size_t k=0UL; k<K; k+=IT::size ) {
674  const IntrinsicType a1( A.load(i,k) );
675  xmm1 = xmm1 + a1 * B.load(k,j );
676  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
677  }
678  (~C)(i,j ) = sum( xmm1 );
679  (~C)(i,j+1UL) = sum( xmm2 );
680  }
681  if( j < N ) {
682  IntrinsicType xmm1, xmm2;
683  for( size_t k=0UL; k<K; k+=IT::size ) {
684  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
685  }
686  (~C)(i,j) = sum( xmm1 );
687  }
688  }
689  }
691  //**********************************************************************************************
692 
693  //**Default assignment to dense matrices********************************************************
// Fallback overload of the BLAS kernel selection: enabled when UseDefaultKernel holds and
// simply forwards to selectDefaultAssignKernel.
// C: target matrix. A: left operand. B: right operand.
707  template< typename MT3 // Type of the left-hand side target matrix
708  , typename MT4 // Type of the left-hand side matrix operand
709  , typename MT5 > // Type of the right-hand side matrix operand
710  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
711  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
712  {
713  selectDefaultAssignKernel( C, A, B );
714  }
716  //**********************************************************************************************
717 
718  //**BLAS-based assignment to dense matrices (single precision)**********************************
719 #if BLAZE_BLAS_MODE
720 
// BLAS-based kernel for C = A * B via cblas_sgemm; enabled when UseSinglePrecisionKernel holds.
// C: target matrix. A: left operand. B: right operand.
// NOTE(review): source lines 741-743 are absent from this listing.
733  template< typename MT3 // Type of the left-hand side target matrix
734  , typename MT4 // Type of the left-hand side matrix operand
735  , typename MT5 > // Type of the right-hand side matrix operand
736  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
737  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
738  {
739  using boost::numeric_cast;
740 
744 
     // Sizes and leading dimensions are converted to int via boost::numeric_cast for the
     // CBLAS interface.
745  const int M ( numeric_cast<int>( A.rows() ) );
746  const int N ( numeric_cast<int>( B.columns() ) );
747  const int K ( numeric_cast<int>( A.columns() ) );
748  const int lda( numeric_cast<int>( A.spacing() ) );
749  const int ldb( numeric_cast<int>( B.spacing() ) );
750  const int ldc( numeric_cast<int>( C.spacing() ) );
751 
     // The storage order passed to BLAS follows the target MT3. The scalars 1.0F and 0.0F
     // occupy the alpha/beta positions. The transpose flags swap with the target's order
     // (presumably because A is stored row-major and B column-major -- confirm).
752  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
753  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
754  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
755  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
756  }
758 #endif
759  //**********************************************************************************************
760 
761  //**BLAS-based assignment to dense matrices (double precision)**********************************
762 #if BLAZE_BLAS_MODE
763 
// BLAS-based kernel for C = A * B via cblas_dgemm; enabled when UseDoublePrecisionKernel holds.
// C: target matrix. A: left operand. B: right operand.
// NOTE(review): source lines 784-786 are absent from this listing.
776  template< typename MT3 // Type of the left-hand side target matrix
777  , typename MT4 // Type of the left-hand side matrix operand
778  , typename MT5 > // Type of the right-hand side matrix operand
779  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
780  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
781  {
782  using boost::numeric_cast;
783 
787 
     // Sizes and leading dimensions are converted to int via boost::numeric_cast for the
     // CBLAS interface.
788  const int M ( numeric_cast<int>( A.rows() ) );
789  const int N ( numeric_cast<int>( B.columns() ) );
790  const int K ( numeric_cast<int>( A.columns() ) );
791  const int lda( numeric_cast<int>( A.spacing() ) );
792  const int ldb( numeric_cast<int>( B.spacing() ) );
793  const int ldc( numeric_cast<int>( C.spacing() ) );
794 
     // The storage order passed to BLAS follows the target MT3. The scalars 1.0 and 0.0
     // occupy the alpha/beta positions. The transpose flags swap with the target's order
     // (presumably because A is stored row-major and B column-major -- confirm).
795  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
796  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
797  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
798  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
799  }
801 #endif
802  //**********************************************************************************************
803 
804  //**BLAS-based assignment to dense matrices (single precision complex)**************************
805 #if BLAZE_BLAS_MODE
806 
// BLAS-based kernel for C = A * B via cblas_cgemm; enabled when
// UseSinglePrecisionComplexKernel holds (all element types are complex<float>).
// C: target matrix. A: left operand. B: right operand.
// NOTE(review): source lines 827-829 are absent from this listing.
819  template< typename MT3 // Type of the left-hand side target matrix
820  , typename MT4 // Type of the left-hand side matrix operand
821  , typename MT5 > // Type of the right-hand side matrix operand
822  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
823  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
824  {
825  using boost::numeric_cast;
826 
     // Compile-time constraints on the value_type of each complex element type.
830  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
831  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
832  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
833 
     // Sizes and leading dimensions are converted to int via boost::numeric_cast for the
     // CBLAS interface.
834  const int M ( numeric_cast<int>( A.rows() ) );
835  const int N ( numeric_cast<int>( B.columns() ) );
836  const int K ( numeric_cast<int>( A.columns() ) );
837  const int lda( numeric_cast<int>( A.spacing() ) );
838  const int ldb( numeric_cast<int>( B.spacing() ) );
839  const int ldc( numeric_cast<int>( C.spacing() ) );
     // Complex scalars (1,0) and (0,0) are passed by address.
840  const complex<float> alpha( 1.0F, 0.0F );
841  const complex<float> beta ( 0.0F, 0.0F );
842 
     // The storage order passed to BLAS follows the target MT3. The transpose flags swap
     // with the target's order (presumably because A is stored row-major and B
     // column-major -- confirm).
843  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
844  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
845  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
846  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
847  }
849 #endif
850  //**********************************************************************************************
851 
852  //**BLAS-based assignment to dense matrices (double precision complex)**************************
853 #if BLAZE_BLAS_MODE
854 
// BLAS-based kernel for C = A * B via cblas_zgemm; enabled when
// UseDoublePrecisionComplexKernel holds (all element types are complex<double>).
// C: target matrix. A: left operand. B: right operand.
// NOTE(review): source lines 875-877 are absent from this listing.
867  template< typename MT3 // Type of the left-hand side target matrix
868  , typename MT4 // Type of the left-hand side matrix operand
869  , typename MT5 > // Type of the right-hand side matrix operand
870  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
871  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
872  {
873  using boost::numeric_cast;
874 
     // Compile-time constraints on the value_type of each complex element type.
878  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
879  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
880  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
881 
     // Sizes and leading dimensions are converted to int via boost::numeric_cast for the
     // CBLAS interface.
882  const int M ( numeric_cast<int>( A.rows() ) );
883  const int N ( numeric_cast<int>( B.columns() ) );
884  const int K ( numeric_cast<int>( A.columns() ) );
885  const int lda( numeric_cast<int>( A.spacing() ) );
886  const int ldb( numeric_cast<int>( B.spacing() ) );
887  const int ldc( numeric_cast<int>( C.spacing() ) );
     // Complex scalars (1,0) and (0,0) are passed by address.
888  const complex<double> alpha( 1.0, 0.0 );
889  const complex<double> beta ( 0.0, 0.0 );
890 
     // The storage order passed to BLAS follows the target MT3. The transpose flags swap
     // with the target's order (presumably because A is stored row-major and B
     // column-major -- confirm).
891  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
892  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
893  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
894  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
895  }
897 #endif
898  //**********************************************************************************************
899 
900  //**Assignment to sparse matrices***************************************************************
// Assigns the product represented by 'rhs' to the sparse matrix 'lhs'.
// lhs: target sparse matrix; its dimensions are asserted to match those of rhs.
// rhs: multiplication expression to evaluate.
// The expression is first evaluated into a temporary, which is then assigned to the target.
// NOTE(review): source lines 916 and 920-925 are absent from this listing.
912  template< typename MT // Type of the target sparse matrix
913  , bool SO > // Storage order of the target sparse matrix
914  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
915  {
917 
     // The temporary's type is chosen between OppositeType and ResultType depending on the
     // target's storage order flag SO.
918  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
919 
926 
927  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
928  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
929 
930  const TmpType tmp( rhs );
931  assign( ~lhs, tmp );
932  }
934  //**********************************************************************************************
935 
936  //**Addition assignment to dense matrices*******************************************************
// Adds the product represented by 'rhs' to the dense matrix 'lhs'.
// lhs: target dense matrix; its dimensions are asserted to match those of rhs.
// rhs: multiplication expression to evaluate.
// Small targets go to the default kernel, larger ones to the BLAS kernel selection.
949  template< typename MT // Type of the target dense matrix
950  , bool SO > // Storage order of the target dense matrix
951  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
952  {
954 
955  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
956  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
957 
     // Nothing is added if the target is empty or the inner dimension is zero.
958  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
959  return;
960  }
961 
962  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
963  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
964 
965  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
966  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
967  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
968  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
969  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
970  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
971 
     // Kernel choice is based on the number of target elements (rows * columns).
972  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
973  DMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
974  else
975  DMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
976  }
978  //**********************************************************************************************
979 
980  //**Default addition assignment to dense matrices***********************************************
// Scalar default kernel for C += A * B; enabled when UseVectorizedDefaultKernel does not hold.
// C: target matrix. A: left operand. B: right operand.
// The innermost loop over the columns of C is unrolled by two.
994  template< typename MT3 // Type of the left-hand side target matrix
995  , typename MT4 // Type of the left-hand side matrix operand
996  , typename MT5 > // Type of the right-hand side matrix operand
997  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
998  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
999  {
1000  const size_t M( A.rows() );
1001  const size_t N( B.columns() );
1002  const size_t K( A.columns() );
1003 
      // 'end' is N rounded down to an even number; the assert cross-checks the bit mask
      // against the modulo formulation.
1004  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1005  const size_t end( N & size_t(-2) );
1006 
1007  for( size_t i=0UL; i<M; ++i ) {
1008  for( size_t k=0UL; k<K; ++k ) {
1009  for( size_t j=0UL; j<end; j+=2UL ) {
1010  C(i,j ) += A(i,k) * B(k,j );
1011  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1012  }
      // One trailing column is left over when N is odd.
1013  if( end < N ) {
1014  C(i,end) += A(i,k) * B(k,end);
1015  }
1016  }
1017  }
1018  }
1020  //**********************************************************************************************
1021 
1022  //**Vectorized default addition assignment to row-major dense matrices**************************
1036  template< typename MT3 // Type of the left-hand side target matrix
1037  , typename MT4 // Type of the left-hand side matrix operand
1038  , typename MT5 > // Type of the right-hand side matrix operand
1039  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1040  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1041  {
1042  typedef IntrinsicTrait<ElementType> IT;
1043 
1044  const size_t M( A.rows() );
1045  const size_t N( B.columns() );
1046  const size_t K( A.columns() );
1047 
1048  size_t i( 0UL );
1049 
1050  for( ; (i+2UL) <= M; i+=2UL ) {
1051  size_t j( 0UL );
1052  for( ; (j+4UL) <= N; j+=4UL ) {
1053  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1054  for( size_t k=0UL; k<K; k+=IT::size ) {
1055  const IntrinsicType a1( A.load(i ,k) );
1056  const IntrinsicType a2( A.load(i+1UL,k) );
1057  const IntrinsicType b1( B.load(k,j ) );
1058  const IntrinsicType b2( B.load(k,j+1UL) );
1059  const IntrinsicType b3( B.load(k,j+2UL) );
1060  const IntrinsicType b4( B.load(k,j+3UL) );
1061  xmm1 = xmm1 + a1 * b1;
1062  xmm2 = xmm2 + a1 * b2;
1063  xmm3 = xmm3 + a1 * b3;
1064  xmm4 = xmm4 + a1 * b4;
1065  xmm5 = xmm5 + a2 * b1;
1066  xmm6 = xmm6 + a2 * b2;
1067  xmm7 = xmm7 + a2 * b3;
1068  xmm8 = xmm8 + a2 * b4;
1069  }
1070  (~C)(i ,j ) += sum( xmm1 );
1071  (~C)(i ,j+1UL) += sum( xmm2 );
1072  (~C)(i ,j+2UL) += sum( xmm3 );
1073  (~C)(i ,j+3UL) += sum( xmm4 );
1074  (~C)(i+1UL,j ) += sum( xmm5 );
1075  (~C)(i+1UL,j+1UL) += sum( xmm6 );
1076  (~C)(i+1UL,j+2UL) += sum( xmm7 );
1077  (~C)(i+1UL,j+3UL) += sum( xmm8 );
1078  }
1079  for( ; (j+2UL) <= N; j+=2UL ) {
1080  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1081  for( size_t k=0UL; k<K; k+=IT::size ) {
1082  const IntrinsicType a1( A.load(i ,k) );
1083  const IntrinsicType a2( A.load(i+1UL,k) );
1084  const IntrinsicType b1( B.load(k,j ) );
1085  const IntrinsicType b2( B.load(k,j+1UL) );
1086  xmm1 = xmm1 + a1 * b1;
1087  xmm2 = xmm2 + a1 * b2;
1088  xmm3 = xmm3 + a2 * b1;
1089  xmm4 = xmm4 + a2 * b2;
1090  }
1091  (~C)(i ,j ) += sum( xmm1 );
1092  (~C)(i ,j+1UL) += sum( xmm2 );
1093  (~C)(i+1UL,j ) += sum( xmm3 );
1094  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1095  }
1096  if( j < N ) {
1097  IntrinsicType xmm1, xmm2;
1098  for( size_t k=0UL; k<K; k+=IT::size ) {
1099  const IntrinsicType b1( B.load(k,j) );
1100  xmm1 = xmm1 + A.load(i ,k) * b1;
1101  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1102  }
1103  (~C)(i ,j) += sum( xmm1 );
1104  (~C)(i+1UL,j) += sum( xmm2 );
1105  }
1106  }
1107  if( i < M ) {
1108  size_t j( 0UL );
1109  for( ; (j+4UL) <= N; j+=4UL ) {
1110  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1111  for( size_t k=0UL; k<K; k+=IT::size ) {
1112  const IntrinsicType a1( A.load(i,k) );
1113  xmm1 = xmm1 + a1 * B.load(k,j );
1114  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1115  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1116  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1117  }
1118  (~C)(i,j ) += sum( xmm1 );
1119  (~C)(i,j+1UL) += sum( xmm2 );
1120  (~C)(i,j+2UL) += sum( xmm3 );
1121  (~C)(i,j+3UL) += sum( xmm4 );
1122  }
1123  for( ; (j+2UL) <= N; j+=2UL ) {
1124  IntrinsicType xmm1, xmm2;
1125  for( size_t k=0UL; k<K; k+=IT::size ) {
1126  const IntrinsicType a1( A.load(i,k) );
1127  xmm1 = xmm1 + a1 * B.load(k,j );
1128  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1129  }
1130  (~C)(i,j ) += sum( xmm1 );
1131  (~C)(i,j+1UL) += sum( xmm2 );
1132  }
1133  if( j < N ) {
1134  IntrinsicType xmm1, xmm2;
1135  for( size_t k=0UL; k<K; k+=IT::size ) {
1136  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1137  }
1138  (~C)(i,j) += sum( xmm1 );
1139  }
1140  }
1141  }
1143  //**********************************************************************************************
1144 
1145  //**Vectorized default addition assignment to column-major dense matrices***********************
1159  template< typename MT3 // Type of the left-hand side target matrix
1160  , typename MT4 // Type of the left-hand side matrix operand
1161  , typename MT5 > // Type of the right-hand side matrix operand
1162  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1163  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1164  {
1165  typedef IntrinsicTrait<ElementType> IT;
1166 
1167  const size_t M( A.rows() );
1168  const size_t N( B.columns() );
1169  const size_t K( A.columns() );
1170 
1171  size_t i( 0UL );
1172 
1173  for( ; (i+4UL) <= M; i+=4UL ) {
1174  size_t j( 0UL );
1175  for( ; (j+2UL) <= N; j+=2UL ) {
1176  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1177  for( size_t k=0UL; k<K; k+=IT::size ) {
1178  const IntrinsicType a1( A.load(i ,k) );
1179  const IntrinsicType a2( A.load(i+1UL,k) );
1180  const IntrinsicType a3( A.load(i+2UL,k) );
1181  const IntrinsicType a4( A.load(i+3UL,k) );
1182  const IntrinsicType b1( B.load(k,j ) );
1183  const IntrinsicType b2( B.load(k,j+1UL) );
1184  xmm1 = xmm1 + a1 * b1;
1185  xmm2 = xmm2 + a1 * b2;
1186  xmm3 = xmm3 + a2 * b1;
1187  xmm4 = xmm4 + a2 * b2;
1188  xmm5 = xmm5 + a3 * b1;
1189  xmm6 = xmm6 + a3 * b2;
1190  xmm7 = xmm7 + a4 * b1;
1191  xmm8 = xmm8 + a4 * b2;
1192  }
1193  (~C)(i ,j ) += sum( xmm1 );
1194  (~C)(i ,j+1UL) += sum( xmm2 );
1195  (~C)(i+1UL,j ) += sum( xmm3 );
1196  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1197  (~C)(i+2UL,j ) += sum( xmm5 );
1198  (~C)(i+2UL,j+1UL) += sum( xmm6 );
1199  (~C)(i+3UL,j ) += sum( xmm7 );
1200  (~C)(i+3UL,j+1UL) += sum( xmm8 );
1201  }
1202  if( j < N ) {
1203  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1204  for( size_t k=0UL; k<K; k+=IT::size ) {
1205  const IntrinsicType b1( B.load(k,j) );
1206  xmm1 = xmm1 + A.load(i ,k) * b1;
1207  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1208  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1209  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1210  }
1211  (~C)(i ,j) += sum( xmm1 );
1212  (~C)(i+1UL,j) += sum( xmm2 );
1213  (~C)(i+2UL,j) += sum( xmm3 );
1214  (~C)(i+3UL,j) += sum( xmm4 );
1215  }
1216  }
1217  for( ; (i+2UL) <= M; i+=2UL ) {
1218  size_t j( 0UL );
1219  for( ; (j+2UL) <= N; j+=2UL ) {
1220  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1221  for( size_t k=0UL; k<K; k+=IT::size ) {
1222  const IntrinsicType a1( A.load(i ,k) );
1223  const IntrinsicType a2( A.load(i+1UL,k) );
1224  const IntrinsicType b1( B.load(k,j ) );
1225  const IntrinsicType b2( B.load(k,j+1UL) );
1226  xmm1 = xmm1 + a1 * b1;
1227  xmm2 = xmm2 + a1 * b2;
1228  xmm3 = xmm3 + a2 * b1;
1229  xmm4 = xmm4 + a2 * b2;
1230  }
1231  (~C)(i ,j ) += sum( xmm1 );
1232  (~C)(i ,j+1UL) += sum( xmm2 );
1233  (~C)(i+1UL,j ) += sum( xmm3 );
1234  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1235  }
1236  if( j < N ) {
1237  IntrinsicType xmm1, xmm2;
1238  for( size_t k=0UL; k<K; k+=IT::size ) {
1239  const IntrinsicType b1( B.load(k,j) );
1240  xmm1 = xmm1 + A.load(i ,k) * b1;
1241  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1242  }
1243  (~C)(i ,j) += sum( xmm1 );
1244  (~C)(i+1UL,j) += sum( xmm2 );
1245  }
1246  }
1247  if( i < M ) {
1248  size_t j( 0UL );
1249  for( ; (j+2UL) <= N; j+=2UL ) {
1250  IntrinsicType xmm1, xmm2;
1251  for( size_t k=0UL; k<K; k+=IT::size ) {
1252  const IntrinsicType a1( A.load(i,k) );
1253  xmm1 = xmm1 + a1 * B.load(k,j );
1254  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1255  }
1256  (~C)(i,j ) += sum( xmm1 );
1257  (~C)(i,j+1UL) += sum( xmm2 );
1258  }
1259  if( j < N ) {
1260  IntrinsicType xmm1, xmm2;
1261  for( size_t k=0UL; k<K; k+=IT::size ) {
1262  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1263  }
1264  (~C)(i,j) += sum( xmm1 );
1265  }
1266  }
1267  }
1269  //**********************************************************************************************
1270 
1271  //**Default addition assignment to dense matrices***********************************************
// Fallback of the BLAS kernel selection for the addition assignment: whenever
// UseDefaultKernel<MT3,MT4,MT5> enables this overload, the work is handed to the default
// addition assignment kernel of this class.
//   C  target matrix that accumulates the product
//   A  left-hand side factor of the product
//   B  right-hand side factor of the product
1285  template< typename MT3 // Target matrix type
1286  , typename MT4 // Left-hand side operand type
1287  , typename MT5 > // Right-hand side operand type
1288  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1289  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1290  {
1291  DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1292  }
1294  //**********************************************************************************************
1295 
1296  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1297 #if BLAZE_BLAS_MODE
1298 
// BLAS-based addition assignment for single precision operands: C += A * B via cblas_sgemm.
// Enabled through EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1318 to 1322 inside the body, so any
// statements on the skipped lines are not visible here.
1311  template< typename MT3 // Type of the left-hand side target matrix
1312  , typename MT4 // Type of the left-hand side matrix operand
1313  , typename MT5 > // Type of the right-hand side matrix operand
1314  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1315  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1316  {
1317  using boost::numeric_cast;
1318 
1322 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1323  const int M ( numeric_cast<int>( A.rows() ) );
1324  const int N ( numeric_cast<int>( B.columns() ) );
1325  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1326  const int lda( numeric_cast<int>( A.spacing() ) );
1327  const int ldb( numeric_cast<int>( B.spacing() ) );
1328  const int ldc( numeric_cast<int>( C.spacing() ) );
1329 
      // alpha = 1 and beta = 1: the product is accumulated onto the existing contents of C.
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1330  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1331  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1332  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1333  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1334  }
1336 #endif
1337  //**********************************************************************************************
1338 
1339  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1340 #if BLAZE_BLAS_MODE
1341 
// BLAS-based addition assignment for double precision operands: C += A * B via cblas_dgemm.
// Enabled through EnableIf when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1361 to 1365 inside the body, so any
// statements on the skipped lines are not visible here.
1354  template< typename MT3 // Type of the left-hand side target matrix
1355  , typename MT4 // Type of the left-hand side matrix operand
1356  , typename MT5 > // Type of the right-hand side matrix operand
1357  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1358  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1359  {
1360  using boost::numeric_cast;
1361 
1365 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1366  const int M ( numeric_cast<int>( A.rows() ) );
1367  const int N ( numeric_cast<int>( B.columns() ) );
1368  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1369  const int lda( numeric_cast<int>( A.spacing() ) );
1370  const int ldb( numeric_cast<int>( B.spacing() ) );
1371  const int ldc( numeric_cast<int>( C.spacing() ) );
1372 
      // alpha = 1 and beta = 1: the product is accumulated onto the existing contents of C.
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1373  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1374  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1375  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1376  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1377  }
1379 #endif
1380  //**********************************************************************************************
1381 
1382  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1383 #if BLAZE_BLAS_MODE
1384 
// BLAS-based addition assignment for single precision complex operands: C += A * B via
// cblas_cgemm. Enabled through EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1404 to 1408 inside the body, so any
// statements on the skipped lines are not visible here.
1397  template< typename MT3 // Type of the left-hand side target matrix
1398  , typename MT4 // Type of the left-hand side matrix operand
1399  , typename MT5 > // Type of the right-hand side matrix operand
1400  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1401  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1402  {
1403  using boost::numeric_cast;
1404 
      // Constraints on the value_type of each element type (float, going by the macro name).
1408  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1409  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1410  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1411 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1412  const int M ( numeric_cast<int>( A.rows() ) );
1413  const int N ( numeric_cast<int>( B.columns() ) );
1414  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1415  const int lda( numeric_cast<int>( A.spacing() ) );
1416  const int ldb( numeric_cast<int>( B.spacing() ) );
1417  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_cgemm by address; both are (1,0), so the
      // product is accumulated onto the existing contents of C.
1418  const complex<float> alpha( 1.0F, 0.0F );
1419  const complex<float> beta ( 1.0F, 0.0F );
1420 
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1421  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1422  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1423  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1424  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1425  }
1427 #endif
1428  //**********************************************************************************************
1429 
1430  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1431 #if BLAZE_BLAS_MODE
1432 
// BLAS-based addition assignment for double precision complex operands: C += A * B via
// cblas_zgemm. Enabled through EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1452 to 1456 inside the body, so any
// statements on the skipped lines are not visible here.
1445  template< typename MT3 // Type of the left-hand side target matrix
1446  , typename MT4 // Type of the left-hand side matrix operand
1447  , typename MT5 > // Type of the right-hand side matrix operand
1448  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1449  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1450  {
1451  using boost::numeric_cast;
1452 
      // Constraints on the value_type of each element type (double, going by the macro name).
1456  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1457  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1458  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1459 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1460  const int M ( numeric_cast<int>( A.rows() ) );
1461  const int N ( numeric_cast<int>( B.columns() ) );
1462  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1463  const int lda( numeric_cast<int>( A.spacing() ) );
1464  const int ldb( numeric_cast<int>( B.spacing() ) );
1465  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_zgemm by address; both are (1,0), so the
      // product is accumulated onto the existing contents of C.
1466  const complex<double> alpha( 1.0, 0.0 );
1467  const complex<double> beta ( 1.0, 0.0 );
1468 
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1469  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1470  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1471  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1472  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1473  }
1475 #endif
1476  //**********************************************************************************************
1477 
1478  //**Addition assignment to sparse matrices******************************************************
1479  // No special implementation for the addition assignment to sparse matrices.
1480  //**********************************************************************************************
1481 
1482  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the matrix product expression to a dense matrix (lhs -= rhs).
//   lhs  target dense matrix of either storage order (SO); updated in place
//   rhs  product expression whose operands lhs_ and rhs_ are evaluated and multiplied
// NOTE(review): this listing jumps from original line 1498 to 1500 inside the body, so the
// statement on the skipped line is not visible here.
1495  template< typename MT // Type of the target dense matrix
1496  , bool SO > // Storage order of the target dense matrix
1497  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1498  {
1500 
1501  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1502  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1503 
      // Nothing to subtract when the target is empty or the inner dimension of the product is zero.
1504  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1505  return;
1506  }
1507 
1508  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1509  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1510 
      // The evaluated operands must keep the dimensions of the expression operands and must fit
      // the target.
1511  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1512  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1513  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1514  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1515  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1516  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1517 
      // Kernel choice by target size: fewer elements than DMATTDMATMULT_THRESHOLD use the default
      // kernel, everything else goes through the BLAS kernel selection.
1518  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
1519  DMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1520  else
1521  DMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
1522  }
1524  //**********************************************************************************************
1525 
1526  //**Default subtraction assignment to dense matrices********************************************
// Scalar default kernel of the subtraction assignment (C -= A * B). It is selected through
// DisableIf whenever UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
//   C  target matrix, updated element by element
//   A  left-hand side factor of the product
//   B  right-hand side factor of the product
// For every row of A and every inner index, the contribution A(row,inner) * B(inner,col) is
// subtracted from C(row,col); the columns are walked two at a time, and a trailing odd column
// is handled separately.
1540  template< typename MT3 // Target matrix type
1541  , typename MT4 // Left-hand side operand type
1542  , typename MT5 > // Right-hand side operand type
1543  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1544  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1545  {
1546  const size_t M = A.rows();
1547  const size_t N = B.columns();
1548  const size_t K = A.columns();
1549 
      // 'end' is N rounded down to an even value: the limit of the two-column loop.
1550  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1551  const size_t end = N & size_t(-2);
1552 
1553  for( size_t row=0UL; row!=M; ++row ) {
1554  for( size_t inner=0UL; inner!=K; ++inner ) {
1555  for( size_t col=0UL; col!=end; col+=2UL ) {
1556  C(row,col ) -= A(row,inner) * B(inner,col );
1557  C(row,col+1UL) -= A(row,inner) * B(inner,col+1UL);
1558  }
      // Odd number of columns: the last column is not covered by the paired loop.
1559  if( end != N ) {
1560  C(row,end) -= A(row,inner) * B(inner,end);
1561  }
1562  }
1563  }
1564  }
1566  //**********************************************************************************************
1567 
1568  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1582  template< typename MT3 // Type of the left-hand side target matrix
1583  , typename MT4 // Type of the left-hand side matrix operand
1584  , typename MT5 > // Type of the right-hand side matrix operand
1585  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1586  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1587  {
1588  typedef IntrinsicTrait<ElementType> IT;
1589 
1590  const size_t M( A.rows() );
1591  const size_t N( B.columns() );
1592  const size_t K( A.columns() );
1593 
1594  size_t i( 0UL );
1595 
1596  for( ; (i+2UL) <= M; i+=2UL ) {
1597  size_t j( 0UL );
1598  for( ; (j+4UL) <= N; j+=4UL ) {
1599  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1600  for( size_t k=0UL; k<K; k+=IT::size ) {
1601  const IntrinsicType a1( A.load(i ,k) );
1602  const IntrinsicType a2( A.load(i+1UL,k) );
1603  const IntrinsicType b1( B.load(k,j ) );
1604  const IntrinsicType b2( B.load(k,j+1UL) );
1605  const IntrinsicType b3( B.load(k,j+2UL) );
1606  const IntrinsicType b4( B.load(k,j+3UL) );
1607  xmm1 = xmm1 + a1 * b1;
1608  xmm2 = xmm2 + a1 * b2;
1609  xmm3 = xmm3 + a1 * b3;
1610  xmm4 = xmm4 + a1 * b4;
1611  xmm5 = xmm5 + a2 * b1;
1612  xmm6 = xmm6 + a2 * b2;
1613  xmm7 = xmm7 + a2 * b3;
1614  xmm8 = xmm8 + a2 * b4;
1615  }
1616  (~C)(i ,j ) -= sum( xmm1 );
1617  (~C)(i ,j+1UL) -= sum( xmm2 );
1618  (~C)(i ,j+2UL) -= sum( xmm3 );
1619  (~C)(i ,j+3UL) -= sum( xmm4 );
1620  (~C)(i+1UL,j ) -= sum( xmm5 );
1621  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
1622  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
1623  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
1624  }
1625  for( ; (j+2UL) <= N; j+=2UL ) {
1626  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1627  for( size_t k=0UL; k<K; k+=IT::size ) {
1628  const IntrinsicType a1( A.load(i ,k) );
1629  const IntrinsicType a2( A.load(i+1UL,k) );
1630  const IntrinsicType b1( B.load(k,j ) );
1631  const IntrinsicType b2( B.load(k,j+1UL) );
1632  xmm1 = xmm1 + a1 * b1;
1633  xmm2 = xmm2 + a1 * b2;
1634  xmm3 = xmm3 + a2 * b1;
1635  xmm4 = xmm4 + a2 * b2;
1636  }
1637  (~C)(i ,j ) -= sum( xmm1 );
1638  (~C)(i ,j+1UL) -= sum( xmm2 );
1639  (~C)(i+1UL,j ) -= sum( xmm3 );
1640  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1641  }
1642  if( j < N ) {
1643  IntrinsicType xmm1, xmm2;
1644  for( size_t k=0UL; k<K; k+=IT::size ) {
1645  const IntrinsicType b1( B.load(k,j) );
1646  xmm1 = xmm1 + A.load(i ,k) * b1;
1647  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1648  }
1649  (~C)(i ,j) -= sum( xmm1 );
1650  (~C)(i+1UL,j) -= sum( xmm2 );
1651  }
1652  }
1653  if( i < M ) {
1654  size_t j( 0UL );
1655  for( ; (j+4UL) <= N; j+=4UL ) {
1656  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1657  for( size_t k=0UL; k<K; k+=IT::size ) {
1658  const IntrinsicType a1( A.load(i,k) );
1659  xmm1 = xmm1 + a1 * B.load(k,j );
1660  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1661  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1662  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1663  }
1664  (~C)(i,j ) -= sum( xmm1 );
1665  (~C)(i,j+1UL) -= sum( xmm2 );
1666  (~C)(i,j+2UL) -= sum( xmm3 );
1667  (~C)(i,j+3UL) -= sum( xmm4 );
1668  }
1669  for( ; (j+2UL) <= N; j+=2UL ) {
1670  IntrinsicType xmm1, xmm2;
1671  for( size_t k=0UL; k<K; k+=IT::size ) {
1672  const IntrinsicType a1( A.load(i,k) );
1673  xmm1 = xmm1 + a1 * B.load(k,j );
1674  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1675  }
1676  (~C)(i,j ) -= sum( xmm1 );
1677  (~C)(i,j+1UL) -= sum( xmm2 );
1678  }
1679  if( j < N ) {
1680  IntrinsicType xmm1, xmm2;
1681  for( size_t k=0UL; k<K; k+=IT::size ) {
1682  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1683  }
1684  (~C)(i,j) -= sum( xmm1 );
1685  }
1686  }
1687  }
1689  //**********************************************************************************************
1690 
1691  //**Vectorized default subtraction assignment to column-major dense matrices********************
1705  template< typename MT3 // Type of the left-hand side target matrix
1706  , typename MT4 // Type of the left-hand side matrix operand
1707  , typename MT5 > // Type of the right-hand side matrix operand
1708  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1709  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1710  {
1711  typedef IntrinsicTrait<ElementType> IT;
1712 
1713  const size_t M( A.rows() );
1714  const size_t N( B.columns() );
1715  const size_t K( A.columns() );
1716 
1717  size_t i( 0UL );
1718 
1719  for( ; (i+4UL) <= M; i+=4UL ) {
1720  size_t j( 0UL );
1721  for( ; (j+2UL) <= N; j+=2UL ) {
1722  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1723  for( size_t k=0UL; k<K; k+=IT::size ) {
1724  const IntrinsicType a1( A.load(i ,k) );
1725  const IntrinsicType a2( A.load(i+1UL,k) );
1726  const IntrinsicType a3( A.load(i+2UL,k) );
1727  const IntrinsicType a4( A.load(i+3UL,k) );
1728  const IntrinsicType b1( B.load(k,j ) );
1729  const IntrinsicType b2( B.load(k,j+1UL) );
1730  xmm1 = xmm1 + a1 * b1;
1731  xmm2 = xmm2 + a1 * b2;
1732  xmm3 = xmm3 + a2 * b1;
1733  xmm4 = xmm4 + a2 * b2;
1734  xmm5 = xmm5 + a3 * b1;
1735  xmm6 = xmm6 + a3 * b2;
1736  xmm7 = xmm7 + a4 * b1;
1737  xmm8 = xmm8 + a4 * b2;
1738  }
1739  (~C)(i ,j ) -= sum( xmm1 );
1740  (~C)(i ,j+1UL) -= sum( xmm2 );
1741  (~C)(i+1UL,j ) -= sum( xmm3 );
1742  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1743  (~C)(i+2UL,j ) -= sum( xmm5 );
1744  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
1745  (~C)(i+3UL,j ) -= sum( xmm7 );
1746  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
1747  }
1748  if( j < N ) {
1749  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1750  for( size_t k=0UL; k<K; k+=IT::size ) {
1751  const IntrinsicType b1( B.load(k,j) );
1752  xmm1 = xmm1 + A.load(i ,k) * b1;
1753  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1754  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1755  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1756  }
1757  (~C)(i ,j) -= sum( xmm1 );
1758  (~C)(i+1UL,j) -= sum( xmm2 );
1759  (~C)(i+2UL,j) -= sum( xmm3 );
1760  (~C)(i+3UL,j) -= sum( xmm4 );
1761  }
1762  }
1763  for( ; (i+2UL) <= M; i+=2UL ) {
1764  size_t j( 0UL );
1765  for( ; (j+2UL) <= N; j+=2UL ) {
1766  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1767  for( size_t k=0UL; k<K; k+=IT::size ) {
1768  const IntrinsicType a1( A.load(i ,k) );
1769  const IntrinsicType a2( A.load(i+1UL,k) );
1770  const IntrinsicType b1( B.load(k,j ) );
1771  const IntrinsicType b2( B.load(k,j+1UL) );
1772  xmm1 = xmm1 + a1 * b1;
1773  xmm2 = xmm2 + a1 * b2;
1774  xmm3 = xmm3 + a2 * b1;
1775  xmm4 = xmm4 + a2 * b2;
1776  }
1777  (~C)(i ,j ) -= sum( xmm1 );
1778  (~C)(i ,j+1UL) -= sum( xmm2 );
1779  (~C)(i+1UL,j ) -= sum( xmm3 );
1780  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1781  }
1782  if( j < N ) {
1783  IntrinsicType xmm1, xmm2;
1784  for( size_t k=0UL; k<K; k+=IT::size ) {
1785  const IntrinsicType b1( B.load(k,j) );
1786  xmm1 = xmm1 + A.load(i ,k) * b1;
1787  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1788  }
1789  (~C)(i ,j) -= sum( xmm1 );
1790  (~C)(i+1UL,j) -= sum( xmm2 );
1791  }
1792  }
1793  if( i < M ) {
1794  size_t j( 0UL );
1795  for( ; (j+2UL) <= N; j+=2UL ) {
1796  IntrinsicType xmm1, xmm2;
1797  for( size_t k=0UL; k<K; k+=IT::size ) {
1798  const IntrinsicType a1( A.load(i,k) );
1799  xmm1 = xmm1 + a1 * B.load(k,j );
1800  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1801  }
1802  (~C)(i,j ) -= sum( xmm1 );
1803  (~C)(i,j+1UL) -= sum( xmm2 );
1804  }
1805  if( j < N ) {
1806  IntrinsicType xmm1, xmm2;
1807  for( size_t k=0UL; k<K; k+=IT::size ) {
1808  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1809  }
1810  (~C)(i,j) -= sum( xmm1 );
1811  }
1812  }
1813  }
1815  //**********************************************************************************************
1816 
1817  //**Default subtraction assignment to dense matrices********************************************
// Fallback of the BLAS kernel selection for the subtraction assignment: whenever
// UseDefaultKernel<MT3,MT4,MT5> enables this overload, the work is handed to the default
// subtraction assignment kernel of this class.
//   C  target matrix the product is subtracted from
//   A  left-hand side factor of the product
//   B  right-hand side factor of the product
1831  template< typename MT3 // Target matrix type
1832  , typename MT4 // Left-hand side operand type
1833  , typename MT5 > // Right-hand side operand type
1834  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1835  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1836  {
1837  DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1838  }
1840  //**********************************************************************************************
1841 
1842  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1843 #if BLAZE_BLAS_MODE
1844 
// BLAS-based subtraction assignment for single precision operands: C -= A * B via cblas_sgemm.
// Enabled through EnableIf when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1864 to 1868 inside the body, so any
// statements on the skipped lines are not visible here.
1857  template< typename MT3 // Type of the left-hand side target matrix
1858  , typename MT4 // Type of the left-hand side matrix operand
1859  , typename MT5 > // Type of the right-hand side matrix operand
1860  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1861  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1862  {
1863  using boost::numeric_cast;
1864 
1868 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1869  const int M ( numeric_cast<int>( A.rows() ) );
1870  const int N ( numeric_cast<int>( B.columns() ) );
1871  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1872  const int lda( numeric_cast<int>( A.spacing() ) );
1873  const int ldb( numeric_cast<int>( B.spacing() ) );
1874  const int ldc( numeric_cast<int>( C.spacing() ) );
1875 
      // alpha = -1 and beta = 1: the product is subtracted from the existing contents of C.
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1876  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1877  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1878  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1879  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1880  }
1882 #endif
1883  //**********************************************************************************************
1884 
1885  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
1886 #if BLAZE_BLAS_MODE
1887 
// BLAS-based subtraction assignment for double precision operands: C -= A * B via cblas_dgemm.
// Enabled through EnableIf when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1907 to 1911 inside the body, so any
// statements on the skipped lines are not visible here.
1900  template< typename MT3 // Type of the left-hand side target matrix
1901  , typename MT4 // Type of the left-hand side matrix operand
1902  , typename MT5 > // Type of the right-hand side matrix operand
1903  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1904  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1905  {
1906  using boost::numeric_cast;
1907 
1911 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1912  const int M ( numeric_cast<int>( A.rows() ) );
1913  const int N ( numeric_cast<int>( B.columns() ) );
1914  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1915  const int lda( numeric_cast<int>( A.spacing() ) );
1916  const int ldb( numeric_cast<int>( B.spacing() ) );
1917  const int ldc( numeric_cast<int>( C.spacing() ) );
1918 
      // alpha = -1 and beta = 1: the product is subtracted from the existing contents of C.
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1919  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1920  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1921  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1922  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1923  }
1925 #endif
1926  //**********************************************************************************************
1927 
1928  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
1929 #if BLAZE_BLAS_MODE
1930 
// BLAS-based subtraction assignment for single precision complex operands: C -= A * B via
// cblas_cgemm. Enabled through EnableIf when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1950 to 1954 inside the body, so any
// statements on the skipped lines are not visible here.
1943  template< typename MT3 // Type of the left-hand side target matrix
1944  , typename MT4 // Type of the left-hand side matrix operand
1945  , typename MT5 > // Type of the right-hand side matrix operand
1946  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1947  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1948  {
1949  using boost::numeric_cast;
1950 
      // Constraints on the value_type of each element type (float, going by the macro name).
1954  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1955  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1956  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1957 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
1958  const int M ( numeric_cast<int>( A.rows() ) );
1959  const int N ( numeric_cast<int>( B.columns() ) );
1960  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
1961  const int lda( numeric_cast<int>( A.spacing() ) );
1962  const int ldb( numeric_cast<int>( B.spacing() ) );
1963  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_cgemm by address: alpha = (-1,0) and
      // beta = (1,0), so the product is subtracted from the existing contents of C.
1964  const complex<float> alpha( -1.0F, 0.0F );
1965  const complex<float> beta ( 1.0F, 0.0F );
1966 
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
1967  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1968  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1969  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1970  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1971  }
1973 #endif
1974  //**********************************************************************************************
1975 
1976  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
1977 #if BLAZE_BLAS_MODE
1978 
// BLAS-based subtraction assignment for double precision complex operands: C -= A * B via
// cblas_zgemm. Enabled through EnableIf when UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds.
//   C  target matrix; its data() buffer is updated in place
//   A  left-hand side operand of the product
//   B  right-hand side operand of the product
// NOTE(review): this listing jumps from original line 1998 to 2002 inside the body, so any
// statements on the skipped lines are not visible here.
1991  template< typename MT3 // Type of the left-hand side target matrix
1992  , typename MT4 // Type of the left-hand side matrix operand
1993  , typename MT5 > // Type of the right-hand side matrix operand
1994  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1995  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1996  {
1997  using boost::numeric_cast;
1998 
      // Constraints on the value_type of each element type (double, going by the macro name).
2002  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2003  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2004  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2005 
      // The CBLAS interface takes int arguments; numeric_cast converts the size values
      // (range-checked according to the Boost documentation).
2006  const int M ( numeric_cast<int>( A.rows() ) );
2007  const int N ( numeric_cast<int>( B.columns() ) );
2008  const int K ( numeric_cast<int>( A.columns() ) );
      // spacing() of each matrix is passed as its leading dimension.
2009  const int lda( numeric_cast<int>( A.spacing() ) );
2010  const int ldb( numeric_cast<int>( B.spacing() ) );
2011  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_zgemm by address: alpha = (-1,0) and
      // beta = (1,0), so the product is subtracted from the existing contents of C.
2012  const complex<double> alpha( -1.0, 0.0 );
2013  const complex<double> beta ( 1.0, 0.0 );
2014 
      // Row-major target: row-major layout, A untransposed, B transposed.
      // Otherwise: column-major layout, A transposed, B untransposed.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
2015  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2016  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2017  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2018  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2019  }
2021 #endif
2022  //**********************************************************************************************
2023 
2024  //**Subtraction assignment to sparse matrices***************************************************
2025  // No special implementation for the subtraction assignment to sparse matrices.
2026  //**********************************************************************************************
2027 
2028  //**Multiplication assignment to dense matrices*************************************************
2029  // No special implementation for the multiplication assignment to dense matrices.
2030  //**********************************************************************************************
2031 
2032  //**Multiplication assignment to sparse matrices************************************************
2033  // No special implementation for the multiplication assignment to sparse matrices.
2034  //**********************************************************************************************
2035 
2036  //**Compile time checks*************************************************************************
2043  //**********************************************************************************************
2044 };
2045 //*************************************************************************************************
2046 
2047 
2048 
2049 
2050 //=================================================================================================
2051 //
2052 // DMATSCALARMULTEXPR SPECIALIZATION
2053 //
2054 //=================================================================================================
2055 
2056 //*************************************************************************************************
2064 template< typename MT1 // Type of the left-hand side dense matrix
2065  , typename MT2 // Type of the right-hand side dense matrix
2066  , typename ST > // Type of the right-hand side scalar value
2067 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
2068  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2069  , private MatScalarMultExpr
2070  , private Computation
2071 {
2072  private:
2073  //**Type definitions****************************************************************************
2074  typedef DMatTDMatMultExpr<MT1,MT2> MMM;
2075  typedef typename MMM::ResultType RES;
2076  typedef typename MT1::ResultType RT1;
2077  typedef typename MT2::ResultType RT2;
2078  typedef typename MT1::CompositeType CT1;
2079  typedef typename MT2::CompositeType CT2;
2080  //**********************************************************************************************
2081 
2082  //**********************************************************************************************
2084 
2087  template< typename T1, typename T2, typename T3, typename T4 >
2088  struct UseSinglePrecisionKernel {
2089  enum { value = IsFloat<typename T1::ElementType>::value &&
2090  IsFloat<typename T2::ElementType>::value &&
2091  IsFloat<typename T3::ElementType>::value &&
2092  !IsComplex<T4>::value };
2093  };
2094  //**********************************************************************************************
2095 
2096  //**********************************************************************************************
2098 
2101  template< typename T1, typename T2, typename T3, typename T4 >
2102  struct UseDoublePrecisionKernel {
2103  enum { value = IsDouble<typename T1::ElementType>::value &&
2104  IsDouble<typename T2::ElementType>::value &&
2105  IsDouble<typename T3::ElementType>::value &&
2106  !IsComplex<T4>::value };
2107  };
2108  //**********************************************************************************************
2109 
2110  //**********************************************************************************************
2112 
2115  template< typename T1, typename T2, typename T3 >
2116  struct UseSinglePrecisionComplexKernel {
2117  typedef complex<float> Type;
2118  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2119  IsSame<typename T2::ElementType,Type>::value &&
2120  IsSame<typename T3::ElementType,Type>::value };
2121  };
2122  //**********************************************************************************************
2123 
2124  //**********************************************************************************************
2126 
2129  template< typename T1, typename T2, typename T3 >
2130  struct UseDoublePrecisionComplexKernel {
2131  typedef complex<double> Type;
2132  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2133  IsSame<typename T2::ElementType,Type>::value &&
2134  IsSame<typename T3::ElementType,Type>::value };
2135  };
2136  //**********************************************************************************************
2137 
2138  //**********************************************************************************************
2140 
2142  template< typename T1, typename T2, typename T3, typename T4 >
2143  struct UseDefaultKernel {
2144  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2145  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2146  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2147  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2148  };
2149  //**********************************************************************************************
2150 
2151  //**********************************************************************************************
2153 
2155  template< typename T1, typename T2, typename T3, typename T4 >
2156  struct UseVectorizedDefaultKernel {
2157  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2158  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2159  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2160  IsSame<typename T1::ElementType,T4>::value &&
2161  IntrinsicTrait<typename T1::ElementType>::addition &&
2162  IntrinsicTrait<typename T1::ElementType>::multiplication };
2163  };
2164  //**********************************************************************************************
2165 
2166  public:
2167  //**Type definitions****************************************************************************
2168  typedef DMatScalarMultExpr<MMM,ST,false> This;
2169  typedef typename MultTrait<RES,ST>::Type ResultType;
2170  typedef typename ResultType::OppositeType OppositeType;
2171  typedef typename ResultType::TransposeType TransposeType;
2172  typedef typename ResultType::ElementType ElementType;
2173  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;
2174  typedef const ElementType ReturnType;
2175  typedef const ResultType CompositeType;
2176 
2178  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand;
2179 
2181  typedef ST RightOperand;
2182 
2184  typedef typename SelectType< IsComputation<MT1>::value, const RT1, CT1 >::Type LT;
2185 
2187  typedef typename SelectType< IsComputation<MT2>::value, const RT2, CT2 >::Type RT;
2188  //**********************************************************************************************
2189 
2190  //**Compilation flags***************************************************************************
// Compilation flag: this expression type reports itself as not vectorizable.
enum {
   vectorizable = 0
};
2193  //**********************************************************************************************
2194 
2195  //**Constructor*********************************************************************************
2201  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2202  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2203  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2204  {}
2205  //**********************************************************************************************
2206 
2207  //**Access operator*****************************************************************************
2214  inline ReturnType operator()( size_t i, size_t j ) const {
2215  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2216  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2217  return matrix_(i,j) * scalar_;
2218  }
2219  //**********************************************************************************************
2220 
2221  //**Rows function*******************************************************************************
2226  inline size_t rows() const {
2227  return matrix_.rows();
2228  }
2229  //**********************************************************************************************
2230 
2231  //**Columns function****************************************************************************
2236  inline size_t columns() const {
2237  return matrix_.columns();
2238  }
2239  //**********************************************************************************************
2240 
2241  //**Left operand access*************************************************************************
2246  inline LeftOperand leftOperand() const {
2247  return matrix_;
2248  }
2249  //**********************************************************************************************
2250 
2251  //**Right operand access************************************************************************
2256  inline RightOperand rightOperand() const {
2257  return scalar_;
2258  }
2259  //**********************************************************************************************
2260 
2261  //**********************************************************************************************
2267  template< typename T >
2268  inline bool canAlias( const T* alias ) const {
2269  return matrix_.canAlias( alias );
2270  }
2271  //**********************************************************************************************
2272 
2273  //**********************************************************************************************
2279  template< typename T >
2280  inline bool isAliased( const T* alias ) const {
2281  return matrix_.isAliased( alias );
2282  }
2283  //**********************************************************************************************
2284 
2285  private:
2286  //**Member variables****************************************************************************
2288  RightOperand scalar_;
2289  //**********************************************************************************************
2290 
2291  //**Assignment to dense matrices****************************************************************
2300  template< typename MT3 // Type of the target dense matrix
2301  , bool SO > // Storage order of the target dense matrix
2302  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2303  {
2305 
2306  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2307  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2308 
2309  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2310  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2311 
2312  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2313  return;
2314  }
2315  else if( left.columns() == 0UL ) {
2316  reset( ~lhs );
2317  return;
2318  }
2319 
2320  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2321  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2322 
2323  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2324  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2325  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2326  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2327  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2328  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2329 
2330  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
2331  DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2332  else
2333  DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
2334  }
2335  //**********************************************************************************************
2336 
2337  //**Default assignment to dense matrices********************************************************
2351  template< typename MT3 // Type of the left-hand side target matrix
2352  , typename MT4 // Type of the left-hand side matrix operand
2353  , typename MT5 // Type of the right-hand side matrix operand
2354  , typename ST2 > // Type of the scalar value
2355  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2356  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2357  {
2358  for( size_t i=0UL; i<A.rows(); ++i ) {
2359  for( size_t k=0UL; k<B.columns(); ++k ) {
2360  C(i,k) = A(i,0UL) * B(0UL,k);
2361  }
2362  for( size_t j=1UL; j<A.columns(); ++j ) {
2363  for( size_t k=0UL; k<B.columns(); ++k ) {
2364  C(i,k) += A(i,j) * B(j,k);
2365  }
2366  }
2367  for( size_t k=0UL; k<B.columns(); ++k ) {
2368  C(i,k) *= scalar;
2369  }
2370  }
2371  }
2372  //**********************************************************************************************
2373 
2374  //**Vectorized default assignment to row-major dense matrices***********************************
2388  template< typename MT3 // Type of the left-hand side target matrix
2389  , typename MT4 // Type of the left-hand side matrix operand
2390  , typename MT5 // Type of the right-hand side matrix operand
2391  , typename ST2 > // Type of the scalar value
2392  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2393  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2394  {
2395  typedef IntrinsicTrait<ElementType> IT;
2396 
2397  const size_t M( A.rows() );
2398  const size_t N( B.columns() );
2399  const size_t K( A.columns() );
2400 
2401  size_t i( 0UL );
2402 
2403  for( ; (i+2UL) <= M; i+=2UL ) {
2404  size_t j( 0UL );
2405  for( ; (j+4UL) <= N; j+=4UL ) {
2406  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2407  for( size_t k=0UL; k<K; k+=IT::size ) {
2408  const IntrinsicType a1( A.load(i ,k) );
2409  const IntrinsicType a2( A.load(i+1UL,k) );
2410  const IntrinsicType b1( B.load(k,j ) );
2411  const IntrinsicType b2( B.load(k,j+1UL) );
2412  const IntrinsicType b3( B.load(k,j+2UL) );
2413  const IntrinsicType b4( B.load(k,j+3UL) );
2414  xmm1 = xmm1 + a1 * b1;
2415  xmm2 = xmm2 + a1 * b2;
2416  xmm3 = xmm3 + a1 * b3;
2417  xmm4 = xmm4 + a1 * b4;
2418  xmm5 = xmm5 + a2 * b1;
2419  xmm6 = xmm6 + a2 * b2;
2420  xmm7 = xmm7 + a2 * b3;
2421  xmm8 = xmm8 + a2 * b4;
2422  }
2423  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2424  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2425  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
2426  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
2427  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
2428  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
2429  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
2430  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
2431  }
2432  for( ; (j+2UL) <= N; j+=2UL ) {
2433  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2434  for( size_t k=0UL; k<K; k+=IT::size ) {
2435  const IntrinsicType a1( A.load(i ,k) );
2436  const IntrinsicType a2( A.load(i+1UL,k) );
2437  const IntrinsicType b1( B.load(k,j ) );
2438  const IntrinsicType b2( B.load(k,j+1UL) );
2439  xmm1 = xmm1 + a1 * b1;
2440  xmm2 = xmm2 + a1 * b2;
2441  xmm3 = xmm3 + a2 * b1;
2442  xmm4 = xmm4 + a2 * b2;
2443  }
2444  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2445  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2446  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2447  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2448  }
2449  if( j < N ) {
2450  IntrinsicType xmm1, xmm2;
2451  for( size_t k=0UL; k<K; k+=IT::size ) {
2452  const IntrinsicType b1( B.load(k,j) );
2453  xmm1 = xmm1 + A.load(i ,k) * b1;
2454  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2455  }
2456  (~C)(i ,j) = sum( xmm1 ) * scalar;
2457  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2458  }
2459  }
2460  if( i < M ) {
2461  size_t j( 0UL );
2462  for( ; (j+4UL) <= N; j+=4UL ) {
2463  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2464  for( size_t k=0UL; k<K; k+=IT::size ) {
2465  const IntrinsicType a1( A.load(i,k) );
2466  xmm1 = xmm1 + a1 * B.load(k,j );
2467  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2468  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2469  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2470  }
2471  (~C)(i,j ) = sum( xmm1 ) * scalar;
2472  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2473  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
2474  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
2475  }
2476  for( ; (j+2UL) <= N; j+=2UL ) {
2477  IntrinsicType xmm1, xmm2;
2478  for( size_t k=0UL; k<K; k+=IT::size ) {
2479  const IntrinsicType a1( A.load(i,k) );
2480  xmm1 = xmm1 + a1 * B.load(k,j );
2481  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2482  }
2483  (~C)(i,j ) = sum( xmm1 ) * scalar;
2484  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2485  }
2486  if( j < N ) {
2487  IntrinsicType xmm1, xmm2;
2488  for( size_t k=0UL; k<K; k+=IT::size ) {
2489  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2490  }
2491  (~C)(i,j) = sum( xmm1 ) * scalar;
2492  }
2493  }
2494  }
2495  //**********************************************************************************************
2496 
2497  //**Vectorized default assignment to column-major dense matrices********************************
2511  template< typename MT3 // Type of the left-hand side target matrix
2512  , typename MT4 // Type of the left-hand side matrix operand
2513  , typename MT5 // Type of the right-hand side matrix operand
2514  , typename ST2 > // Type of the scalar value
2515  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2516  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2517  {
2518  typedef IntrinsicTrait<ElementType> IT;
2519 
2520  const size_t M( A.rows() );
2521  const size_t N( B.columns() );
2522  const size_t K( A.columns() );
2523 
2524  size_t i( 0UL );
2525 
2526  for( ; (i+4UL) <= M; i+=4UL ) {
2527  size_t j( 0UL );
2528  for( ; (j+2UL) <= N; j+=2UL ) {
2529  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2530  for( size_t k=0UL; k<K; k+=IT::size ) {
2531  const IntrinsicType a1( A.load(i ,k) );
2532  const IntrinsicType a2( A.load(i+1UL,k) );
2533  const IntrinsicType a3( A.load(i+2UL,k) );
2534  const IntrinsicType a4( A.load(i+3UL,k) );
2535  const IntrinsicType b1( B.load(k,j ) );
2536  const IntrinsicType b2( B.load(k,j+1UL) );
2537  xmm1 = xmm1 + a1 * b1;
2538  xmm2 = xmm2 + a1 * b2;
2539  xmm3 = xmm3 + a2 * b1;
2540  xmm4 = xmm4 + a2 * b2;
2541  xmm5 = xmm5 + a3 * b1;
2542  xmm6 = xmm6 + a3 * b2;
2543  xmm7 = xmm7 + a4 * b1;
2544  xmm8 = xmm8 + a4 * b2;
2545  }
2546  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2547  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2548  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2549  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2550  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
2551  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
2552  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
2553  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
2554  }
2555  if( j < N ) {
2556  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2557  for( size_t k=0UL; k<K; k+=IT::size ) {
2558  const IntrinsicType b1( B.load(k,j) );
2559  xmm1 = xmm1 + A.load(i ,k) * b1;
2560  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2561  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2562  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2563  }
2564  (~C)(i ,j) = sum( xmm1 ) * scalar;
2565  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2566  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
2567  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
2568  }
2569  }
2570  for( ; (i+2UL) <= M; i+=2UL ) {
2571  size_t j( 0UL );
2572  for( ; (j+2UL) <= N; j+=2UL ) {
2573  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2574  for( size_t k=0UL; k<K; k+=IT::size ) {
2575  const IntrinsicType a1( A.load(i ,k) );
2576  const IntrinsicType a2( A.load(i+1UL,k) );
2577  const IntrinsicType b1( B.load(k,j ) );
2578  const IntrinsicType b2( B.load(k,j+1UL) );
2579  xmm1 = xmm1 + a1 * b1;
2580  xmm2 = xmm2 + a1 * b2;
2581  xmm3 = xmm3 + a2 * b1;
2582  xmm4 = xmm4 + a2 * b2;
2583  }
2584  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2585  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2586  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2587  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2588  }
2589  if( j < N ) {
2590  IntrinsicType xmm1, xmm2;
2591  for( size_t k=0UL; k<K; k+=IT::size ) {
2592  const IntrinsicType b1( B.load(k,j) );
2593  xmm1 = xmm1 + A.load(i ,k) * b1;
2594  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2595  }
2596  (~C)(i ,j) = sum( xmm1 ) * scalar;
2597  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2598  }
2599  }
2600  if( i < M ) {
2601  size_t j( 0UL );
2602  for( ; (j+2UL) <= N; j+=2UL ) {
2603  IntrinsicType xmm1, xmm2;
2604  for( size_t k=0UL; k<K; k+=IT::size ) {
2605  const IntrinsicType a1( A.load(i,k) );
2606  xmm1 = xmm1 + a1 * B.load(k,j );
2607  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2608  }
2609  (~C)(i,j ) = sum( xmm1 ) * scalar;
2610  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2611  }
2612  if( j < N ) {
2613  IntrinsicType xmm1, xmm2;
2614  for( size_t k=0UL; k<K; k+=IT::size ) {
2615  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2616  }
2617  (~C)(i,j) = sum( xmm1 ) * scalar;
2618  }
2619  }
2620  }
2621  //**********************************************************************************************
2622 
2623  //**BLAS-based assignment to dense matrices (default)*******************************************
2637  template< typename MT3 // Type of the left-hand side target matrix
2638  , typename MT4 // Type of the left-hand side matrix operand
2639  , typename MT5 // Type of the right-hand side matrix operand
2640  , typename ST2 > // Type of the scalar value
2641  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2642  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2643  {
2644  selectDefaultAssignKernel( C, A, B, scalar );
2645  }
2646  //**********************************************************************************************
2647 
2648  //**BLAS-based assignment to dense matrices (single precision)**********************************
2649 #if BLAZE_BLAS_MODE
2650 
2663  template< typename MT3 // Type of the left-hand side target matrix
2664  , typename MT4 // Type of the left-hand side matrix operand
2665  , typename MT5 // Type of the right-hand side matrix operand
2666  , typename ST2 > // Type of the scalar value
2667  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2668  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2669  {
2670  using boost::numeric_cast;
2671 
2675 
2676  const int M ( numeric_cast<int>( A.rows() ) );
2677  const int N ( numeric_cast<int>( B.columns() ) );
2678  const int K ( numeric_cast<int>( A.columns() ) );
2679  const int lda( numeric_cast<int>( A.spacing() ) );
2680  const int ldb( numeric_cast<int>( B.spacing() ) );
2681  const int ldc( numeric_cast<int>( C.spacing() ) );
2682 
2683  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2684  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2685  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2686  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
2687  }
2688 #endif
2689  //**********************************************************************************************
2690 
2691  //**BLAS-based assignment to dense matrices (double precision)**********************************
2692 #if BLAZE_BLAS_MODE
2693 
2706  template< typename MT3 // Type of the left-hand side target matrix
2707  , typename MT4 // Type of the left-hand side matrix operand
2708  , typename MT5 // Type of the right-hand side matrix operand
2709  , typename ST2 > // Type of the scalar value
2710  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2711  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2712  {
2713  using boost::numeric_cast;
2714 
2718 
2719  const int M ( numeric_cast<int>( A.rows() ) );
2720  const int N ( numeric_cast<int>( B.columns() ) );
2721  const int K ( numeric_cast<int>( A.columns() ) );
2722  const int lda( numeric_cast<int>( A.spacing() ) );
2723  const int ldb( numeric_cast<int>( B.spacing() ) );
2724  const int ldc( numeric_cast<int>( C.spacing() ) );
2725 
2726  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2727  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2728  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2729  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
2730  }
2731 #endif
2732  //**********************************************************************************************
2733 
2734  //**BLAS-based assignment to dense matrices (single precision complex)**************************
2735 #if BLAZE_BLAS_MODE
2736 
2749  template< typename MT3 // Type of the left-hand side target matrix
2750  , typename MT4 // Type of the left-hand side matrix operand
2751  , typename MT5 // Type of the right-hand side matrix operand
2752  , typename ST2 > // Type of the scalar value
2753  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2754  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2755  {
2756  using boost::numeric_cast;
2757 
2761  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2762  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2763  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2764 
2765  const int M ( numeric_cast<int>( A.rows() ) );
2766  const int N ( numeric_cast<int>( B.columns() ) );
2767  const int K ( numeric_cast<int>( A.columns() ) );
2768  const int lda( numeric_cast<int>( A.spacing() ) );
2769  const int ldb( numeric_cast<int>( B.spacing() ) );
2770  const int ldc( numeric_cast<int>( C.spacing() ) );
2771  const complex<float> alpha( scalar );
2772  const complex<float> beta ( 0.0F, 0.0F );
2773 
2774  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2775  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2776  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2777  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2778  }
2779 #endif
2780  //**********************************************************************************************
2781 
2782  //**BLAS-based assignment to dense matrices (double precision complex)**************************
2783 #if BLAZE_BLAS_MODE
2784 
2797  template< typename MT3 // Type of the left-hand side target matrix
2798  , typename MT4 // Type of the left-hand side matrix operand
2799  , typename MT5 // Type of the right-hand side matrix operand
2800  , typename ST2 > // Type of the scalar value
2801  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2802  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2803  {
2804  using boost::numeric_cast;
2805 
2809  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2810  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2811  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2812 
2813  const int M ( numeric_cast<int>( A.rows() ) );
2814  const int N ( numeric_cast<int>( B.columns() ) );
2815  const int K ( numeric_cast<int>( A.columns() ) );
2816  const int lda( numeric_cast<int>( A.spacing() ) );
2817  const int ldb( numeric_cast<int>( B.spacing() ) );
2818  const int ldc( numeric_cast<int>( C.spacing() ) );
2819  const complex<double> alpha( scalar );
2820  const complex<double> beta ( 0.0, 0.0 );
2821 
2822  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2823  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2824  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2825  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2826  }
2827 #endif
2828  //**********************************************************************************************
2829 
2830  //**Assignment to sparse matrices***************************************************************
2842  template< typename MT // Type of the target sparse matrix
2843  , bool SO > // Storage order of the target sparse matrix
2844  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2845  {
2847 
2848  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2849 
2856 
2857  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2858  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2859 
2860  const TmpType tmp( rhs );
2861  assign( ~lhs, tmp );
2862  }
2863  //**********************************************************************************************
2864 
2865  //**Addition assignment to dense matrices*******************************************************
2877  template< typename MT3 // Type of the target dense matrix
2878  , bool SO > // Storage order of the target dense matrix
2879  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2880  {
2882 
2883  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2884  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2885 
2886  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2887  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2888 
2889  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
2890  return;
2891  }
2892 
2893  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2894  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2895 
2896  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2897  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2898  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2899  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2900  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2901  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2902 
2903  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
2904  DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2905  else
2906  DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2907  }
2908  //**********************************************************************************************
2909 
2910  //**Default addition assignment to dense matrices***********************************************
2924  template< typename MT3 // Type of the left-hand side target matrix
2925  , typename MT4 // Type of the left-hand side matrix operand
2926  , typename MT5 // Type of the right-hand side matrix operand
2927  , typename ST2 > // Type of the scalar value
2928  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2929  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2930  {
2931  const ResultType tmp( A * B * scalar );
2932  addAssign( C, tmp );
2933  }
2934  //**********************************************************************************************
2935 
2936  //**Vectorized default addition assignment to row-major dense matrices**************************
2950  template< typename MT3 // Type of the left-hand side target matrix
2951  , typename MT4 // Type of the left-hand side matrix operand
2952  , typename MT5 // Type of the right-hand side matrix operand
2953  , typename ST2 > // Type of the scalar value
2954  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2955  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2956  {
2957  typedef IntrinsicTrait<ElementType> IT;
2958 
2959  const size_t M( A.rows() );
2960  const size_t N( B.columns() );
2961  const size_t K( A.columns() );
2962 
2963  size_t i( 0UL );
2964 
2965  for( ; (i+2UL) <= M; i+=2UL ) {
2966  size_t j( 0UL );
2967  for( ; (j+4UL) <= N; j+=4UL ) {
2968  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2969  for( size_t k=0UL; k<K; k+=IT::size ) {
2970  const IntrinsicType a1( A.load(i ,k) );
2971  const IntrinsicType a2( A.load(i+1UL,k) );
2972  const IntrinsicType b1( B.load(k,j ) );
2973  const IntrinsicType b2( B.load(k,j+1UL) );
2974  const IntrinsicType b3( B.load(k,j+2UL) );
2975  const IntrinsicType b4( B.load(k,j+3UL) );
2976  xmm1 = xmm1 + a1 * b1;
2977  xmm2 = xmm2 + a1 * b2;
2978  xmm3 = xmm3 + a1 * b3;
2979  xmm4 = xmm4 + a1 * b4;
2980  xmm5 = xmm5 + a2 * b1;
2981  xmm6 = xmm6 + a2 * b2;
2982  xmm7 = xmm7 + a2 * b3;
2983  xmm8 = xmm8 + a2 * b4;
2984  }
2985  (~C)(i ,j ) += sum( xmm1 ) * scalar;
2986  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
2987  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
2988  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
2989  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
2990  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
2991  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
2992  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
2993  }
2994  for( ; (j+2UL) <= N; j+=2UL ) {
2995  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2996  for( size_t k=0UL; k<K; k+=IT::size ) {
2997  const IntrinsicType a1( A.load(i ,k) );
2998  const IntrinsicType a2( A.load(i+1UL,k) );
2999  const IntrinsicType b1( B.load(k,j ) );
3000  const IntrinsicType b2( B.load(k,j+1UL) );
3001  xmm1 = xmm1 + a1 * b1;
3002  xmm2 = xmm2 + a1 * b2;
3003  xmm3 = xmm3 + a2 * b1;
3004  xmm4 = xmm4 + a2 * b2;
3005  }
3006  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3007  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3008  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3009  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3010  }
3011  if( j < N ) {
3012  IntrinsicType xmm1, xmm2;
3013  for( size_t k=0UL; k<K; k+=IT::size ) {
3014  const IntrinsicType b1( B.load(k,j) );
3015  xmm1 = xmm1 + A.load(i ,k) * b1;
3016  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3017  }
3018  (~C)(i ,j) += sum( xmm1 ) * scalar;
3019  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3020  }
3021  }
3022  if( i < M ) {
3023  size_t j( 0UL );
3024  for( ; (j+4UL) <= N; j+=4UL ) {
3025  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3026  for( size_t k=0UL; k<K; k+=IT::size ) {
3027  const IntrinsicType a1( A.load(i,k) );
3028  xmm1 = xmm1 + a1 * B.load(k,j );
3029  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3030  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3031  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3032  }
3033  (~C)(i,j ) += sum( xmm1 ) * scalar;
3034  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3035  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
3036  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
3037  }
3038  for( ; (j+2UL) <= N; j+=2UL ) {
3039  IntrinsicType xmm1, xmm2;
3040  for( size_t k=0UL; k<K; k+=IT::size ) {
3041  const IntrinsicType a1( A.load(i,k) );
3042  xmm1 = xmm1 + a1 * B.load(k,j );
3043  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3044  }
3045  (~C)(i,j ) += sum( xmm1 ) * scalar;
3046  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3047  }
3048  if( j < N ) {
3049  IntrinsicType xmm1, xmm2;
3050  for( size_t k=0UL; k<K; k+=IT::size ) {
3051  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3052  }
3053  (~C)(i,j) += sum( xmm1 ) * scalar;
3054  }
3055  }
3056  }
3057  //**********************************************************************************************
3058 
3059  //**Vectorized default addition assignment to column-major dense matrices***********************
3073  template< typename MT3 // Type of the left-hand side target matrix
3074  , typename MT4 // Type of the left-hand side matrix operand
3075  , typename MT5 // Type of the right-hand side matrix operand
3076  , typename ST2 > // Type of the scalar value
3077  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3078  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3079  {
3080  typedef IntrinsicTrait<ElementType> IT;
3081 
3082  const size_t M( A.rows() );
3083  const size_t N( B.columns() );
3084  const size_t K( A.columns() );
3085 
3086  size_t i( 0UL );
3087 
3088  for( ; (i+4UL) <= M; i+=4UL ) {
3089  size_t j( 0UL );
3090  for( ; (j+2UL) <= N; j+=2UL ) {
3091  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3092  for( size_t k=0UL; k<K; k+=IT::size ) {
3093  const IntrinsicType a1( A.load(i ,k) );
3094  const IntrinsicType a2( A.load(i+1UL,k) );
3095  const IntrinsicType a3( A.load(i+2UL,k) );
3096  const IntrinsicType a4( A.load(i+3UL,k) );
3097  const IntrinsicType b1( B.load(k,j ) );
3098  const IntrinsicType b2( B.load(k,j+1UL) );
3099  xmm1 = xmm1 + a1 * b1;
3100  xmm2 = xmm2 + a1 * b2;
3101  xmm3 = xmm3 + a2 * b1;
3102  xmm4 = xmm4 + a2 * b2;
3103  xmm5 = xmm5 + a3 * b1;
3104  xmm6 = xmm6 + a3 * b2;
3105  xmm7 = xmm7 + a4 * b1;
3106  xmm8 = xmm8 + a4 * b2;
3107  }
3108  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3109  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3110  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3111  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3112  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
3113  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
3114  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
3115  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
3116  }
3117  if( j < N ) {
3118  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3119  for( size_t k=0UL; k<K; k+=IT::size ) {
3120  const IntrinsicType b1( B.load(k,j) );
3121  xmm1 = xmm1 + A.load(i ,k) * b1;
3122  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3123  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3124  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3125  }
3126  (~C)(i ,j) += sum( xmm1 ) * scalar;
3127  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3128  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
3129  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
3130  }
3131  }
3132  for( ; (i+2UL) <= M; i+=2UL ) {
3133  size_t j( 0UL );
3134  for( ; (j+2UL) <= N; j+=2UL ) {
3135  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3136  for( size_t k=0UL; k<K; k+=IT::size ) {
3137  const IntrinsicType a1( A.load(i ,k) );
3138  const IntrinsicType a2( A.load(i+1UL,k) );
3139  const IntrinsicType b1( B.load(k,j ) );
3140  const IntrinsicType b2( B.load(k,j+1UL) );
3141  xmm1 = xmm1 + a1 * b1;
3142  xmm2 = xmm2 + a1 * b2;
3143  xmm3 = xmm3 + a2 * b1;
3144  xmm4 = xmm4 + a2 * b2;
3145  }
3146  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3147  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3148  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3149  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3150  }
3151  if( j < N ) {
3152  IntrinsicType xmm1, xmm2;
3153  for( size_t k=0UL; k<K; k+=IT::size ) {
3154  const IntrinsicType b1( B.load(k,j) );
3155  xmm1 = xmm1 + A.load(i ,k) * b1;
3156  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3157  }
3158  (~C)(i ,j) += sum( xmm1 ) * scalar;
3159  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3160  }
3161  }
3162  if( i < M ) {
3163  size_t j( 0UL );
3164  for( ; (j+2UL) <= N; j+=2UL ) {
3165  IntrinsicType xmm1, xmm2;
3166  for( size_t k=0UL; k<K; k+=IT::size ) {
3167  const IntrinsicType a1( A.load(i,k) );
3168  xmm1 = xmm1 + a1 * B.load(k,j );
3169  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3170  }
3171  (~C)(i,j ) += sum( xmm1 ) * scalar;
3172  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3173  }
3174  if( j < N ) {
3175  IntrinsicType xmm1, xmm2;
3176  for( size_t k=0UL; k<K; k+=IT::size ) {
3177  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3178  }
3179  (~C)(i,j) += sum( xmm1 ) * scalar;
3180  }
3181  }
3182  }
3183  //**********************************************************************************************
3184 
3185  //**BLAS-based addition assignment to dense matrices (default)**********************************
3199  template< typename MT3 // Type of the left-hand side target matrix
3200  , typename MT4 // Type of the left-hand side matrix operand
3201  , typename MT5 // Type of the right-hand side matrix operand
3202  , typename ST2 > // Type of the scalar value
3203  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3204  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3205  {
3206  selectDefaultAddAssignKernel( C, A, B, scalar );
3207  }
3208  //**********************************************************************************************
3209 
3210  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3211 #if BLAZE_BLAS_MODE
3212 
3225  template< typename MT3 // Type of the left-hand side target matrix
3226  , typename MT4 // Type of the left-hand side matrix operand
3227  , typename MT5 // Type of the right-hand side matrix operand
3228  , typename ST2 > // Type of the scalar value
3229  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3230  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3231  {
3232  using boost::numeric_cast;
3233 
3237 
3238  const int M ( numeric_cast<int>( A.rows() ) );
3239  const int N ( numeric_cast<int>( B.columns() ) );
3240  const int K ( numeric_cast<int>( A.columns() ) );
3241  const int lda( numeric_cast<int>( A.spacing() ) );
3242  const int ldb( numeric_cast<int>( B.spacing() ) );
3243  const int ldc( numeric_cast<int>( C.spacing() ) );
3244 
3245  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3246  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3247  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3248  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3249  }
3250 #endif
3251  //**********************************************************************************************
3252 
3253  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3254 #if BLAZE_BLAS_MODE
3255 
3268  template< typename MT3 // Type of the left-hand side target matrix
3269  , typename MT4 // Type of the left-hand side matrix operand
3270  , typename MT5 // Type of the right-hand side matrix operand
3271  , typename ST2 > // Type of the scalar value
3272  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3273  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3274  {
3275  using boost::numeric_cast;
3276 
3280 
3281  const int M ( numeric_cast<int>( A.rows() ) );
3282  const int N ( numeric_cast<int>( B.columns() ) );
3283  const int K ( numeric_cast<int>( A.columns() ) );
3284  const int lda( numeric_cast<int>( A.spacing() ) );
3285  const int ldb( numeric_cast<int>( B.spacing() ) );
3286  const int ldc( numeric_cast<int>( C.spacing() ) );
3287 
3288  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3289  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3290  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3291  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3292  }
3293 #endif
3294  //**********************************************************************************************
3295 
3296  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3297 #if BLAZE_BLAS_MODE
3298 
3311  template< typename MT3 // Type of the left-hand side target matrix
3312  , typename MT4 // Type of the left-hand side matrix operand
3313  , typename MT5 // Type of the right-hand side matrix operand
3314  , typename ST2 > // Type of the scalar value
3315  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3316  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3317  {
3318  using boost::numeric_cast;
3319 
3323  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3324  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3325  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3326 
3327  const int M ( numeric_cast<int>( A.rows() ) );
3328  const int N ( numeric_cast<int>( B.columns() ) );
3329  const int K ( numeric_cast<int>( A.columns() ) );
3330  const int lda( numeric_cast<int>( A.spacing() ) );
3331  const int ldb( numeric_cast<int>( B.spacing() ) );
3332  const int ldc( numeric_cast<int>( C.spacing() ) );
3333  const complex<float> alpha( scalar );
3334  const complex<float> beta ( 1.0F, 0.0F );
3335 
3336  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3337  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3338  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3339  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3340  }
3341 #endif
3342  //**********************************************************************************************
3343 
3344  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3345 #if BLAZE_BLAS_MODE
3346 
3359  template< typename MT3 // Type of the left-hand side target matrix
3360  , typename MT4 // Type of the left-hand side matrix operand
3361  , typename MT5 // Type of the right-hand side matrix operand
3362  , typename ST2 > // Type of the scalar value
3363  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3364  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3365  {
3366  using boost::numeric_cast;
3367 
3371  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3372  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3373  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3374 
3375  const int M ( numeric_cast<int>( A.rows() ) );
3376  const int N ( numeric_cast<int>( B.columns() ) );
3377  const int K ( numeric_cast<int>( A.columns() ) );
3378  const int lda( numeric_cast<int>( A.spacing() ) );
3379  const int ldb( numeric_cast<int>( B.spacing() ) );
3380  const int ldc( numeric_cast<int>( C.spacing() ) );
3381  const complex<double> alpha( scalar );
3382  const complex<double> beta ( 1.0, 0.0 );
3383 
3384  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3385  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3386  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3387  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3388  }
3389 #endif
3390  //**********************************************************************************************
3391 
3392  //**Addition assignment to sparse matrices******************************************************
3393  // No special implementation for the addition assignment to sparse matrices.
3394  //**********************************************************************************************
3395 
3396  //**Subtraction assignment to dense matrices****************************************************
3408  template< typename MT3 // Type of the target dense matrix
3409  , bool SO > // Storage order of the target dense matrix
3410  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3411  {
3413 
3414  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3415  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3416 
3417  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3418  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3419 
3420  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3421  return;
3422  }
3423 
3424  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3425  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3426 
3427  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3428  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3429  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3430  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3431  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3432  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3433 
3434  if( (~lhs).rows() * (~lhs).columns() < DMATTDMATMULT_THRESHOLD )
3435  DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3436  else
3437  DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3438  }
3439  //**********************************************************************************************
3440 
3441  //**Default subtraction assignment to dense matrices********************************************
3455  template< typename MT3 // Type of the left-hand side target matrix
3456  , typename MT4 // Type of the left-hand side matrix operand
3457  , typename MT5 // Type of the right-hand side matrix operand
3458  , typename ST2 > // Type of the scalar value
3459  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3460  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3461  {
3462  const ResultType tmp( A * B * scalar );
3463  subAssign( C, tmp );
3464  }
3465  //**********************************************************************************************
3466 
3467  //**Vectorized default subtraction assignment to row-major dense matrices***********************
3481  template< typename MT3 // Type of the left-hand side target matrix
3482  , typename MT4 // Type of the left-hand side matrix operand
3483  , typename MT5 // Type of the right-hand side matrix operand
3484  , typename ST2 > // Type of the scalar value
3485  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3486  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3487  {
3488  typedef IntrinsicTrait<ElementType> IT;
3489 
3490  const size_t M( A.rows() );
3491  const size_t N( B.columns() );
3492  const size_t K( A.columns() );
3493 
3494  size_t i( 0UL );
3495 
3496  for( ; (i+2UL) <= M; i+=2UL ) {
3497  size_t j( 0UL );
3498  for( ; (j+4UL) <= N; j+=4UL ) {
3499  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3500  for( size_t k=0UL; k<K; k+=IT::size ) {
3501  const IntrinsicType a1( A.load(i ,k) );
3502  const IntrinsicType a2( A.load(i+1UL,k) );
3503  const IntrinsicType b1( B.load(k,j ) );
3504  const IntrinsicType b2( B.load(k,j+1UL) );
3505  const IntrinsicType b3( B.load(k,j+2UL) );
3506  const IntrinsicType b4( B.load(k,j+3UL) );
3507  xmm1 = xmm1 + a1 * b1;
3508  xmm2 = xmm2 + a1 * b2;
3509  xmm3 = xmm3 + a1 * b3;
3510  xmm4 = xmm4 + a1 * b4;
3511  xmm5 = xmm5 + a2 * b1;
3512  xmm6 = xmm6 + a2 * b2;
3513  xmm7 = xmm7 + a2 * b3;
3514  xmm8 = xmm8 + a2 * b4;
3515  }
3516  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3517  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3518  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
3519  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
3520  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
3521  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
3522  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
3523  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
3524  }
3525  for( ; (j+2UL) <= N; j+=2UL ) {
3526  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3527  for( size_t k=0UL; k<K; k+=IT::size ) {
3528  const IntrinsicType a1( A.load(i ,k) );
3529  const IntrinsicType a2( A.load(i+1UL,k) );
3530  const IntrinsicType b1( B.load(k,j ) );
3531  const IntrinsicType b2( B.load(k,j+1UL) );
3532  xmm1 = xmm1 + a1 * b1;
3533  xmm2 = xmm2 + a1 * b2;
3534  xmm3 = xmm3 + a2 * b1;
3535  xmm4 = xmm4 + a2 * b2;
3536  }
3537  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3538  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3539  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
3540  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
3541  }
3542  if( j < N ) {
3543  IntrinsicType xmm1, xmm2;
3544  for( size_t k=0UL; k<K; k+=IT::size ) {
3545  const IntrinsicType b1( B.load(k,j) );
3546  xmm1 = xmm1 + A.load(i ,k) * b1;
3547  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3548  }
3549  (~C)(i ,j) -= sum( xmm1 ) * scalar;
3550  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
3551  }
3552  }
3553  if( i < M ) {
3554  size_t j( 0UL );
3555  for( ; (j+4UL) <= N; j+=4UL ) {
3556  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3557  for( size_t k=0UL; k<K; k+=IT::size ) {
3558  const IntrinsicType a1( A.load(i,k) );
3559  xmm1 = xmm1 + a1 * B.load(k,j );
3560  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3561  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3562  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3563  }
3564  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3565  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3566  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
3567  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
3568  }
3569  for( ; (j+2UL) <= N; j+=2UL ) {
3570  IntrinsicType xmm1, xmm2;
3571  for( size_t k=0UL; k<K; k+=IT::size ) {
3572  const IntrinsicType a1( A.load(i,k) );
3573  xmm1 = xmm1 + a1 * B.load(k,j );
3574  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3575  }
3576  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3577  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3578  }
3579  if( j < N ) {
3580  IntrinsicType xmm1, xmm2;
3581  for( size_t k=0UL; k<K; k+=IT::size ) {
3582  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3583  }
3584  (~C)(i,j) -= sum( xmm1 ) * scalar;
3585  }
3586  }
3587  }
3588  //**********************************************************************************************
3589 
3590  //**Vectorized default subtraction assignment to column-major dense matrices********************
3604  template< typename MT3 // Type of the left-hand side target matrix
3605  , typename MT4 // Type of the left-hand side matrix operand
3606  , typename MT5 // Type of the right-hand side matrix operand
3607  , typename ST2 > // Type of the scalar value
3608  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3609  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3610  {
3611  typedef IntrinsicTrait<ElementType> IT;
3612 
3613  const size_t M( A.rows() );
3614  const size_t N( B.columns() );
3615  const size_t K( A.columns() );
3616 
3617  size_t i( 0UL );
3618 
3619  for( ; (i+4UL) <= M; i+=4UL ) {
3620  size_t j( 0UL );
3621  for( ; (j+2UL) <= N; j+=2UL ) {
3622  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3623  for( size_t k=0UL; k<K; k+=IT::size ) {
3624  const IntrinsicType a1( A.load(i ,k) );
3625  const IntrinsicType a2( A.load(i+1UL,k) );
3626  const IntrinsicType a3( A.load(i+2UL,k) );
3627  const IntrinsicType a4( A.load(i+3UL,k) );
3628  const IntrinsicType b1( B.load(k,j ) );
3629  const IntrinsicType b2( B.load(k,j+1UL) );
3630  xmm1 = xmm1 + a1 * b1;
3631  xmm2 = xmm2 + a1 * b2;
3632  xmm3 = xmm3 + a2 * b1;
3633  xmm4 = xmm4 + a2 * b2;
3634  xmm5 = xmm5 + a3 * b1;
3635  xmm6 = xmm6 + a3 * b2;
3636  xmm7 = xmm7 + a4 * b1;
3637  xmm8 = xmm8 + a4 * b2;
3638  }
3639  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3640  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3641  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
3642  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
3643  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
3644  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
3645  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
3646  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
3647  }
3648  if( j < N ) {
3649  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3650  for( size_t k=0UL; k<K; k+=IT::size ) {
3651  const IntrinsicType b1( B.load(k,j) );
3652  xmm1 = xmm1 + A.load(i ,k) * b1;
3653  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3654  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3655  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3656  }
3657  (~C)(i ,j) -= sum( xmm1 ) * scalar;
3658  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
3659  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
3660  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
3661  }
3662  }
3663  for( ; (i+2UL) <= M; i+=2UL ) {
3664  size_t j( 0UL );
3665  for( ; (j+2UL) <= N; j+=2UL ) {
3666  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3667  for( size_t k=0UL; k<K; k+=IT::size ) {
3668  const IntrinsicType a1( A.load(i ,k) );
3669  const IntrinsicType a2( A.load(i+1UL,k) );
3670  const IntrinsicType b1( B.load(k,j ) );
3671  const IntrinsicType b2( B.load(k,j+1UL) );
3672  xmm1 = xmm1 + a1 * b1;
3673  xmm2 = xmm2 + a1 * b2;
3674  xmm3 = xmm3 + a2 * b1;
3675  xmm4 = xmm4 + a2 * b2;
3676  }
3677  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3678  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3679  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
3680  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
3681  }
3682  if( j < N ) {
3683  IntrinsicType xmm1, xmm2;
3684  for( size_t k=0UL; k<K; k+=IT::size ) {
3685  const IntrinsicType b1( B.load(k,j) );
3686  xmm1 = xmm1 + A.load(i ,k) * b1;
3687  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3688  }
3689  (~C)(i ,j) -= sum( xmm1 ) * scalar;
3690  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
3691  }
3692  }
3693  if( i < M ) {
3694  size_t j( 0UL );
3695  for( ; (j+2UL) <= N; j+=2UL ) {
3696  IntrinsicType xmm1, xmm2;
3697  for( size_t k=0UL; k<K; k+=IT::size ) {
3698  const IntrinsicType a1( A.load(i,k) );
3699  xmm1 = xmm1 + a1 * B.load(k,j );
3700  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3701  }
3702  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3703  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3704  }
3705  if( j < N ) {
3706  IntrinsicType xmm1, xmm2;
3707  for( size_t k=0UL; k<K; k+=IT::size ) {
3708  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3709  }
3710  (~C)(i,j) -= sum( xmm1 ) * scalar;
3711  }
3712  }
3713  }
3714  //**********************************************************************************************
3715 
3716  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3730  template< typename MT3 // Type of the left-hand side target matrix
3731  , typename MT4 // Type of the left-hand side matrix operand
3732  , typename MT5 // Type of the right-hand side matrix operand
3733  , typename ST2 > // Type of the scalar value
3734  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3735  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3736  {
3737  selectDefaultSubAssignKernel( C, A, B, scalar );
3738  }
3739  //**********************************************************************************************
3740 
3741  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3742 #if BLAZE_BLAS_MODE
3743 
3756  template< typename MT3 // Type of the left-hand side target matrix
3757  , typename MT4 // Type of the left-hand side matrix operand
3758  , typename MT5 // Type of the right-hand side matrix operand
3759  , typename ST2 > // Type of the scalar value
3760  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3761  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3762  {
3763  using boost::numeric_cast;
3764 
3768 
3769  const int M ( numeric_cast<int>( A.rows() ) );
3770  const int N ( numeric_cast<int>( B.columns() ) );
3771  const int K ( numeric_cast<int>( A.columns() ) );
3772  const int lda( numeric_cast<int>( A.spacing() ) );
3773  const int ldb( numeric_cast<int>( B.spacing() ) );
3774  const int ldc( numeric_cast<int>( C.spacing() ) );
3775 
3776  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3777  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3778  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3779  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3780  }
3781 #endif
3782  //**********************************************************************************************
3783 
3784  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3785 #if BLAZE_BLAS_MODE
3786 
3799  template< typename MT3 // Type of the left-hand side target matrix
3800  , typename MT4 // Type of the left-hand side matrix operand
3801  , typename MT5 // Type of the right-hand side matrix operand
3802  , typename ST2 > // Type of the scalar value
3803  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3804  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3805  {
3806  using boost::numeric_cast;
3807 
3811 
3812  const int M ( numeric_cast<int>( A.rows() ) );
3813  const int N ( numeric_cast<int>( B.columns() ) );
3814  const int K ( numeric_cast<int>( A.columns() ) );
3815  const int lda( numeric_cast<int>( A.spacing() ) );
3816  const int ldb( numeric_cast<int>( B.spacing() ) );
3817  const int ldc( numeric_cast<int>( C.spacing() ) );
3818 
3819  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3820  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3821  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3822  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3823  }
3824 #endif
3825  //**********************************************************************************************
3826 
3827  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3828 #if BLAZE_BLAS_MODE
3829 
3842  template< typename MT3 // Type of the left-hand side target matrix
3843  , typename MT4 // Type of the left-hand side matrix operand
3844  , typename MT5 // Type of the right-hand side matrix operand
3845  , typename ST2 > // Type of the scalar value
3846  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3847  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3848  {
3849  using boost::numeric_cast;
3850 
3854  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3855  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3856  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3857 
3858  const int M ( numeric_cast<int>( A.rows() ) );
3859  const int N ( numeric_cast<int>( B.columns() ) );
3860  const int K ( numeric_cast<int>( A.columns() ) );
3861  const int lda( numeric_cast<int>( A.spacing() ) );
3862  const int ldb( numeric_cast<int>( B.spacing() ) );
3863  const int ldc( numeric_cast<int>( C.spacing() ) );
3864  const complex<float> alpha( -scalar );
3865  const complex<float> beta ( 1.0F, 0.0F );
3866 
3867  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3868  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3869  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3870  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3871  }
3872 #endif
3873  //**********************************************************************************************
3874 
3875  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3876 #if BLAZE_BLAS_MODE
3877 
3890  template< typename MT3 // Type of the left-hand side target matrix
3891  , typename MT4 // Type of the left-hand side matrix operand
3892  , typename MT5 // Type of the right-hand side matrix operand
3893  , typename ST2 > // Type of the scalar value
3894  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3895  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3896  {
3897  using boost::numeric_cast;
3898 
3902  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3903  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3904  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3905 
3906  const int M ( numeric_cast<int>( A.rows() ) );
3907  const int N ( numeric_cast<int>( B.columns() ) );
3908  const int K ( numeric_cast<int>( A.columns() ) );
3909  const int lda( numeric_cast<int>( A.spacing() ) );
3910  const int ldb( numeric_cast<int>( B.spacing() ) );
3911  const int ldc( numeric_cast<int>( C.spacing() ) );
3912  const complex<double> alpha( -scalar );
3913  const complex<double> beta ( 1.0, 0.0 );
3914 
3915  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3916  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3917  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3918  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3919  }
3920 #endif
3921  //**********************************************************************************************
3922 
3923  //**Subtraction assignment to sparse matrices***************************************************
3924  // No special implementation for the subtraction assignment to sparse matrices.
3925  //**********************************************************************************************
3926 
3927  //**Multiplication assignment to dense matrices*************************************************
3928  // No special implementation for the multiplication assignment to dense matrices.
3929  //**********************************************************************************************
3930 
3931  //**Multiplication assignment to sparse matrices************************************************
3932  // No special implementation for the multiplication assignment to sparse matrices.
3933  //**********************************************************************************************
3934 
3935  //**Compile time checks*************************************************************************
3944  //**********************************************************************************************
3945 };
3947 //*************************************************************************************************
3948 
3949 
3950 
3951 
3952 //=================================================================================================
3953 //
3954 // GLOBAL BINARY ARITHMETIC OPERATORS
3955 //
3956 //=================================================================================================
3957 
3958 //*************************************************************************************************
// Multiplication operator for the product of a row-major dense matrix (lhs) and a
// column-major dense matrix (rhs), i.e. A = B * C.
//
// Parameters:
//   lhs  The left-hand side dense matrix of the multiplication.
//   rhs  The right-hand side dense matrix of the multiplication.
// Returns:
//   A DMatTDMatMultExpr<T1,T2> expression object built from the two unwrapped (~) operands.
// Throws:
//   std::invalid_argument ("Matrix sizes do not match") if the number of columns of lhs
//   differs from the number of rows of rhs.
//
// NOTE(review): listing lines 3990 (function declarator) and 3992 (function trace macro) were
// missing from this listing and have been restored; confirm both against the upstream header.
3987 template< typename T1 // Type of the left-hand side dense matrix
3988  , typename T2 > // Type of the right-hand side dense matrix
3989 inline const DMatTDMatMultExpr<T1,T2>
3990  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,true>& rhs )
3991 {
3992  BLAZE_FUNCTION_TRACE;
3993 
// The inner dimensions must agree before the expression object is created.
3994  if( (~lhs).columns() != (~rhs).rows() )
3995  throw std::invalid_argument( "Matrix sizes do not match" );
3996 
3997  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
3998 }
3999 //*************************************************************************************************
4000 
4001 
4002 
4003 
4004 //=================================================================================================
4005 //
4006 // EXPRESSION TRAIT SPECIALIZATIONS
4007 //
4008 //=================================================================================================
4009 
4010 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> matrix operand and
// a vector operand of type VT.
//
// The nested 'Type' is selected via SelectType:
//  - If MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT is a dense
//    column vector, 'Type' is DMatDVecMultExprTrait< MT1, X >::Type, where X is
//    TDMatDVecMultExprTrait<MT2,VT>::Type. In other words, the right-hand side matrix is
//    combined with the vector first and the left-hand side matrix is applied to that result.
//  - Otherwise 'Type' is INVALID_TYPE.
4012 template< typename MT1, typename MT2, typename VT >
4013 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4014 {
4015  public:
4016  //**********************************************************************************************
4017  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4018  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4019  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4020  , typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4021  , INVALID_TYPE >::Type Type;
4022  //**********************************************************************************************
4023 };
4025 //*************************************************************************************************
4026 
4027 
4028 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> matrix operand and
// a vector operand of type VT.
//
// The nested 'Type' is selected via SelectType:
//  - If MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT is a sparse
//    column vector, 'Type' is DMatDVecMultExprTrait< MT1, X >::Type, where X is
//    TDMatSVecMultExprTrait<MT2,VT>::Type. Note that the outer trait is the dense-vector
//    variant: it is applied to the result type of the inner matrix/sparse-vector trait.
//  - Otherwise 'Type' is INVALID_TYPE.
4030 template< typename MT1, typename MT2, typename VT >
4031 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4032 {
4033  public:
4034  //**********************************************************************************************
4035  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4036  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4037  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4038  , typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4039  , INVALID_TYPE >::Type Type;
4040  //**********************************************************************************************
4041 };
4043 //*************************************************************************************************
4044 
4045 
4046 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for a vector operand of type VT and a
// DMatTDMatMultExpr<MT1,MT2> matrix operand.
//
// The nested 'Type' is selected via SelectType:
//  - If VT is a dense row vector, MT1 is a row-major dense matrix and MT2 is a column-major
//    dense matrix, 'Type' is TDVecTDMatMultExprTrait< X, MT2 >::Type, where X is
//    TDVecDMatMultExprTrait<VT,MT1>::Type. In other words, the vector is combined with the
//    left-hand side matrix first and that result is combined with the right-hand side matrix.
//  - Otherwise 'Type' is INVALID_TYPE.
4048 template< typename VT, typename MT1, typename MT2 >
4049 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4050 {
4051  public:
4052  //**********************************************************************************************
4053  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4054  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4055  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4056  , typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4057  , INVALID_TYPE >::Type Type;
4058  //**********************************************************************************************
4059 };
4061 //*************************************************************************************************
4062 
4063 
4064 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for a vector operand of type VT and a
// DMatTDMatMultExpr<MT1,MT2> matrix operand.
//
// The nested 'Type' is selected via SelectType:
//  - If VT is a sparse row vector, MT1 is a row-major dense matrix and MT2 is a column-major
//    dense matrix, 'Type' is TDVecTDMatMultExprTrait< X, MT2 >::Type, where X is
//    TSVecDMatMultExprTrait<VT,MT1>::Type. Note that the outer trait is the dense-vector
//    variant: it is applied to the result type of the inner sparse-vector/matrix trait.
//  - Otherwise 'Type' is INVALID_TYPE.
4066 template< typename VT, typename MT1, typename MT2 >
4067 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4068 {
4069  public:
4070  //**********************************************************************************************
4071  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4072  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4073  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4074  , typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4075  , INVALID_TYPE >::Type Type;
4076  //**********************************************************************************************
4077 };
4079 //*************************************************************************************************
4080 
4081 
4082 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for DMatTDMatMultExpr<MT1,MT2>.
//
// The nested 'Type' is MultExprTrait applied to the submatrix expression types of both
// operands (SubmatrixExprTrait<const MT1>::Type and SubmatrixExprTrait<const MT2>::Type),
// i.e. the submatrix of the product is expressed as a product of submatrices of the operands.
4084 template< typename MT1, typename MT2 >
4085 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4086 {
4087  public:
4088  //**********************************************************************************************
4089  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1>::Type
4090  , typename SubmatrixExprTrait<const MT2>::Type >::Type Type;
4091  //**********************************************************************************************
4092 };
4094 //*************************************************************************************************
4095 
4096 
4097 //*************************************************************************************************
// Specialization of RowExprTrait for DMatTDMatMultExpr<MT1,MT2>.
//
// The nested 'Type' is MultExprTrait applied to the row expression type of the left-hand side
// operand (RowExprTrait<const MT1>::Type) and the unmodified right-hand side matrix type MT2,
// i.e. a row of the product is expressed as (row of the left operand) * (right operand).
4099 template< typename MT1, typename MT2 >
4100 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4101 {
4102  public:
4103  //**********************************************************************************************
4104  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4105  //**********************************************************************************************
4106 };
4108 //*************************************************************************************************
4109 
4110 
4111 //*************************************************************************************************
// Specialization of ColumnExprTrait for DMatTDMatMultExpr<MT1,MT2>.
//
// The nested 'Type' is MultExprTrait applied to the unmodified left-hand side matrix type MT1
// and the column expression type of the right-hand side operand
// (ColumnExprTrait<const MT2>::Type), i.e. a column of the product is expressed as
// (left operand) * (column of the right operand).
4113 template< typename MT1, typename MT2 >
4114 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4115 {
4116  public:
4117  //**********************************************************************************************
4118  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4119  //**********************************************************************************************
4120 };
4122 //*************************************************************************************************
4123 
4124 } // namespace blaze
4125 
4126 #endif
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:221
Data type constraint.
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:229
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:3703
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:223
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:267
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
SelectType< IsComputation< MT2 >::value, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:238
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:219
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:246
Header file for the TDVecSMatMultExprTrait class template.
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:122
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:252
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:307
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:226
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
SelectType< IsComputation< MT1 >::value, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:235
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:351
Header file for the DenseMatrix base class.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:121
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:123
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:224
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:297
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:220
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:239
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:136
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:339
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:317
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:225
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:358
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:114
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:120
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:359
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:327
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:222
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Operand matrix_
The dense matrix containing the submatrix.
Definition: DenseSubmatrix.h:2792
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:232
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.