DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level3.h>
56 #include <blaze/math/Functions.h>
57 #include <blaze/math/Intrinsics.h>
58 #include <blaze/math/shims/Reset.h>
93 #include <blaze/system/BLAS.h>
95 #include <blaze/util/Assert.h>
96 #include <blaze/util/Complex.h>
100 #include <blaze/util/DisableIf.h>
101 #include <blaze/util/EnableIf.h>
102 #include <blaze/util/InvalidType.h>
104 #include <blaze/util/mpl/And.h>
105 #include <blaze/util/mpl/Not.h>
106 #include <blaze/util/mpl/Or.h>
107 #include <blaze/util/SelectType.h>
108 #include <blaze/util/Types.h>
115 
116 
117 namespace blaze {
118 
119 //=================================================================================================
120 //
121 // CLASS DMATDMATMULTEXPR
122 //
123 //=================================================================================================
124 
125 //*************************************************************************************************
132 template< typename MT1 // Type of the left-hand side dense matrix
133  , typename MT2 > // Type of the right-hand side dense matrix
134 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2>, false >
135  , private MatMatMultExpr
136  , private Computation
137 {
138  private:
139  //**Type definitions****************************************************************************
// Private shorthand aliases derived from the operand types MT1 (left) and MT2 (right).
140  typedef typename MT1::ResultType RT1;  // ResultType of the left-hand side operand
141  typedef typename MT2::ResultType RT2;  // ResultType of the right-hand side operand
142  typedef typename RT1::ElementType ET1;  // ElementType of RT1
143  typedef typename RT2::ElementType ET2;  // ElementType of RT2
144  typedef typename MT1::CompositeType CT1;  // CompositeType of the left-hand side operand
145  typedef typename MT2::CompositeType CT2;  // CompositeType of the right-hand side operand
146  //**********************************************************************************************
147 
148  //**********************************************************************************************
// Compilation switch for the left-hand side operand: nonzero when MT1 is a computation
// expression or requires an intermediate evaluation.
// NOTE(review): listing line 150 is absent from this extract although evaluateLeft is read by
// IsEvaluationRequired and smpAssignable; restored as the mirror image of evaluateRight --
// confirm against the upstream header.
150  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
151  //**********************************************************************************************
152 
153  //**********************************************************************************************
// Compilation switch for the right-hand side operand: nonzero when MT2 is a computation
// expression or requires an intermediate evaluation.
155  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
156  //**********************************************************************************************
157 
158  //**********************************************************************************************
160 
// Helper trait: value is nonzero when the target type T1 is a column-major matrix and at least
// one of the operand types T2, T3 is symmetric. The assign() overload of this class is
// disabled (via DisableIf) whenever this trait holds.
166  template< typename T1, typename T2, typename T3 >
167  struct CanExploitSymmetry {
168  enum { value = IsColumnMajorMatrix<T1>::value &&
169  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
170  };
172  //**********************************************************************************************
173 
174  //**********************************************************************************************
176 
// Helper trait: value is nonzero when at least one operand has to be evaluated
// (evaluateLeft || evaluateRight) and the symmetry shortcut described by
// CanExploitSymmetry<T1,T2,T3> does not apply.
180  template< typename T1, typename T2, typename T3 >
181  struct IsEvaluationRequired {
182  enum { value = ( evaluateLeft || evaluateRight ) &&
183  !CanExploitSymmetry<T1,T2,T3>::value };
184  };
186  //**********************************************************************************************
187 
188  //**********************************************************************************************
190 
// Helper trait for the single precision kernel. value is nonzero only if BLAZE_BLAS_MODE is
// set, T1 offers mutable and T2/T3 constant data access, neither operand is diagonal, all
// three types are vectorizable and all three element types satisfy IsFloat.
// NOTE(review): the kernel overloads selected by this trait are outside this excerpt.
193  template< typename T1, typename T2, typename T3 >
194  struct UseSinglePrecisionKernel {
195  enum { value = BLAZE_BLAS_MODE &&
196  HasMutableDataAccess<T1>::value &&
197  HasConstDataAccess<T2>::value &&
198  HasConstDataAccess<T3>::value &&
199  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
200  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
201  IsFloat<typename T1::ElementType>::value &&
202  IsFloat<typename T2::ElementType>::value &&
203  IsFloat<typename T3::ElementType>::value };
204  };
206  //**********************************************************************************************
207 
208  //**********************************************************************************************
210 
// Helper trait for the double precision kernel. Same preconditions as
// UseSinglePrecisionKernel, except that all three element types must satisfy IsDouble.
// NOTE(review): the kernel overloads selected by this trait are outside this excerpt.
213  template< typename T1, typename T2, typename T3 >
214  struct UseDoublePrecisionKernel {
215  enum { value = BLAZE_BLAS_MODE &&
216  HasMutableDataAccess<T1>::value &&
217  HasConstDataAccess<T2>::value &&
218  HasConstDataAccess<T3>::value &&
219  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
220  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
221  IsDouble<typename T1::ElementType>::value &&
222  IsDouble<typename T2::ElementType>::value &&
223  IsDouble<typename T3::ElementType>::value };
224  };
226  //**********************************************************************************************
227 
228  //**********************************************************************************************
230 
// Helper trait for the single precision complex kernel. value is nonzero only if
// BLAZE_BLAS_MODE is set, the data access and vectorization requirements are met, neither
// operand is diagonal and all three element types are exactly complex<float>.
// NOTE(review): the kernel overloads selected by this trait are outside this excerpt.
234  template< typename T1, typename T2, typename T3 >
235  struct UseSinglePrecisionComplexKernel {
236  typedef complex<float> Type;  // Element type all three matrices must share
237  enum { value = BLAZE_BLAS_MODE &&
238  HasMutableDataAccess<T1>::value &&
239  HasConstDataAccess<T2>::value &&
240  HasConstDataAccess<T3>::value &&
241  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
242  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
243  IsSame<typename T1::ElementType,Type>::value &&
244  IsSame<typename T2::ElementType,Type>::value &&
245  IsSame<typename T3::ElementType,Type>::value };
246  };
248  //**********************************************************************************************
249 
250  //**********************************************************************************************
252 
// Helper trait for the double precision complex kernel. Same preconditions as
// UseSinglePrecisionComplexKernel, except that all three element types must be exactly
// complex<double>.
// NOTE(review): the kernel overloads selected by this trait are outside this excerpt.
256  template< typename T1, typename T2, typename T3 >
257  struct UseDoublePrecisionComplexKernel {
258  typedef complex<double> Type;  // Element type all three matrices must share
259  enum { value = BLAZE_BLAS_MODE &&
260  HasMutableDataAccess<T1>::value &&
261  HasConstDataAccess<T2>::value &&
262  HasConstDataAccess<T3>::value &&
263  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
264  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
265  IsSame<typename T1::ElementType,Type>::value &&
266  IsSame<typename T2::ElementType,Type>::value &&
267  IsSame<typename T3::ElementType,Type>::value };
268  };
270  //**********************************************************************************************
271 
272  //**********************************************************************************************
274 
// Helper trait: value is nonzero when BLAZE_BLAS_MODE is off or when none of the four
// precision-specific kernel traits (single, double, complex single, complex double) applies
// to the type combination T1, T2, T3.
277  template< typename T1, typename T2, typename T3 >
278  struct UseDefaultKernel {
279  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
280  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
281  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
282  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
283  };
285  //**********************************************************************************************
286 
287  //**********************************************************************************************
289 
// Helper trait that switches the small/large assign kernels between their scalar and their
// vectorized overloads (via EnableIf/DisableIf). value is nonzero only if T3 is not diagonal,
// all three types are vectorizable, all three element types are identical and IntrinsicTrait
// reports addition, subtraction and multiplication for that element type.
292  template< typename T1, typename T2, typename T3 >
293  struct UseVectorizedDefaultKernel {
294  enum { value = !IsDiagonal<T3>::value &&
295  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
296  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
297  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
298  IntrinsicTrait<typename T1::ElementType>::addition &&
299  IntrinsicTrait<typename T1::ElementType>::subtraction &&
300  IntrinsicTrait<typename T1::ElementType>::multiplication };
301  };
303  //**********************************************************************************************
304 
305  public:
306  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are referenced here but their typedefs are not
// part of this excerpt (listing lines 307-312 are missing).
313  typedef const ElementType ReturnType;  // Type returned by operator()
314  typedef const ResultType CompositeType;  // Type used when the expression is an operand itself
315 
// Storage type of the left operand, chosen on IsExpression<MT1>: either a copy (const MT1) or a
// reference (const MT1&). Presumably SelectType yields the first alternative when the
// condition is true -- confirm against SelectType.
317  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
318 
// Storage type of the right operand, chosen the same way on IsExpression<MT2>.
320  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
321 
324 
327  //**********************************************************************************************
328 
329  //**Compilation flags***************************************************************************
// Compilation switch: nonzero when the expression itself can be evaluated with vectorized
// code. Requires a non-diagonal right operand, two vectorizable operands with identical
// element types, and intrinsic support for addition and multiplication.
// NOTE(review): listing lines 333-335 are absent from this extract, which leaves the
// enumerator ending on a dangling '&&' without a closing brace. The three conditions below
// were reconstructed to mirror UseVectorizedDefaultKernel -- confirm against the upstream
// header.
331  enum { vectorizable = !IsDiagonal<MT2>::value &&
332  MT1::vectorizable && MT2::vectorizable &&
333  IsSame<ET1,ET2>::value &&
334  IntrinsicTrait<ET1>::addition &&
335  IntrinsicTrait<ET1>::multiplication };
336 
// Compilation switch for SMP assignment: nonzero only if neither operand needs to be
// evaluated and both operand types are themselves SMP-assignable.
338  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
339  !evaluateRight && MT2::smpAssignable };
340  //**********************************************************************************************
341 
342  //**Constructor*********************************************************************************
// Constructor of the multiplication expression.
//
// \param lhs The left-hand side operand of the multiplication expression.
// \param rhs The right-hand side operand of the multiplication expression.
//
// Stores both operands and asserts (internal assertion only) that the number of columns of
// lhs equals the number of rows of rhs.
348  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs )
349  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
350  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
351  {
352  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
353  }
354  //**********************************************************************************************
355 
356  //**Access operator*****************************************************************************
// Direct access to element (i,j) of the product, computed on the fly as the dot product of
// row i of the left operand and column j of the right operand.
//
// \param i Row index; asserted to be smaller than lhs_.rows().
// \param j Column index; asserted to be smaller than rhs_.columns().
// \return The value of element (i,j); a default-constructed ElementType if the inner
//         dimension is zero or the structurally non-zero summation range is empty.
//
// Fix: this extract had lost the two IsDiagonal guards (listing lines 388 and 391), so the
// first diagonal shortcut was returned unconditionally and the general summation was
// unreachable. The guards are restored to match the diagonal/general and general/diagonal
// assignment kernels of this class, which use the same two products.
363  inline ReturnType operator()( size_t i, size_t j ) const {
364  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
365  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
366 
      // First summation index k that can contribute: an upper left operand starts at its
      // diagonal (one past it if strictly upper), a lower right operand at column j's diagonal.
367  const size_t kbegin( ( IsUpper<MT1>::value )
368  ?( ( IsLower<MT2>::value )
369  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
370  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
371  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
372  :( ( IsLower<MT2>::value )
373  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
374  :( 0UL ) ) );
      // One past the last summation index k that can contribute (mirror image of kbegin).
375  const size_t kend( ( IsLower<MT1>::value )
376  ?( ( IsUpper<MT2>::value )
377  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
378  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
379  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
380  :( ( IsUpper<MT2>::value )
381  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
382  :( lhs_.columns() ) ) );
383 
      // Nothing to sum up: empty inner dimension or empty structural range.
384  if( lhs_.columns() == 0UL ||
385  ( ( IsTriangular<MT1>::value || IsTriangular<MT2>::value ) && kbegin >= kend ) )
386  return ElementType();
387 
      // Diagonal left operand: only lhs_(i,i) contributes to row i.
388  if( IsDiagonal<MT1>::value )
389  return lhs_(i,i) * rhs_(i,j);
390 
      // Diagonal right operand: only rhs_(j,j) contributes to column j.
391  if( IsDiagonal<MT2>::value )
392  return lhs_(i,j) * rhs_(j,j);
393 
      // General case: the first term initializes the accumulator, the remaining terms are
      // added two per iteration; kpos marks where the pairwise loop stops, so that at most
      // one trailing term is left for the final 'if'.
394  const size_t knum( kend - kbegin );
395  const size_t kpos( kbegin + ( ( knum - 1UL ) & size_t(-2) ) + 1UL );
396 
397  ElementType tmp( lhs_(i,kbegin) * rhs_(kbegin,j) );
398 
399  for( size_t k=kbegin+1UL; k<kpos; k+=2UL ) {
400  tmp += lhs_(i,k ) * rhs_(k ,j);
401  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
402  }
403  if( kpos < kend ) {
404  tmp += lhs_(i,kpos) * rhs_(kpos,j);
405  }
406 
407  return tmp;
408  }
409  //**********************************************************************************************
410 
411  //**Rows function*******************************************************************************
// Returns the number of rows of the product, i.e. the number of rows of the left operand.
416  inline size_t rows() const {
417  return lhs_.rows();
418  }
419  //**********************************************************************************************
420 
421  //**Columns function****************************************************************************
// Returns the number of columns of the product, i.e. the number of columns of the right
// operand.
426  inline size_t columns() const {
427  return rhs_.columns();
428  }
429  //**********************************************************************************************
430 
431  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand of the multiplication.
436  inline LeftOperand leftOperand() const {
437  return lhs_;
438  }
439  //**********************************************************************************************
440 
441  //**Right operand access************************************************************************
// Returns the stored right-hand side operand of the multiplication.
446  inline RightOperand rightOperand() const {
447  return rhs_;
448  }
449  //**********************************************************************************************
450 
451  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
//
// \param alias The address to be checked.
// \return true if either operand reports that it can alias with \a alias, false otherwise.
457  template< typename T >
458  inline bool canAlias( const T* alias ) const {
459  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
460  }
461  //**********************************************************************************************
462 
463  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//
// \param alias The address to be checked.
// \return true if either operand reports being aliased with \a alias, false otherwise.
469  template< typename T >
470  inline bool isAliased( const T* alias ) const {
471  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
472  }
473  //**********************************************************************************************
474 
475  //**********************************************************************************************
// Returns whether both operands report being aligned (logical AND of their isAligned()).
480  inline bool isAligned() const {
481  return lhs_.isAligned() && rhs_.isAligned();
482  }
483  //**********************************************************************************************
484 
485  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
//
// \return true if the SMP path may be taken, false otherwise.
//
// The first condition keeps the SMP path away from products that would be handed to an
// already parallel BLAS: it holds if BLAS is not parallel or the result has fewer than
// DMATDMATMULT_THRESHOLD elements.
// NOTE(review): listing line 493 is absent from this extract, leaving the return expression
// ending on a dangling '&&'. The second operand below was restored from memory of the
// upstream header (the threshold name is not visible in this excerpt) -- confirm before
// merging.
490  inline bool canSMPAssign() const {
491  return ( !BLAZE_BLAS_IS_PARALLEL ||
492  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
493  ( columns() > SMP_DMATDMATMULT_THRESHOLD );
494  }
495  //**********************************************************************************************
496 
497  private:
498  //**Member variables****************************************************************************
499  LeftOperand lhs_;  // Left-hand side dense matrix of the multiplication expression
500  RightOperand rhs_;  // Right-hand side dense matrix of the multiplication expression
501  //**********************************************************************************************
502 
503  //**Assignment to dense matrices****************************************************************
// Assignment of a dense matrix-dense matrix multiplication to a dense matrix (C = A * B).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// This overload is disabled when CanExploitSymmetry<MT,MT1,MT2> holds. Both operands are
// evaluated serially into the local types LT and RT before the kernel selection is invoked.
// NOTE(review): the LT and RT typedefs are not part of this excerpt.
516  template< typename MT // Type of the target dense matrix
517  , bool SO > // Storage order of the target dense matrix
518  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
519  assign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
520  {
522 
523  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
524  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
525 
      // An empty target needs no work; an empty inner dimension yields an all-default result.
526  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
527  return;
528  }
529  else if( rhs.lhs_.columns() == 0UL ) {
530  reset( ~lhs );
531  return;
532  }
533 
534  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
535  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
536 
537  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
538  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
539  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
540  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
541  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
542  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
543 
544  DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
545  }
547  //**********************************************************************************************
548 
549  //**Assignment to dense matrices (kernel selection)*********************************************
// Selection of the kernel for an assignment of a dense matrix-dense matrix multiplication
// (C = A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The small-matrix kernel is used when B is diagonal or when the target has fewer than
// DMATDMATMULT_THRESHOLD elements; otherwise the call is forwarded to
// selectBlasAssignKernel(), which is defined outside this excerpt.
560  template< typename MT3 // Type of the left-hand side target matrix
561  , typename MT4 // Type of the left-hand side matrix operand
562  , typename MT5 > // Type of the right-hand side matrix operand
563  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
564  {
565  if( ( IsDiagonal<MT5>::value ) ||
566  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
567  selectSmallAssignKernel( C, A, B );
568  else
569  selectBlasAssignKernel( C, A, B );
570  }
572  //**********************************************************************************************
573 
574  //**Default assignment to dense matrices (general/general)**************************************
// Default (scalar) assignment kernel for C = A * B where neither A nor B is diagonal.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Each row i of C is built in two phases: the first contributing k (kbegin) assigns
// A(i,kbegin) * B(kbegin,j) and resets the elements outside the structurally non-zero range;
// every further k accumulates A(i,k) * B(k,j). The k and j ranges are narrowed at compile
// time according to the IsUpper/IsLower/IsStrictly* traits of A and B.
588  template< typename MT3 // Type of the left-hand side target matrix
589  , typename MT4 // Type of the left-hand side matrix operand
590  , typename MT5 > // Type of the right-hand side matrix operand
591  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
592  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
593  {
594  const size_t M( A.rows() );
595  const size_t N( B.columns() );
596  const size_t K( A.columns() );
597 
598  for( size_t i=0UL; i<M; ++i )
599  {
      // Range of k for which A(i,k) can be non-zero given A's triangular structure.
600  const size_t kbegin( ( IsUpper<MT4>::value )
601  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
602  :( 0UL ) );
603  const size_t kend( ( IsLower<MT4>::value )
604  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
605  :( K ) );
606  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
607 
      // Row i of a strictly triangular A may have no contributing k at all: reset the row.
608  if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
609  for( size_t j=0UL; j<N; ++j ) {
610  reset( (~C)(i,j) );
611  }
612  continue;
613  }
614 
      // Phase 1: initialize row i of C from the first contributing k (kbegin).
615  {
616  const size_t jbegin( ( IsUpper<MT5>::value )
617  ?( IsStrictlyUpper<MT5>::value ? kbegin+1UL : kbegin )
618  :( 0UL ) );
619  const size_t jend( ( IsLower<MT5>::value )
620  ?( IsStrictlyLower<MT5>::value ? kbegin : kbegin+1UL )
621  :( N ) );
622  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
623 
624  if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
625  for( size_t j=0UL; j<jbegin; ++j ) {
626  reset( C(i,j) );
627  }
628  }
629  else if( IsStrictlyUpper<MT5>::value ) {
630  reset( C(i,0UL) );
631  }
632  for( size_t j=jbegin; j<jend; ++j ) {
633  C(i,j) = A(i,kbegin) * B(kbegin,j);
634  }
635  if( IsLower<MT4>::value && IsLower<MT5>::value ) {
636  for( size_t j=jend; j<N; ++j ) {
637  reset( C(i,j) );
638  }
639  }
640  else if( IsStrictlyLower<MT5>::value ) {
641  reset( C(i,N-1UL) );
642  }
643  }
644 
      // Phase 2: accumulate the contributions of the remaining k.
645  for( size_t k=kbegin+1UL; k<kend; ++k )
646  {
647  const size_t jbegin( ( IsUpper<MT5>::value )
648  ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
649  :( 0UL ) );
650  const size_t jend( ( IsLower<MT5>::value )
651  ?( IsStrictlyLower<MT5>::value ? k-1UL : k )
652  :( N ) );
653  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
654 
655  for( size_t j=jbegin; j<jend; ++j ) {
656  C(i,j) += A(i,k) * B(k,j);
657  }
      // For a lower B, column jend is written for the first time at this k, so it is
      // assigned rather than accumulated.
658  if( IsLower<MT5>::value ) {
659  C(i,jend) = A(i,k) * B(k,jend);
660  }
661  }
662  }
663  }
665  //**********************************************************************************************
666 
667  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default (scalar) assignment kernel for C = A * B where A is not diagonal and B is diagonal.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
//
// With a diagonal B, element (i,j) reduces to A(i,j) * B(j,j). The j range per row follows
// A's triangular structure; elements outside that range are reset.
681  template< typename MT3 // Type of the left-hand side target matrix
682  , typename MT4 // Type of the left-hand side matrix operand
683  , typename MT5 > // Type of the right-hand side matrix operand
684  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
685  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
686  {
688 
689  const size_t M( A.rows() );
690  const size_t N( B.columns() );
691 
692  for( size_t i=0UL; i<M; ++i )
693  {
694  const size_t jbegin( ( IsUpper<MT4>::value )
695  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
696  :( 0UL ) );
697  const size_t jend( ( IsLower<MT4>::value )
698  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
699  :( N ) );
700  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
701 
702  if( IsUpper<MT4>::value ) {
703  for( size_t j=0UL; j<jbegin; ++j ) {
704  reset( C(i,j) );
705  }
706  }
707  for( size_t j=jbegin; j<jend; ++j ) {
708  C(i,j) = A(i,j) * B(j,j);
709  }
710  if( IsLower<MT4>::value ) {
711  for( size_t j=jend; j<N; ++j ) {
712  reset( C(i,j) );
713  }
714  }
715  }
716  }
718  //**********************************************************************************************
719 
720  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default (scalar) assignment kernel for C = A * B where A is diagonal and B is not diagonal.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side multiplication operand.
//
// With a diagonal A, element (i,j) reduces to A(i,i) * B(i,j). The j range per row follows
// B's triangular structure; elements outside that range are reset.
734  template< typename MT3 // Type of the left-hand side target matrix
735  , typename MT4 // Type of the left-hand side matrix operand
736  , typename MT5 > // Type of the right-hand side matrix operand
737  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
738  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
739  {
741 
742  const size_t M( A.rows() );
743  const size_t N( B.columns() );
744 
745  for( size_t i=0UL; i<M; ++i )
746  {
747  const size_t jbegin( ( IsUpper<MT5>::value )
748  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
749  :( 0UL ) );
750  const size_t jend( ( IsLower<MT5>::value )
751  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
752  :( N ) );
753  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
754 
755  if( IsUpper<MT5>::value ) {
756  for( size_t j=0UL; j<jbegin; ++j ) {
757  reset( C(i,j) );
758  }
759  }
760  for( size_t j=jbegin; j<jend; ++j ) {
761  C(i,j) = A(i,i) * B(i,j);
762  }
763  if( IsLower<MT5>::value ) {
764  for( size_t j=jend; j<N; ++j ) {
765  reset( C(i,j) );
766  }
767  }
768  }
769  }
771  //**********************************************************************************************
772 
773  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default (scalar) assignment kernel for C = A * B where both A and B are diagonal.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
//
// The whole target is reset first; afterwards only the diagonal elements are written as
// A(i,i) * B(i,i).
787  template< typename MT3 // Type of the left-hand side target matrix
788  , typename MT4 // Type of the left-hand side matrix operand
789  , typename MT5 > // Type of the right-hand side matrix operand
790  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
791  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
792  {
794 
795  reset( C );
796 
797  for( size_t i=0UL; i<A.rows(); ++i ) {
798  C(i,i) = A(i,i) * B(i,i);
799  }
800  }
802  //**********************************************************************************************
803 
804  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel for type combinations that cannot use the vectorized
// default kernel (UseVectorizedDefaultKernel is false): forwards to
// selectDefaultAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
817  template< typename MT3 // Type of the left-hand side target matrix
818  , typename MT4 // Type of the left-hand side matrix operand
819  , typename MT5 > // Type of the right-hand side matrix operand
820  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
821  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
822  {
823  selectDefaultAssignKernel( C, A, B );
824  }
826  //**********************************************************************************************
827 
828  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
// Vectorized small-matrix assignment kernel for a row-major target (C = A * B).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The columns of C are processed in panels of decreasing width: 8, 4, 2 and finally 1
// intrinsic vector(s) of IT::size elements. In the 4-, 2- and 1-vector panels two rows are
// handled per iteration, followed by a single remainder row. For every tile the k range is
// narrowed according to the IsUpper/IsLower traits of A and B, the products
// set( A(i,k) ) * B.load(k,j) are accumulated in local IntrinsicType variables and the
// results are written with (~C).store().
// NOTE(review): the accumulators xmm1..xmm8 are only default-constructed; this presumably
// relies on IntrinsicType's default constructor zero-initializing them -- confirm.
// NOTE(review): the final panel stores a full vector at column j even if fewer than IT::size
// columns remain; presumably the rows are padded accordingly -- confirm.
843  template< typename MT3 // Type of the left-hand side target matrix
844  , typename MT4 // Type of the left-hand side matrix operand
845  , typename MT5 > // Type of the right-hand side matrix operand
846  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
847  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
848  {
849  typedef IntrinsicTrait<ElementType> IT;
850 
851  const size_t M( A.rows() );
852  const size_t N( B.columns() );
853  const size_t K( A.columns() );
854 
855  size_t j( 0UL );
856 
      // Panels of 8 vectors per row, one row at a time.
857  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
858  for( size_t i=0UL; i<M; ++i )
859  {
860  const size_t kbegin( ( IsUpper<MT4>::value )
861  ?( ( IsLower<MT5>::value )
862  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
863  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
864  :( IsLower<MT5>::value ? j : 0UL ) );
865  const size_t kend( ( IsLower<MT4>::value )
866  ?( ( IsUpper<MT5>::value )
867  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
868  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
869  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
870 
871  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
872 
873  for( size_t k=kbegin; k<kend; ++k ) {
874  const IntrinsicType a1( set( A(i,k) ) );
875  xmm1 = xmm1 + a1 * B.load(k,j );
876  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
877  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
878  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
879  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
880  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
881  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
882  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
883  }
884 
885  (~C).store( i, j , xmm1 );
886  (~C).store( i, j+IT::size , xmm2 );
887  (~C).store( i, j+IT::size*2UL, xmm3 );
888  (~C).store( i, j+IT::size*3UL, xmm4 );
889  (~C).store( i, j+IT::size*4UL, xmm5 );
890  (~C).store( i, j+IT::size*5UL, xmm6 );
891  (~C).store( i, j+IT::size*6UL, xmm7 );
892  (~C).store( i, j+IT::size*7UL, xmm8 );
893  }
894  }
895 
      // Panels of 4 vectors, two rows per iteration plus a remainder row.
896  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
897  {
898  size_t i( 0UL );
899 
900  for( ; (i+2UL) <= M; i+=2UL )
901  {
902  const size_t kbegin( ( IsUpper<MT4>::value )
903  ?( ( IsLower<MT5>::value )
904  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
905  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
906  :( IsLower<MT5>::value ? j : 0UL ) );
907  const size_t kend( ( IsLower<MT4>::value )
908  ?( ( IsUpper<MT5>::value )
909  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
910  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
911  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
912 
913  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
914 
915  for( size_t k=kbegin; k<kend; ++k ) {
916  const IntrinsicType a1( set( A(i ,k) ) );
917  const IntrinsicType a2( set( A(i+1UL,k) ) );
918  const IntrinsicType b1( B.load(k,j ) );
919  const IntrinsicType b2( B.load(k,j+IT::size ) );
920  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
921  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
922  xmm1 = xmm1 + a1 * b1;
923  xmm2 = xmm2 + a1 * b2;
924  xmm3 = xmm3 + a1 * b3;
925  xmm4 = xmm4 + a1 * b4;
926  xmm5 = xmm5 + a2 * b1;
927  xmm6 = xmm6 + a2 * b2;
928  xmm7 = xmm7 + a2 * b3;
929  xmm8 = xmm8 + a2 * b4;
930  }
931 
932  (~C).store( i , j , xmm1 );
933  (~C).store( i , j+IT::size , xmm2 );
934  (~C).store( i , j+IT::size*2UL, xmm3 );
935  (~C).store( i , j+IT::size*3UL, xmm4 );
936  (~C).store( i+1UL, j , xmm5 );
937  (~C).store( i+1UL, j+IT::size , xmm6 );
938  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
939  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
940  }
941 
      // Remainder row for an odd number of rows.
942  if( i < M )
943  {
944  const size_t kbegin( ( IsUpper<MT4>::value )
945  ?( ( IsLower<MT5>::value )
946  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
947  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
948  :( IsLower<MT5>::value ? j : 0UL ) );
949  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
950 
951  IntrinsicType xmm1, xmm2, xmm3, xmm4;
952 
953  for( size_t k=kbegin; k<kend; ++k ) {
954  const IntrinsicType a1( set( A(i,k) ) );
955  xmm1 = xmm1 + a1 * B.load(k,j );
956  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
957  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
958  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
959  }
960 
961  (~C).store( i, j , xmm1 );
962  (~C).store( i, j+IT::size , xmm2 );
963  (~C).store( i, j+IT::size*2UL, xmm3 );
964  (~C).store( i, j+IT::size*3UL, xmm4 );
965  }
966  }
967 
      // Panels of 2 vectors, two rows per iteration plus a remainder row.
968  for( ; (j+IT::size) < N; j+=IT::size*2UL )
969  {
970  size_t i( 0UL );
971 
972  for( ; (i+2UL) <= M; i+=2UL )
973  {
974  const size_t kbegin( ( IsUpper<MT4>::value )
975  ?( ( IsLower<MT5>::value )
976  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
977  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
978  :( IsLower<MT5>::value ? j : 0UL ) );
979  const size_t kend( ( IsLower<MT4>::value )
980  ?( ( IsUpper<MT5>::value )
981  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
982  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
983  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
984 
985  IntrinsicType xmm1, xmm2, xmm3, xmm4;
986 
987  for( size_t k=kbegin; k<kend; ++k ) {
988  const IntrinsicType a1( set( A(i ,k) ) );
989  const IntrinsicType a2( set( A(i+1UL,k) ) );
990  const IntrinsicType b1( B.load(k,j ) );
991  const IntrinsicType b2( B.load(k,j+IT::size) );
992  xmm1 = xmm1 + a1 * b1;
993  xmm2 = xmm2 + a1 * b2;
994  xmm3 = xmm3 + a2 * b1;
995  xmm4 = xmm4 + a2 * b2;
996  }
997 
998  (~C).store( i , j , xmm1 );
999  (~C).store( i , j+IT::size, xmm2 );
1000  (~C).store( i+1UL, j , xmm3 );
1001  (~C).store( i+1UL, j+IT::size, xmm4 );
1002  }
1003 
      // Remainder row for an odd number of rows.
1004  if( i < M )
1005  {
1006  const size_t kbegin( ( IsUpper<MT4>::value )
1007  ?( ( IsLower<MT5>::value )
1008  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1009  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1010  :( IsLower<MT5>::value ? j : 0UL ) );
1011  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
1012 
1013  IntrinsicType xmm1, xmm2;
1014 
1015  for( size_t k=kbegin; k<kend; ++k ) {
1016  const IntrinsicType a1( set( A(i,k) ) );
1017  xmm1 = xmm1 + a1 * B.load(k,j );
1018  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1019  }
1020 
1021  (~C).store( i, j , xmm1 );
1022  (~C).store( i, j+IT::size, xmm2 );
1023  }
1024  }
1025 
      // Last panel of a single vector, two rows per iteration plus a remainder row.
1026  if( j < N )
1027  {
1028  size_t i( 0UL );
1029 
1030  for( ; (i+2UL) <= M; i+=2UL )
1031  {
1032  const size_t kbegin( ( IsUpper<MT4>::value )
1033  ?( ( IsLower<MT5>::value )
1034  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1035  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1036  :( IsLower<MT5>::value ? j : 0UL ) );
1037  const size_t kend( ( IsLower<MT4>::value )
1038  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
1039  :( K ) );
1040 
1041  IntrinsicType xmm1, xmm2;
1042 
1043  for( size_t k=kbegin; k<kend; ++k ) {
1044  const IntrinsicType b1( B.load(k,j) );
1045  xmm1 = xmm1 + set( A(i ,k) ) * b1;
1046  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
1047  }
1048 
1049  (~C).store( i , j, xmm1 );
1050  (~C).store( i+1UL, j, xmm2 );
1051  }
1052 
1053  if( i < M )
1054  {
1055  const size_t kbegin( ( IsUpper<MT4>::value )
1056  ?( ( IsLower<MT5>::value )
1057  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1058  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1059  :( IsLower<MT5>::value ? j : 0UL ) );
1060 
1061  IntrinsicType xmm1;
1062 
1063  for( size_t k=kbegin; k<K; ++k ) {
1064  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
1065  }
1066 
1067  (~C).store( i, j, xmm1 );
1068  }
1069  }
1070  }
1072  //**********************************************************************************************
1073 
1074  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for a column-major target (C = A * B).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// No dedicated kernel is implemented here: one operand is converted into its OppositeType
// (evaluated serially) and the product is re-dispatched through assign(). The operand to
// convert is the non-resizable one if exactly one operand is non-resizable; otherwise the
// one with fewer elements, with A chosen on a tie.
1089  template< typename MT3 // Type of the left-hand side target matrix
1090  , typename MT4 // Type of the left-hand side matrix operand
1091  , typename MT5 > // Type of the right-hand side matrix operand
1092  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1093  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1094  {
1099 
1100  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1101  const typename MT4::OppositeType tmp( serial( A ) );
1102  assign( ~C, tmp * B );
1103  }
1104  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1105  const typename MT5::OppositeType tmp( serial( B ) );
1106  assign( ~C, A * tmp );
1107  }
1108  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1109  const typename MT4::OppositeType tmp( serial( A ) );
1110  assign( ~C, tmp * B );
1111  }
1112  else {
1113  const typename MT5::OppositeType tmp( serial( B ) );
1114  assign( ~C, A * tmp );
1115  }
1116  }
1118  //**********************************************************************************************
1119 
1120  //**Default assignment to dense matrices (large matrices)***************************************
1133  template< typename MT3 // Type of the left-hand side target matrix
1134  , typename MT4 // Type of the left-hand side matrix operand
1135  , typename MT5 > // Type of the right-hand side matrix operand
1136  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1137  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1138  {
1139  selectDefaultAssignKernel( C, A, B );
1140  }
1142  //**********************************************************************************************
1143 
1144  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
/*!\brief Vectorized default assignment of a large dense matrix-dense matrix multiplication
//        to a row-major dense matrix (\f$ C=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// The kernel walks the target in tiles of at most 64 rows (iblock) by 128 columns (jblock).
// Every tile of \a C is first reset element by element and is then updated once per slab of
// at most 128 inner indices (kblock): the current values of \a C are loaded, products of
// set( A(i,k) ) and loaded vectors of \a B are accumulated, and the sums are stored back.
// Inside a tile the columns are handled in groups of 4*IT::size, then 2*IT::size, and finally
// one trailing group; the rows are handled in groups of 2 (respectively 4, then 2) followed
// by a single-row remainder. The k-range of every inner loop is clipped to the current slab
// and additionally narrowed depending on the IsUpper/IsLower traits of the operand types.
//
// NOTE(review): IT::size is presumably the number of elements per intrinsic vector, and
// set() presumably broadcasts a scalar - confirm against IntrinsicTrait.
// NOTE(review): in the trailing column groups the vector loads/stores may reach past jend;
// this presumably relies on padded row storage - confirm against the matrix types.
*/
1159  template< typename MT3 // Type of the left-hand side target matrix
1160  , typename MT4 // Type of the left-hand side matrix operand
1161  , typename MT5 > // Type of the right-hand side matrix operand
1162  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1163  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1164  {
1165  typedef IntrinsicTrait<ElementType> IT;
1166 
1167  const size_t M( A.rows() );
1168  const size_t N( B.columns() );
1169  const size_t K( A.columns() );
1170 
      // Tile extents: rows of C, columns of C, and inner (summation) indices
1171  const size_t iblock( 64UL );
1172  const size_t jblock( 128UL );
1173  const size_t kblock( 128UL );
1174 
      // Outer loop over column tiles [jj,jend) of C
1175  for( size_t jj=0UL; jj<N; jj+=jblock )
1176  {
1177  const size_t jend( min( jj+jblock, N ) );
1178 
      // Loop over row tiles [ii,iend) of C
1179  for( size_t ii=0UL; ii<M; ii+=iblock )
1180  {
1181  const size_t iend( min( ii+iblock, M ) );
1182 
      // Clear the current tile of C; the slabs below accumulate into it
1183  for( size_t i=ii; i<iend; ++i ) {
1184  for( size_t j=jj; j<jend; ++j ) {
1185  reset( (~C)(i,j) );
1186  }
1187  }
1188 
      // Loop over slabs [kk,ktmp) of the inner dimension
1189  for( size_t kk=0UL; kk<K; kk+=kblock )
1190  {
1191  const size_t ktmp( min( kk+kblock, K ) );
1192 
      // j is shared by the three column stages below and carries over between them
1193  size_t j( jj );
1194 
      // Stage 1: four vector-wide column groups at a time
1195  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
1196  {
1197  const size_t j1( j+IT::size );
1198  const size_t j2( j+IT::size*2UL );
1199  const size_t j3( j+IT::size*3UL );
1200 
1201  size_t i( ii );
1202 
      // Two rows x four column groups (eight accumulators)
1203  for( ; (i+2UL) <= iend; i+=2UL )
1204  {
1205  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1206  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1207  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
1208  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
1209 
      // Start from the partial sums already stored in C
1210  IntrinsicType xmm1( (~C).load(i ,j ) );
1211  IntrinsicType xmm2( (~C).load(i ,j1) );
1212  IntrinsicType xmm3( (~C).load(i ,j2) );
1213  IntrinsicType xmm4( (~C).load(i ,j3) );
1214  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
1215  IntrinsicType xmm6( (~C).load(i+1UL,j1) );
1216  IntrinsicType xmm7( (~C).load(i+1UL,j2) );
1217  IntrinsicType xmm8( (~C).load(i+1UL,j3) );
1218 
1219  for( size_t k=kbegin; k<kend; ++k ) {
1220  const IntrinsicType a1( set( A(i ,k) ) );
1221  const IntrinsicType a2( set( A(i+1UL,k) ) );
1222  const IntrinsicType b1( B.load(k,j ) );
1223  const IntrinsicType b2( B.load(k,j1) );
1224  const IntrinsicType b3( B.load(k,j2) );
1225  const IntrinsicType b4( B.load(k,j3) );
1226  xmm1 = xmm1 + a1 * b1;
1227  xmm2 = xmm2 + a1 * b2;
1228  xmm3 = xmm3 + a1 * b3;
1229  xmm4 = xmm4 + a1 * b4;
1230  xmm5 = xmm5 + a2 * b1;
1231  xmm6 = xmm6 + a2 * b2;
1232  xmm7 = xmm7 + a2 * b3;
1233  xmm8 = xmm8 + a2 * b4;
1234  }
1235 
1236  (~C).store( i , j , xmm1 );
1237  (~C).store( i , j1, xmm2 );
1238  (~C).store( i , j2, xmm3 );
1239  (~C).store( i , j3, xmm4 );
1240  (~C).store( i+1UL, j , xmm5 );
1241  (~C).store( i+1UL, j1, xmm6 );
1242  (~C).store( i+1UL, j2, xmm7 );
1243  (~C).store( i+1UL, j3, xmm8 );
1244  }
1245 
      // Single remaining row of the tile x four column groups
1246  if( i < iend )
1247  {
1248  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1249  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1250  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1251  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
1252 
1253  IntrinsicType xmm1( (~C).load(i,j ) );
1254  IntrinsicType xmm2( (~C).load(i,j1) );
1255  IntrinsicType xmm3( (~C).load(i,j2) );
1256  IntrinsicType xmm4( (~C).load(i,j3) );
1257 
1258  for( size_t k=kbegin; k<kend; ++k ) {
1259  const IntrinsicType a1( set( A(i,k) ) );
1260  xmm1 = xmm1 + a1 * B.load(k,j );
1261  xmm2 = xmm2 + a1 * B.load(k,j1);
1262  xmm3 = xmm3 + a1 * B.load(k,j2);
1263  xmm4 = xmm4 + a1 * B.load(k,j3);
1264  }
1265 
1266  (~C).store( i, j , xmm1 );
1267  (~C).store( i, j1, xmm2 );
1268  (~C).store( i, j2, xmm3 );
1269  (~C).store( i, j3, xmm4 );
1270  }
1271  }
1272 
      // Stage 2: two vector-wide column groups at a time
1273  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
1274  {
1275  const size_t j1( j+IT::size );
1276 
1277  size_t i( ii );
1278 
      // Four rows x two column groups (eight accumulators)
1279  for( ; (i+4UL) <= iend; i+=4UL )
1280  {
1281  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1282  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1283  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
1284  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
1285 
1286  IntrinsicType xmm1( (~C).load(i ,j ) );
1287  IntrinsicType xmm2( (~C).load(i ,j1) );
1288  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1289  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
1290  IntrinsicType xmm5( (~C).load(i+2UL,j ) );
1291  IntrinsicType xmm6( (~C).load(i+2UL,j1) );
1292  IntrinsicType xmm7( (~C).load(i+3UL,j ) );
1293  IntrinsicType xmm8( (~C).load(i+3UL,j1) );
1294 
1295  for( size_t k=kbegin; k<kend; ++k ) {
1296  const IntrinsicType a1( set( A(i ,k) ) );
1297  const IntrinsicType a2( set( A(i+1UL,k) ) );
1298  const IntrinsicType a3( set( A(i+2UL,k) ) );
1299  const IntrinsicType a4( set( A(i+3UL,k) ) );
1300  const IntrinsicType b1( B.load(k,j ) );
1301  const IntrinsicType b2( B.load(k,j1) );
1302  xmm1 = xmm1 + a1 * b1;
1303  xmm2 = xmm2 + a1 * b2;
1304  xmm3 = xmm3 + a2 * b1;
1305  xmm4 = xmm4 + a2 * b2;
1306  xmm5 = xmm5 + a3 * b1;
1307  xmm6 = xmm6 + a3 * b2;
1308  xmm7 = xmm7 + a4 * b1;
1309  xmm8 = xmm8 + a4 * b2;
1310  }
1311 
1312  (~C).store( i , j , xmm1 );
1313  (~C).store( i , j1, xmm2 );
1314  (~C).store( i+1UL, j , xmm3 );
1315  (~C).store( i+1UL, j1, xmm4 );
1316  (~C).store( i+2UL, j , xmm5 );
1317  (~C).store( i+2UL, j1, xmm6 );
1318  (~C).store( i+3UL, j , xmm7 );
1319  (~C).store( i+3UL, j1, xmm8 );
1320  }
1321 
      // Two rows x two column groups
1322  for( ; (i+2UL) <= iend; i+=2UL )
1323  {
1324  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1325  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1326  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
1327  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
1328 
1329  IntrinsicType xmm1( (~C).load(i ,j ) );
1330  IntrinsicType xmm2( (~C).load(i ,j1) );
1331  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
1332  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
1333 
1334  for( size_t k=kbegin; k<kend; ++k ) {
1335  const IntrinsicType a1( set( A(i ,k) ) );
1336  const IntrinsicType a2( set( A(i+1UL,k) ) );
1337  const IntrinsicType b1( B.load(k,j ) );
1338  const IntrinsicType b2( B.load(k,j1) );
1339  xmm1 = xmm1 + a1 * b1;
1340  xmm2 = xmm2 + a1 * b2;
1341  xmm3 = xmm3 + a2 * b1;
1342  xmm4 = xmm4 + a2 * b2;
1343  }
1344 
1345  (~C).store( i , j , xmm1 );
1346  (~C).store( i , j1, xmm2 );
1347  (~C).store( i+1UL, j , xmm3 );
1348  (~C).store( i+1UL, j1, xmm4 );
1349  }
1350 
      // Single remaining row of the tile x two column groups
1351  if( i < iend )
1352  {
1353  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1354  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1355  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1356  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
1357 
1358  IntrinsicType xmm1( (~C).load(i,j ) );
1359  IntrinsicType xmm2( (~C).load(i,j1) );
1360 
1361  for( size_t k=kbegin; k<kend; ++k ) {
1362  const IntrinsicType a1( set( A(i,k) ) );
1363  xmm1 = xmm1 + a1 * B.load(k,j );
1364  xmm2 = xmm2 + a1 * B.load(k,j1);
1365  }
1366 
1367  (~C).store( i, j , xmm1 );
1368  (~C).store( i, j1, xmm2 );
1369  }
1370  }
1371 
      // Stage 3: one trailing column group, processed row by row
1372  if( j < jend )
1373  {
1374  for( size_t i=ii; i<iend; ++i )
1375  {
1376  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1377  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1378  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
1379  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
1380 
1381  IntrinsicType xmm1( (~C).load(i,j) );
1382 
1383  for( size_t k=kbegin; k<kend; ++k ) {
1384  const IntrinsicType a1( set( A(i,k) ) );
1385  xmm1 = xmm1 + a1 * B.load(k,j);
1386  }
1387 
1388  (~C).store( i, j, xmm1 );
1389  }
1390  }
1391  }
1392  }
1393  }
1394  }
1396  //**********************************************************************************************
1397 
1398  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
1412  template< typename MT3 // Type of the left-hand side target matrix
1413  , typename MT4 // Type of the left-hand side matrix operand
1414  , typename MT5 > // Type of the right-hand side matrix operand
1415  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1416  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1417  {
1418  selectSmallAssignKernel( ~C, A, B );
1419  }
1421  //**********************************************************************************************
1422 
1423  //**BLAS-based assignment to dense matrices (default)*******************************************
1436  template< typename MT3 // Type of the left-hand side target matrix
1437  , typename MT4 // Type of the left-hand side matrix operand
1438  , typename MT5 > // Type of the right-hand side matrix operand
1439  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1440  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1441  {
1442  selectLargeAssignKernel( C, A, B );
1443  }
1445  //**********************************************************************************************
1446 
1447  //**BLAS-based assignment to dense matrices (single precision)**********************************
1448 #if BLAZE_BLAS_MODE
1449 
1462  template< typename MT3 // Type of the left-hand side target matrix
1463  , typename MT4 // Type of the left-hand side matrix operand
1464  , typename MT5 > // Type of the right-hand side matrix operand
1465  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1466  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1467  {
1468  if( IsTriangular<MT4>::value ) {
1469  assign( C, B );
1470  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1471  }
1472  else if( IsTriangular<MT5>::value ) {
1473  assign( C, A );
1474  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1475  }
1476  else {
1477  sgemm( C, A, B, 1.0F, 0.0F );
1478  }
1479  }
1481 #endif
1482  //**********************************************************************************************
1483 
1484  //**BLAS-based assignment to dense matrices (double precision)**********************************
1485 #if BLAZE_BLAS_MODE
1486 
1499  template< typename MT3 // Type of the left-hand side target matrix
1500  , typename MT4 // Type of the left-hand side matrix operand
1501  , typename MT5 > // Type of the right-hand side matrix operand
1502  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1503  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1504  {
1505  if( IsTriangular<MT4>::value ) {
1506  assign( C, B );
1507  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1508  }
1509  else if( IsTriangular<MT5>::value ) {
1510  assign( C, A );
1511  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1512  }
1513  else {
1514  dgemm( C, A, B, 1.0, 0.0 );
1515  }
1516  }
1518 #endif
1519  //**********************************************************************************************
1520 
1521  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1522 #if BLAZE_BLAS_MODE
1523 
1536  template< typename MT3 // Type of the left-hand side target matrix
1537  , typename MT4 // Type of the left-hand side matrix operand
1538  , typename MT5 > // Type of the right-hand side matrix operand
1539  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1540  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1541  {
1542  if( IsTriangular<MT4>::value ) {
1543  assign( C, B );
1544  ctrmm( C, A, CblasLeft,
1545  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1546  complex<float>( 1.0F, 0.0F ) );
1547  }
1548  else if( IsTriangular<MT5>::value ) {
1549  assign( C, A );
1550  ctrmm( C, B, CblasRight,
1551  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1552  complex<float>( 1.0F, 0.0F ) );
1553  }
1554  else {
1555  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1556  }
1557  }
1559 #endif
1560  //**********************************************************************************************
1561 
1562  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1563 #if BLAZE_BLAS_MODE
1564 
1577  template< typename MT3 // Type of the left-hand side target matrix
1578  , typename MT4 // Type of the left-hand side matrix operand
1579  , typename MT5 > // Type of the right-hand side matrix operand
1580  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1581  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1582  {
1583  if( IsTriangular<MT4>::value ) {
1584  assign( C, B );
1585  ztrmm( C, A, CblasLeft,
1586  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1587  complex<double>( 1.0, 0.0 ) );
1588  }
1589  else if( IsTriangular<MT5>::value ) {
1590  assign( C, A );
1591  ztrmm( C, B, CblasRight,
1592  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1593  complex<double>( 1.0, 0.0 ) );
1594  }
1595  else {
1596  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1597  }
1598  }
1600 #endif
1601  //**********************************************************************************************
1602 
1603  //**Assignment to sparse matrices***************************************************************
1616  template< typename MT // Type of the target sparse matrix
1617  , bool SO > // Storage order of the target sparse matrix
1618  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1619  assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1620  {
1622 
1623  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1624 
1631 
1632  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1633  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1634 
1635  const TmpType tmp( serial( rhs ) );
1636  assign( ~lhs, tmp );
1637  }
1639  //**********************************************************************************************
1640 
1641  //**Restructuring assignment to column-major matrices*******************************************
1656  template< typename MT > // Type of the target matrix
1657  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1658  assign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
1659  {
1661 
1663 
1664  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1665  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1666 
1667  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1668  assign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1669  else if( IsSymmetric<MT1>::value )
1670  assign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1671  else
1672  assign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1673  }
1675  //**********************************************************************************************
1676 
1677  //**Addition assignment to dense matrices*******************************************************
1690  template< typename MT // Type of the target dense matrix
1691  , bool SO > // Storage order of the target dense matrix
1692  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1693  addAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1694  {
1696 
1697  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1698  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1699 
1700  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1701  return;
1702  }
1703 
1704  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1705  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1706 
1707  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1708  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1709  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1710  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1711  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1712  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1713 
1714  DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1715  }
1717  //**********************************************************************************************
1718 
1719  //**Addition assignment to dense matrices (kernel selection)************************************
1730  template< typename MT3 // Type of the left-hand side target matrix
1731  , typename MT4 // Type of the left-hand side matrix operand
1732  , typename MT5 > // Type of the right-hand side matrix operand
1733  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1734  {
1735  if( ( IsDiagonal<MT5>::value ) ||
1736  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
1737  selectSmallAddAssignKernel( C, A, B );
1738  else
1739  selectBlasAddAssignKernel( C, A, B );
1740  }
1742  //**********************************************************************************************
1743 
1744  //**Default addition assignment to dense matrices (general/general)*****************************
1758  template< typename MT3 // Type of the left-hand side target matrix
1759  , typename MT4 // Type of the left-hand side matrix operand
1760  , typename MT5 > // Type of the right-hand side matrix operand
1761  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
1762  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1763  {
1764  const size_t M( A.rows() );
1765  const size_t N( B.columns() );
1766  const size_t K( A.columns() );
1767 
1768  for( size_t i=0UL; i<M; ++i )
1769  {
1770  const size_t kbegin( ( IsUpper<MT4>::value )
1771  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1772  :( 0UL ) );
1773  const size_t kend( ( IsLower<MT4>::value )
1774  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1775  :( K ) );
1776  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
1777 
1778  for( size_t k=kbegin; k<kend; ++k )
1779  {
1780  const size_t jbegin( ( IsUpper<MT5>::value )
1781  ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
1782  :( 0UL ) );
1783  const size_t jend( ( IsLower<MT5>::value )
1784  ?( IsStrictlyLower<MT5>::value ? k : k+1UL )
1785  :( N ) );
1786  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1787 
1788  const size_t jnum( jend - jbegin );
1789  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1790 
1791  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1792  C(i,j ) += A(i,k) * B(k,j );
1793  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1794  }
1795  if( jpos < jend ) {
1796  C(i,jpos) += A(i,k) * B(k,jpos);
1797  }
1798  }
1799  }
1800  }
1802  //**********************************************************************************************
1803 
1804  //**Default addition assignment to dense matrices (general/diagonal)****************************
1818  template< typename MT3 // Type of the left-hand side target matrix
1819  , typename MT4 // Type of the left-hand side matrix operand
1820  , typename MT5 > // Type of the right-hand side matrix operand
1821  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
1822  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1823  {
1825 
1826  const size_t M( A.rows() );
1827  const size_t N( B.columns() );
1828 
1829  for( size_t i=0UL; i<M; ++i )
1830  {
1831  const size_t jbegin( ( IsUpper<MT4>::value )
1832  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1833  :( 0UL ) );
1834  const size_t jend( ( IsLower<MT4>::value )
1835  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1836  :( N ) );
1837  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1838 
1839  const size_t jnum( jend - jbegin );
1840  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1841 
1842  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1843  C(i,j ) += A(i,j ) * B(j ,j );
1844  C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
1845  }
1846  if( jpos < jend ) {
1847  C(i,jpos) += A(i,jpos) * B(jpos,jpos);
1848  }
1849  }
1850  }
1852  //**********************************************************************************************
1853 
1854  //**Default addition assignment to dense matrices (diagonal/general)****************************
1868  template< typename MT3 // Type of the left-hand side target matrix
1869  , typename MT4 // Type of the left-hand side matrix operand
1870  , typename MT5 > // Type of the right-hand side matrix operand
1871  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
1872  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1873  {
1875 
1876  const size_t M( A.rows() );
1877  const size_t N( B.columns() );
1878 
1879  for( size_t i=0UL; i<M; ++i )
1880  {
1881  const size_t jbegin( ( IsUpper<MT5>::value )
1882  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
1883  :( 0UL ) );
1884  const size_t jend( ( IsLower<MT5>::value )
1885  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
1886  :( N ) );
1887  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1888 
1889  const size_t jnum( jend - jbegin );
1890  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1891 
1892  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1893  C(i,j ) += A(i,i) * B(i,j );
1894  C(i,j+1UL) += A(i,i) * B(i,j+1UL);
1895  }
1896  if( jpos < jend ) {
1897  C(i,jpos) += A(i,i) * B(i,jpos);
1898  }
1899  }
1900  }
1902  //**********************************************************************************************
1903 
1904  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
1918  template< typename MT3 // Type of the left-hand side target matrix
1919  , typename MT4 // Type of the left-hand side matrix operand
1920  , typename MT5 > // Type of the right-hand side matrix operand
1921  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
1922  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1923  {
1925 
1926  for( size_t i=0UL; i<A.rows(); ++i ) {
1927  C(i,i) += A(i,i) * B(i,i);
1928  }
1929  }
1931  //**********************************************************************************************
1932 
1933  //**Default addition assignment to dense matrices (small matrices)******************************
1947  template< typename MT3 // Type of the left-hand side target matrix
1948  , typename MT4 // Type of the left-hand side matrix operand
1949  , typename MT5 > // Type of the right-hand side matrix operand
1950  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1951  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1952  {
1953  selectDefaultAddAssignKernel( C, A, B );
1954  }
1956  //**********************************************************************************************
1957 
1958  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
1973  template< typename MT3 // Type of the left-hand side target matrix
1974  , typename MT4 // Type of the left-hand side matrix operand
1975  , typename MT5 > // Type of the right-hand side matrix operand
1976  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1977  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1978  {
1979  typedef IntrinsicTrait<ElementType> IT;
1980 
1981  const size_t M( A.rows() );
1982  const size_t N( B.columns() );
1983  const size_t K( A.columns() );
1984 
1985  size_t j( 0UL );
1986 
1987  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1988  for( size_t i=0UL; i<M; ++i )
1989  {
1990  const size_t kbegin( ( IsUpper<MT4>::value )
1991  ?( ( IsLower<MT5>::value )
1992  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1993  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1994  :( IsLower<MT5>::value ? j : 0UL ) );
1995  const size_t kend( ( IsLower<MT4>::value )
1996  ?( ( IsUpper<MT5>::value )
1997  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
1998  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1999  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
2000 
2001  IntrinsicType xmm1( (~C).load(i,j ) );
2002  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
2003  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
2004  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
2005  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
2006  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
2007  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
2008  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
2009 
2010  for( size_t k=kbegin; k<kend; ++k ) {
2011  const IntrinsicType a1( set( A(i,k) ) );
2012  xmm1 = xmm1 + a1 * B.load(k,j );
2013  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2014  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2015  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2016  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2017  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2018  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2019  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2020  }
2021 
2022  (~C).store( i, j , xmm1 );
2023  (~C).store( i, j+IT::size , xmm2 );
2024  (~C).store( i, j+IT::size*2UL, xmm3 );
2025  (~C).store( i, j+IT::size*3UL, xmm4 );
2026  (~C).store( i, j+IT::size*4UL, xmm5 );
2027  (~C).store( i, j+IT::size*5UL, xmm6 );
2028  (~C).store( i, j+IT::size*6UL, xmm7 );
2029  (~C).store( i, j+IT::size*7UL, xmm8 );
2030  }
2031  }
2032 
2033  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
2034  {
2035  size_t i( 0UL );
2036 
2037  for( ; (i+2UL) <= M; i+=2UL )
2038  {
2039  const size_t kbegin( ( IsUpper<MT4>::value )
2040  ?( ( IsLower<MT5>::value )
2041  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2042  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2043  :( IsLower<MT5>::value ? j : 0UL ) );
2044  const size_t kend( ( IsLower<MT4>::value )
2045  ?( ( IsUpper<MT5>::value )
2046  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
2047  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2048  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
2049 
2050  IntrinsicType xmm1( (~C).load(i ,j ) );
2051  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
2052  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
2053  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
2054  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
2055  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
2056  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
2057  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
2058 
2059  for( size_t k=kbegin; k<kend; ++k ) {
2060  const IntrinsicType a1( set( A(i ,k) ) );
2061  const IntrinsicType a2( set( A(i+1UL,k) ) );
2062  const IntrinsicType b1( B.load(k,j ) );
2063  const IntrinsicType b2( B.load(k,j+IT::size ) );
2064  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
2065  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
2066  xmm1 = xmm1 + a1 * b1;
2067  xmm2 = xmm2 + a1 * b2;
2068  xmm3 = xmm3 + a1 * b3;
2069  xmm4 = xmm4 + a1 * b4;
2070  xmm5 = xmm5 + a2 * b1;
2071  xmm6 = xmm6 + a2 * b2;
2072  xmm7 = xmm7 + a2 * b3;
2073  xmm8 = xmm8 + a2 * b4;
2074  }
2075 
2076  (~C).store( i , j , xmm1 );
2077  (~C).store( i , j+IT::size , xmm2 );
2078  (~C).store( i , j+IT::size*2UL, xmm3 );
2079  (~C).store( i , j+IT::size*3UL, xmm4 );
2080  (~C).store( i+1UL, j , xmm5 );
2081  (~C).store( i+1UL, j+IT::size , xmm6 );
2082  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
2083  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
2084  }
2085 
2086  if( i < M )
2087  {
2088  const size_t kbegin( ( IsUpper<MT4>::value )
2089  ?( ( IsLower<MT5>::value )
2090  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2091  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2092  :( IsLower<MT5>::value ? j : 0UL ) );
2093  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
2094 
2095  IntrinsicType xmm1( (~C).load(i,j ) );
2096  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
2097  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
2098  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
2099 
2100  for( size_t k=kbegin; k<kend; ++k ) {
2101  const IntrinsicType a1( set( A(i,k) ) );
2102  xmm1 = xmm1 + a1 * B.load(k,j );
2103  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2104  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2105  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2106  }
2107 
2108  (~C).store( i, j , xmm1 );
2109  (~C).store( i, j+IT::size , xmm2 );
2110  (~C).store( i, j+IT::size*2UL, xmm3 );
2111  (~C).store( i, j+IT::size*3UL, xmm4 );
2112  }
2113  }
2114 
2115  for( ; (j+IT::size) < N; j+=IT::size*2UL )
2116  {
2117  size_t i( 0UL );
2118 
2119  for( ; (i+2UL) <= M; i+=2UL )
2120  {
2121  const size_t kbegin( ( IsUpper<MT4>::value )
2122  ?( ( IsLower<MT5>::value )
2123  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2124  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2125  :( IsLower<MT5>::value ? j : 0UL ) );
2126  const size_t kend( ( IsLower<MT4>::value )
2127  ?( ( IsUpper<MT5>::value )
2128  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
2129  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2130  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
2131 
2132  IntrinsicType xmm1( (~C).load(i ,j ) );
2133  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
2134  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
2135  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
2136 
2137  for( size_t k=kbegin; k<kend; ++k ) {
2138  const IntrinsicType a1( set( A(i ,k) ) );
2139  const IntrinsicType a2( set( A(i+1UL,k) ) );
2140  const IntrinsicType b1( B.load(k,j ) );
2141  const IntrinsicType b2( B.load(k,j+IT::size) );
2142  xmm1 = xmm1 + a1 * b1;
2143  xmm2 = xmm2 + a1 * b2;
2144  xmm3 = xmm3 + a2 * b1;
2145  xmm4 = xmm4 + a2 * b2;
2146  }
2147 
2148  (~C).store( i , j , xmm1 );
2149  (~C).store( i , j+IT::size, xmm2 );
2150  (~C).store( i+1UL, j , xmm3 );
2151  (~C).store( i+1UL, j+IT::size, xmm4 );
2152  }
2153 
2154  if( i < M )
2155  {
2156  const size_t kbegin( ( IsUpper<MT4>::value )
2157  ?( ( IsLower<MT5>::value )
2158  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2159  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2160  :( IsLower<MT5>::value ? j : 0UL ) );
2161  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
2162 
2163  IntrinsicType xmm1( (~C).load(i,j ) );
2164  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
2165 
2166  for( size_t k=kbegin; k<kend; ++k ) {
2167  const IntrinsicType a1( set( A(i,k) ) );
2168  xmm1 = xmm1 + a1 * B.load(k,j );
2169  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2170  }
2171 
2172  (~C).store( i, j , xmm1 );
2173  (~C).store( i, j+IT::size, xmm2 );
2174  }
2175  }
2176 
2177  if( j < N )
2178  {
2179  size_t i( 0UL );
2180 
2181  for( ; (i+2UL) <= M; i+=2UL )
2182  {
2183  const size_t kbegin( ( IsUpper<MT4>::value )
2184  ?( ( IsLower<MT5>::value )
2185  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2186  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2187  :( IsLower<MT5>::value ? j : 0UL ) );
2188  const size_t kend( ( IsLower<MT4>::value )
2189  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
2190  :( K ) );
2191 
2192  IntrinsicType xmm1( (~C).load(i ,j) );
2193  IntrinsicType xmm2( (~C).load(i+1UL,j) );
2194 
2195  for( size_t k=kbegin; k<kend; ++k ) {
2196  const IntrinsicType b1( B.load(k,j) );
2197  xmm1 = xmm1 + set( A(i ,k) ) * b1;
2198  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
2199  }
2200 
2201  (~C).store( i , j, xmm1 );
2202  (~C).store( i+1UL, j, xmm2 );
2203  }
2204 
2205  if( i < M )
2206  {
2207  const size_t kbegin( ( IsUpper<MT4>::value )
2208  ?( ( IsLower<MT5>::value )
2209  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2210  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2211  :( IsLower<MT5>::value ? j : 0UL ) );
2212 
2213  IntrinsicType xmm1( (~C).load(i,j) );
2214 
2215  for( size_t k=kbegin; k<K; ++k ) {
2216  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
2217  }
2218 
2219  (~C).store( i, j, xmm1 );
2220  }
2221  }
2222  }
2224  //**********************************************************************************************
2225 
2226  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
2241  template< typename MT3 // Type of the left-hand side target matrix
2242  , typename MT4 // Type of the left-hand side matrix operand
2243  , typename MT5 > // Type of the right-hand side matrix operand
2244  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2245  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2246  {
2251 
2252  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2253  const typename MT4::OppositeType tmp( serial( A ) );
2254  addAssign( ~C, tmp * B );
2255  }
2256  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2257  const typename MT5::OppositeType tmp( serial( B ) );
2258  addAssign( ~C, A * tmp );
2259  }
2260  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2261  const typename MT4::OppositeType tmp( serial( A ) );
2262  addAssign( ~C, tmp * B );
2263  }
2264  else {
2265  const typename MT5::OppositeType tmp( serial( B ) );
2266  addAssign( ~C, A * tmp );
2267  }
2268  }
2270  //**********************************************************************************************
2271 
2272  //**Default addition assignment to dense matrices (large matrices)******************************
// Fallback large-matrix kernel for the addition assignment of a dense matrix-dense matrix
// multiplication (C += A * B).
//
// This overload is enabled whenever UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold
// (DisableIf). It performs no work of its own and forwards all three arguments unchanged
// to selectDefaultAddAssignKernel().
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2286  template< typename MT3 // Type of the left-hand side target matrix
2287  , typename MT4 // Type of the left-hand side matrix operand
2288  , typename MT5 > // Type of the right-hand side matrix operand
2289  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2290  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2291  {
2292  selectDefaultAddAssignKernel( C, A, B );
2293  }
2295  //**********************************************************************************************
2296 
2297  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Vectorized large-matrix kernel for the addition assignment C += A * B to a row-major
// dense matrix (enabled via UseVectorizedDefaultKernel).
//
// \param C The target matrix; its current values are loaded, accumulated into and stored back.
// \param A The left-hand side operand; single elements are broadcast via set().
// \param B The right-hand side operand; read in IntrinsicType-wide chunks via load().
//
// The iteration space is tiled into blocks of 64 rows (iblock), 128 columns (jblock) and a
// reduction depth of 128 (kblock). Inside a tile the columns are processed in strips of four,
// two and finally one IntrinsicType vector(s); the rows are processed in groups of two (four
// for the two-vector strip) followed by a remainder. For operands flagged by IsUpper/IsLower
// the k-range of each row group/column strip is narrowed before the accumulation loop runs.
//
// NOTE(review): the single-vector tail loads and stores a full vector at column j even when
// fewer than IT::size columns may remain -- presumably this relies on padded rows; confirm.
2312  template< typename MT3 // Type of the left-hand side target matrix
2313  , typename MT4 // Type of the left-hand side matrix operand
2314  , typename MT5 > // Type of the right-hand side matrix operand
2315  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2316  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2317  {
2318  typedef IntrinsicTrait<ElementType> IT;
2319 
2320  const size_t M( A.rows() );
2321  const size_t N( B.columns() );
2322  const size_t K( A.columns() );
2323 
// Tile extents in rows (i), columns (j) and reduction depth (k)
2324  const size_t iblock( 64UL );
2325  const size_t jblock( 128UL );
2326  const size_t kblock( 128UL );
2327 
// Loop over the column tiles of C; jend is the (clamped) end of the current tile
2328  for( size_t jj=0UL; jj<N; jj+=jblock )
2329  {
2330  const size_t jend( min( jj+jblock, N ) );
2331 
// Loop over the row tiles of C; iend is the (clamped) end of the current tile
2332  for( size_t ii=0UL; ii<M; ii+=iblock )
2333  {
2334  const size_t iend( min( ii+iblock, M ) );
2335 
// Loop over the tiles of the reduction dimension; ktmp is the (clamped) end of the tile
2336  for( size_t kk=0UL; kk<K; kk+=kblock )
2337  {
2338  const size_t ktmp( min( kk+kblock, K ) );
2339 
2340  size_t j( jj );
2341 
// Column strips of four vectors (j, j1, j2, j3)
2342  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
2343  {
2344  const size_t j1( j+IT::size );
2345  const size_t j2( j+IT::size*2UL );
2346  const size_t j3( j+IT::size*3UL );
2347 
2348  size_t i( ii );
2349 
// Two rows at a time: 2x4 accumulators xmm1..xmm8
2350  for( ; (i+2UL) <= iend; i+=2UL )
2351  {
// k-range of this k-tile, narrowed for upper A (k >= i), lower B (k >= j),
// lower A (k < i+2, i.e. bounded by the last row of the group) and upper B (k < j+4*IT::size)
2352  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2353  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2354  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
2355  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
2356 
// Start from the current contents of C (addition assignment)
2357  IntrinsicType xmm1( (~C).load(i ,j ) );
2358  IntrinsicType xmm2( (~C).load(i ,j1) );
2359  IntrinsicType xmm3( (~C).load(i ,j2) );
2360  IntrinsicType xmm4( (~C).load(i ,j3) );
2361  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
2362  IntrinsicType xmm6( (~C).load(i+1UL,j1) );
2363  IntrinsicType xmm7( (~C).load(i+1UL,j2) );
2364  IntrinsicType xmm8( (~C).load(i+1UL,j3) );
2365 
// Accumulate: each broadcast element of A is multiplied with four vectors of row k of B
2366  for( size_t k=kbegin; k<kend; ++k ) {
2367  const IntrinsicType a1( set( A(i ,k) ) );
2368  const IntrinsicType a2( set( A(i+1UL,k) ) );
2369  const IntrinsicType b1( B.load(k,j ) );
2370  const IntrinsicType b2( B.load(k,j1) );
2371  const IntrinsicType b3( B.load(k,j2) );
2372  const IntrinsicType b4( B.load(k,j3) );
2373  xmm1 = xmm1 + a1 * b1;
2374  xmm2 = xmm2 + a1 * b2;
2375  xmm3 = xmm3 + a1 * b3;
2376  xmm4 = xmm4 + a1 * b4;
2377  xmm5 = xmm5 + a2 * b1;
2378  xmm6 = xmm6 + a2 * b2;
2379  xmm7 = xmm7 + a2 * b3;
2380  xmm8 = xmm8 + a2 * b4;
2381  }
2382 
// Write the updated accumulators back to C
2383  (~C).store( i , j , xmm1 );
2384  (~C).store( i , j1, xmm2 );
2385  (~C).store( i , j2, xmm3 );
2386  (~C).store( i , j3, xmm4 );
2387  (~C).store( i+1UL, j , xmm5 );
2388  (~C).store( i+1UL, j1, xmm6 );
2389  (~C).store( i+1UL, j2, xmm7 );
2390  (~C).store( i+1UL, j3, xmm8 );
2391  }
2392 
// Remaining single row of the tile (odd tile height)
2393  if( i < iend )
2394  {
2395  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2396  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2397  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2398  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
2399 
2400  IntrinsicType xmm1( (~C).load(i,j ) );
2401  IntrinsicType xmm2( (~C).load(i,j1) );
2402  IntrinsicType xmm3( (~C).load(i,j2) );
2403  IntrinsicType xmm4( (~C).load(i,j3) );
2404 
2405  for( size_t k=kbegin; k<kend; ++k ) {
2406  const IntrinsicType a1( set( A(i,k) ) );
2407  xmm1 = xmm1 + a1 * B.load(k,j );
2408  xmm2 = xmm2 + a1 * B.load(k,j1);
2409  xmm3 = xmm3 + a1 * B.load(k,j2);
2410  xmm4 = xmm4 + a1 * B.load(k,j3);
2411  }
2412 
2413  (~C).store( i, j , xmm1 );
2414  (~C).store( i, j1, xmm2 );
2415  (~C).store( i, j2, xmm3 );
2416  (~C).store( i, j3, xmm4 );
2417  }
2418  }
2419 
// Column strips of two vectors (j, j1)
2420  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
2421  {
2422  const size_t j1( j+IT::size );
2423 
2424  size_t i( ii );
2425 
// Four rows at a time: 4x2 accumulators xmm1..xmm8
2426  for( ; (i+4UL) <= iend; i+=4UL )
2427  {
2428  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2429  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2430  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
2431  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
2432 
2433  IntrinsicType xmm1( (~C).load(i ,j ) );
2434  IntrinsicType xmm2( (~C).load(i ,j1) );
2435  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
2436  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
2437  IntrinsicType xmm5( (~C).load(i+2UL,j ) );
2438  IntrinsicType xmm6( (~C).load(i+2UL,j1) );
2439  IntrinsicType xmm7( (~C).load(i+3UL,j ) );
2440  IntrinsicType xmm8( (~C).load(i+3UL,j1) );
2441 
2442  for( size_t k=kbegin; k<kend; ++k ) {
2443  const IntrinsicType a1( set( A(i ,k) ) );
2444  const IntrinsicType a2( set( A(i+1UL,k) ) );
2445  const IntrinsicType a3( set( A(i+2UL,k) ) );
2446  const IntrinsicType a4( set( A(i+3UL,k) ) );
2447  const IntrinsicType b1( B.load(k,j ) );
2448  const IntrinsicType b2( B.load(k,j1) );
2449  xmm1 = xmm1 + a1 * b1;
2450  xmm2 = xmm2 + a1 * b2;
2451  xmm3 = xmm3 + a2 * b1;
2452  xmm4 = xmm4 + a2 * b2;
2453  xmm5 = xmm5 + a3 * b1;
2454  xmm6 = xmm6 + a3 * b2;
2455  xmm7 = xmm7 + a4 * b1;
2456  xmm8 = xmm8 + a4 * b2;
2457  }
2458 
2459  (~C).store( i , j , xmm1 );
2460  (~C).store( i , j1, xmm2 );
2461  (~C).store( i+1UL, j , xmm3 );
2462  (~C).store( i+1UL, j1, xmm4 );
2463  (~C).store( i+2UL, j , xmm5 );
2464  (~C).store( i+2UL, j1, xmm6 );
2465  (~C).store( i+3UL, j , xmm7 );
2466  (~C).store( i+3UL, j1, xmm8 );
2467  }
2468 
// Two rows at a time: 2x2 accumulators
2469  for( ; (i+2UL) <= iend; i+=2UL )
2470  {
2471  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2472  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2473  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
2474  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
2475 
2476  IntrinsicType xmm1( (~C).load(i ,j ) );
2477  IntrinsicType xmm2( (~C).load(i ,j1) );
2478  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
2479  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
2480 
2481  for( size_t k=kbegin; k<kend; ++k ) {
2482  const IntrinsicType a1( set( A(i ,k) ) );
2483  const IntrinsicType a2( set( A(i+1UL,k) ) );
2484  const IntrinsicType b1( B.load(k,j ) );
2485  const IntrinsicType b2( B.load(k,j1) );
2486  xmm1 = xmm1 + a1 * b1;
2487  xmm2 = xmm2 + a1 * b2;
2488  xmm3 = xmm3 + a2 * b1;
2489  xmm4 = xmm4 + a2 * b2;
2490  }
2491 
2492  (~C).store( i , j , xmm1 );
2493  (~C).store( i , j1, xmm2 );
2494  (~C).store( i+1UL, j , xmm3 );
2495  (~C).store( i+1UL, j1, xmm4 );
2496  }
2497 
// Remaining single row of the tile
2498  if( i < iend )
2499  {
2500  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2501  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2502  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2503  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
2504 
2505  IntrinsicType xmm1( (~C).load(i,j ) );
2506  IntrinsicType xmm2( (~C).load(i,j1) );
2507 
2508  for( size_t k=kbegin; k<kend; ++k ) {
2509  const IntrinsicType a1( set( A(i,k) ) );
2510  xmm1 = xmm1 + a1 * B.load(k,j );
2511  xmm2 = xmm2 + a1 * B.load(k,j1);
2512  }
2513 
2514  (~C).store( i, j , xmm1 );
2515  (~C).store( i, j1, xmm2 );
2516  }
2517  }
2518 
// Final single-vector column strip of the tile, processed one row at a time
2519  if( j < jend )
2520  {
2521  for( size_t i=ii; i<iend; ++i )
2522  {
2523  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2524  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2525  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
2526  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
2527 
2528  IntrinsicType xmm1( (~C).load(i,j) );
2529 
2530  for( size_t k=kbegin; k<kend; ++k ) {
2531  const IntrinsicType a1( set( A(i,k) ) );
2532  xmm1 = xmm1 + a1 * B.load(k,j);
2533  }
2534 
2535  (~C).store( i, j, xmm1 );
2536  }
2537  }
2538  }
2539  }
2540  }
2541  }
2543  //**********************************************************************************************
2544 
2545  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
// Vectorized large-matrix kernel for the addition assignment C += A * B to a column-major
// dense matrix (enabled via UseVectorizedDefaultKernel).
//
// There is no dedicated blocked implementation for column-major targets in this overload:
// the call is forwarded to selectSmallAddAssignKernel() with the unwrapped target (~C).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2559  template< typename MT3 // Type of the left-hand side target matrix
2560  , typename MT4 // Type of the left-hand side matrix operand
2561  , typename MT5 > // Type of the right-hand side matrix operand
2562  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2563  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2564  {
2565  selectSmallAddAssignKernel( ~C, A, B );
2566  }
2568  //**********************************************************************************************
2569 
2570  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Default overload of the BLAS-based addition assignment kernel (C += A * B).
//
// Enabled via UseDefaultKernel<MT3,MT4,MT5> -- presumably the case in which none of the
// precision-specific BLAS overloads applies (confirm against the trait definition). No BLAS
// routine is called here; the arguments are forwarded to selectLargeAddAssignKernel().
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2584  template< typename MT3 // Type of the left-hand side target matrix
2585  , typename MT4 // Type of the left-hand side matrix operand
2586  , typename MT5 > // Type of the right-hand side matrix operand
2587  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2588  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2589  {
2590  selectLargeAddAssignKernel( C, A, B );
2591  }
2593  //**********************************************************************************************
2594 
2595  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2596 #if BLAZE_BLAS_MODE
2597 
2610  template< typename MT3 // Type of the left-hand side target matrix
2611  , typename MT4 // Type of the left-hand side matrix operand
2612  , typename MT5 > // Type of the right-hand side matrix operand
2613  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2614  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2615  {
2616  if( IsTriangular<MT4>::value ) {
2617  typename MT3::ResultType tmp( B );
2618  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2619  addAssign( C, tmp );
2620  }
2621  else if( IsTriangular<MT5>::value ) {
2622  typename MT3::ResultType tmp( A );
2623  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2624  addAssign( C, tmp );
2625  }
2626  else {
2627  sgemm( C, A, B, 1.0F, 1.0F );
2628  }
2629  }
2631 #endif
2632  //**********************************************************************************************
2633 
2634  //**BLAS-based addition assignment to dense matrices (double precision)*************************
2635 #if BLAZE_BLAS_MODE
2636 
2649  template< typename MT3 // Type of the left-hand side target matrix
2650  , typename MT4 // Type of the left-hand side matrix operand
2651  , typename MT5 > // Type of the right-hand side matrix operand
2652  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2653  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2654  {
2655  if( IsTriangular<MT4>::value ) {
2656  typename MT3::ResultType tmp( B );
2657  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2658  addAssign( C, tmp );
2659  }
2660  else if( IsTriangular<MT5>::value ) {
2661  typename MT3::ResultType tmp( A );
2662  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2663  addAssign( C, tmp );
2664  }
2665  else {
2666  dgemm( C, A, B, 1.0, 1.0 );
2667  }
2668  }
2670 #endif
2671  //**********************************************************************************************
2672 
2673  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
2674 #if BLAZE_BLAS_MODE
2675 
2688  template< typename MT3 // Type of the left-hand side target matrix
2689  , typename MT4 // Type of the left-hand side matrix operand
2690  , typename MT5 > // Type of the right-hand side matrix operand
2691  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2692  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2693  {
2694  if( IsTriangular<MT4>::value ) {
2695  typename MT3::ResultType tmp( B );
2696  ctrmm( tmp, A, CblasLeft,
2697  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2698  complex<float>( 1.0F, 0.0F ) );
2699  addAssign( C, tmp );
2700  }
2701  else if( IsTriangular<MT5>::value ) {
2702  typename MT3::ResultType tmp( A );
2703  ctrmm( tmp, B, CblasRight,
2704  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2705  complex<float>( 1.0F, 0.0F ) );
2706  addAssign( C, tmp );
2707  }
2708  else {
2709  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2710  }
2711  }
2713 #endif
2714  //**********************************************************************************************
2715 
2716  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
2717 #if BLAZE_BLAS_MODE
2718 
2731  template< typename MT3 // Type of the left-hand side target matrix
2732  , typename MT4 // Type of the left-hand side matrix operand
2733  , typename MT5 > // Type of the right-hand side matrix operand
2734  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2735  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2736  {
2737  if( IsTriangular<MT4>::value ) {
2738  typename MT3::ResultType tmp( B );
2739  ztrmm( tmp, A, CblasLeft,
2740  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2741  complex<double>( 1.0, 0.0 ) );
2742  addAssign( C, tmp );
2743  }
2744  else if( IsTriangular<MT5>::value ) {
2745  typename MT3::ResultType tmp( A );
2746  ztrmm( tmp, B, CblasRight,
2747  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2748  complex<double>( 1.0, 0.0 ) );
2749  addAssign( C, tmp );
2750  }
2751  else {
2752  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2753  }
2754  }
2756 #endif
2757  //**********************************************************************************************
2758 
2759  //**Restructuring addition assignment to column-major matrices**********************************
2774  template< typename MT > // Type of the target matrix
2775  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2776  addAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2777  {
2779 
2781 
2782  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2783  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2784 
2785  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2786  addAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2787  else if( IsSymmetric<MT1>::value )
2788  addAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2789  else
2790  addAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2791  }
2793  //**********************************************************************************************
2794 
2795  //**Addition assignment to sparse matrices******************************************************
2796  // No special implementation for the addition assignment to sparse matrices.
2797  //**********************************************************************************************
2798 
2799  //**Subtraction assignment to dense matrices****************************************************
2812  template< typename MT // Type of the target dense matrix
2813  , bool SO > // Storage order of the target dense matrix
2814  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2815  subAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2816  {
2818 
2819  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2820  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2821 
2822  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2823  return;
2824  }
2825 
2826  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2827  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2828 
2829  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2830  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2831  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2832  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2833  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2834  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2835 
2836  DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2837  }
2839  //**********************************************************************************************
2840 
2841  //**Subtraction assignment to dense matrices (kernel selection)*********************************
2852  template< typename MT3 // Type of the left-hand side target matrix
2853  , typename MT4 // Type of the left-hand side matrix operand
2854  , typename MT5 > // Type of the right-hand side matrix operand
2855  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2856  {
2857  if( ( IsDiagonal<MT5>::value ) ||
2858  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
2859  selectSmallSubAssignKernel( C, A, B );
2860  else
2861  selectBlasSubAssignKernel( C, A, B );
2862  }
2864  //**********************************************************************************************
2865 
2866  //**Default subtraction assignment to dense matrices (general/general)**************************
2880  template< typename MT3 // Type of the left-hand side target matrix
2881  , typename MT4 // Type of the left-hand side matrix operand
2882  , typename MT5 > // Type of the right-hand side matrix operand
2883  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
2884  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2885  {
2886  const size_t M( A.rows() );
2887  const size_t N( B.columns() );
2888  const size_t K( A.columns() );
2889 
2890  for( size_t i=0UL; i<M; ++i )
2891  {
2892  const size_t kbegin( ( IsUpper<MT4>::value )
2893  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2894  :( 0UL ) );
2895  const size_t kend( ( IsLower<MT4>::value )
2896  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2897  :( K ) );
2898  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
2899 
2900  for( size_t k=kbegin; k<kend; ++k )
2901  {
2902  const size_t jbegin( ( IsUpper<MT5>::value )
2903  ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
2904  :( 0UL ) );
2905  const size_t jend( ( IsLower<MT5>::value )
2906  ?( IsStrictlyLower<MT5>::value ? k : k+1UL )
2907  :( N ) );
2908  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2909 
2910  const size_t jnum( jend - jbegin );
2911  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2912 
2913  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2914  C(i,j ) -= A(i,k) * B(k,j );
2915  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
2916  }
2917  if( jpos < jend ) {
2918  C(i,jpos) -= A(i,k) * B(k,jpos);
2919  }
2920  }
2921  }
2922  }
2924  //**********************************************************************************************
2925 
2926  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
2940  template< typename MT3 // Type of the left-hand side target matrix
2941  , typename MT4 // Type of the left-hand side matrix operand
2942  , typename MT5 > // Type of the right-hand side matrix operand
2943  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
2944  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2945  {
2947 
2948  const size_t M( A.rows() );
2949  const size_t N( B.columns() );
2950 
2951  for( size_t i=0UL; i<M; ++i )
2952  {
2953  const size_t jbegin( ( IsUpper<MT4>::value )
2954  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2955  :( 0UL ) );
2956  const size_t jend( ( IsLower<MT4>::value )
2957  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2958  :( N ) );
2959  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2960 
2961  const size_t jnum( jend - jbegin );
2962  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2963 
2964  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2965  C(i,j ) -= A(i,j ) * B(j ,j );
2966  C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
2967  }
2968  if( jpos < jend ) {
2969  C(i,jpos) -= A(i,jpos) * B(jpos,jpos);
2970  }
2971  }
2972  }
2974  //**********************************************************************************************
2975 
2976  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
2990  template< typename MT3 // Type of the left-hand side target matrix
2991  , typename MT4 // Type of the left-hand side matrix operand
2992  , typename MT5 > // Type of the right-hand side matrix operand
2993  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
2994  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2995  {
2997 
2998  const size_t M( A.rows() );
2999  const size_t N( B.columns() );
3000 
3001  for( size_t i=0UL; i<M; ++i )
3002  {
3003  const size_t jbegin( ( IsUpper<MT5>::value )
3004  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
3005  :( 0UL ) );
3006  const size_t jend( ( IsLower<MT5>::value )
3007  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
3008  :( N ) );
3009  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3010 
3011  const size_t jnum( jend - jbegin );
3012  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3013 
3014  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3015  C(i,j ) -= A(i,i) * B(i,j );
3016  C(i,j+1UL) -= A(i,i) * B(i,j+1UL);
3017  }
3018  if( jpos < jend ) {
3019  C(i,jpos) -= A(i,i) * B(i,jpos);
3020  }
3021  }
3022  }
3024  //**********************************************************************************************
3025 
3026  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
3040  template< typename MT3 // Type of the left-hand side target matrix
3041  , typename MT4 // Type of the left-hand side matrix operand
3042  , typename MT5 > // Type of the right-hand side matrix operand
3043  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
3044  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3045  {
3047 
3048  for( size_t i=0UL; i<A.rows(); ++i ) {
3049  C(i,i) -= A(i,i) * B(i,i);
3050  }
3051  }
3053  //**********************************************************************************************
3054 
3055  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Fallback small-matrix kernel for the subtraction assignment of a dense matrix-dense matrix
// multiplication (C -= A * B).
//
// This overload is enabled whenever UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold
// (DisableIf). It performs no work of its own and forwards all three arguments unchanged
// to selectDefaultSubAssignKernel().
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
3069  template< typename MT3 // Type of the left-hand side target matrix
3070  , typename MT4 // Type of the left-hand side matrix operand
3071  , typename MT5 > // Type of the right-hand side matrix operand
3072  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3073  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3074  {
3075  selectDefaultSubAssignKernel( C, A, B );
3076  }
3078  //**********************************************************************************************
3079 
3080  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
3095  template< typename MT3 // Type of the left-hand side target matrix
3096  , typename MT4 // Type of the left-hand side matrix operand
3097  , typename MT5 > // Type of the right-hand side matrix operand
3098  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3099  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3100  {
3101  typedef IntrinsicTrait<ElementType> IT;
3102 
3103  const size_t M( A.rows() );
3104  const size_t N( B.columns() );
3105  const size_t K( A.columns() );
3106 
3107  size_t j( 0UL );
3108 
3109  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3110  for( size_t i=0UL; i<M; ++i )
3111  {
3112  const size_t kbegin( ( IsUpper<MT4>::value )
3113  ?( ( IsLower<MT5>::value )
3114  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3115  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3116  :( IsLower<MT5>::value ? j : 0UL ) );
3117  const size_t kend( ( IsLower<MT4>::value )
3118  ?( ( IsUpper<MT5>::value )
3119  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
3120  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3121  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
3122 
3123  IntrinsicType xmm1( (~C).load(i,j ) );
3124  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
3125  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
3126  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
3127  IntrinsicType xmm5( (~C).load(i,j+IT::size*4UL) );
3128  IntrinsicType xmm6( (~C).load(i,j+IT::size*5UL) );
3129  IntrinsicType xmm7( (~C).load(i,j+IT::size*6UL) );
3130  IntrinsicType xmm8( (~C).load(i,j+IT::size*7UL) );
3131 
3132  for( size_t k=kbegin; k<kend; ++k ) {
3133  const IntrinsicType a1( set( A(i,k) ) );
3134  xmm1 = xmm1 - a1 * B.load(k,j );
3135  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
3136  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
3137  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
3138  xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
3139  xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
3140  xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
3141  xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
3142  }
3143 
3144  (~C).store( i, j , xmm1 );
3145  (~C).store( i, j+IT::size , xmm2 );
3146  (~C).store( i, j+IT::size*2UL, xmm3 );
3147  (~C).store( i, j+IT::size*3UL, xmm4 );
3148  (~C).store( i, j+IT::size*4UL, xmm5 );
3149  (~C).store( i, j+IT::size*5UL, xmm6 );
3150  (~C).store( i, j+IT::size*6UL, xmm7 );
3151  (~C).store( i, j+IT::size*7UL, xmm8 );
3152  }
3153  }
3154 
3155  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
3156  {
3157  size_t i( 0UL );
3158 
3159  for( ; (i+2UL) <= M; i+=2UL )
3160  {
3161  const size_t kbegin( ( IsUpper<MT4>::value )
3162  ?( ( IsLower<MT5>::value )
3163  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3164  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3165  :( IsLower<MT5>::value ? j : 0UL ) );
3166  const size_t kend( ( IsLower<MT4>::value )
3167  ?( ( IsUpper<MT5>::value )
3168  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
3169  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3170  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
3171 
3172  IntrinsicType xmm1( (~C).load(i ,j ) );
3173  IntrinsicType xmm2( (~C).load(i ,j+IT::size ) );
3174  IntrinsicType xmm3( (~C).load(i ,j+IT::size*2UL) );
3175  IntrinsicType xmm4( (~C).load(i ,j+IT::size*3UL) );
3176  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
3177  IntrinsicType xmm6( (~C).load(i+1UL,j+IT::size ) );
3178  IntrinsicType xmm7( (~C).load(i+1UL,j+IT::size*2UL) );
3179  IntrinsicType xmm8( (~C).load(i+1UL,j+IT::size*3UL) );
3180 
3181  for( size_t k=kbegin; k<kend; ++k ) {
3182  const IntrinsicType a1( set( A(i ,k) ) );
3183  const IntrinsicType a2( set( A(i+1UL,k) ) );
3184  const IntrinsicType b1( B.load(k,j ) );
3185  const IntrinsicType b2( B.load(k,j+IT::size ) );
3186  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
3187  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
3188  xmm1 = xmm1 - a1 * b1;
3189  xmm2 = xmm2 - a1 * b2;
3190  xmm3 = xmm3 - a1 * b3;
3191  xmm4 = xmm4 - a1 * b4;
3192  xmm5 = xmm5 - a2 * b1;
3193  xmm6 = xmm6 - a2 * b2;
3194  xmm7 = xmm7 - a2 * b3;
3195  xmm8 = xmm8 - a2 * b4;
3196  }
3197 
3198  (~C).store( i , j , xmm1 );
3199  (~C).store( i , j+IT::size , xmm2 );
3200  (~C).store( i , j+IT::size*2UL, xmm3 );
3201  (~C).store( i , j+IT::size*3UL, xmm4 );
3202  (~C).store( i+1UL, j , xmm5 );
3203  (~C).store( i+1UL, j+IT::size , xmm6 );
3204  (~C).store( i+1UL, j+IT::size*2UL, xmm7 );
3205  (~C).store( i+1UL, j+IT::size*3UL, xmm8 );
3206  }
3207 
3208  if( i < M )
3209  {
3210  const size_t kbegin( ( IsUpper<MT4>::value )
3211  ?( ( IsLower<MT5>::value )
3212  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3213  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3214  :( IsLower<MT5>::value ? j : 0UL ) );
3215  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
3216 
3217  IntrinsicType xmm1( (~C).load(i,j ) );
3218  IntrinsicType xmm2( (~C).load(i,j+IT::size ) );
3219  IntrinsicType xmm3( (~C).load(i,j+IT::size*2UL) );
3220  IntrinsicType xmm4( (~C).load(i,j+IT::size*3UL) );
3221 
3222  for( size_t k=kbegin; k<kend; ++k ) {
3223  const IntrinsicType a1( set( A(i,k) ) );
3224  xmm1 = xmm1 - a1 * B.load(k,j );
3225  xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
3226  xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
3227  xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
3228  }
3229 
3230  (~C).store( i, j , xmm1 );
3231  (~C).store( i, j+IT::size , xmm2 );
3232  (~C).store( i, j+IT::size*2UL, xmm3 );
3233  (~C).store( i, j+IT::size*3UL, xmm4 );
3234  }
3235  }
3236 
3237  for( ; (j+IT::size) < N; j+=IT::size*2UL )
3238  {
3239  size_t i( 0UL );
3240 
3241  for( ; (i+2UL) <= M; i+=2UL )
3242  {
3243  const size_t kbegin( ( IsUpper<MT4>::value )
3244  ?( ( IsLower<MT5>::value )
3245  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3246  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3247  :( IsLower<MT5>::value ? j : 0UL ) );
3248  const size_t kend( ( IsLower<MT4>::value )
3249  ?( ( IsUpper<MT5>::value )
3250  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
3251  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3252  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
3253 
3254  IntrinsicType xmm1( (~C).load(i ,j ) );
3255  IntrinsicType xmm2( (~C).load(i ,j+IT::size) );
3256  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
3257  IntrinsicType xmm4( (~C).load(i+1UL,j+IT::size) );
3258 
3259  for( size_t k=kbegin; k<kend; ++k ) {
3260  const IntrinsicType a1( set( A(i ,k) ) );
3261  const IntrinsicType a2( set( A(i+1UL,k) ) );
3262  const IntrinsicType b1( B.load(k,j ) );
3263  const IntrinsicType b2( B.load(k,j+IT::size) );
3264  xmm1 = xmm1 - a1 * b1;
3265  xmm2 = xmm2 - a1 * b2;
3266  xmm3 = xmm3 - a2 * b1;
3267  xmm4 = xmm4 - a2 * b2;
3268  }
3269 
3270  (~C).store( i , j , xmm1 );
3271  (~C).store( i , j+IT::size, xmm2 );
3272  (~C).store( i+1UL, j , xmm3 );
3273  (~C).store( i+1UL, j+IT::size, xmm4 );
3274  }
3275 
3276  if( i < M )
3277  {
3278  const size_t kbegin( ( IsUpper<MT4>::value )
3279  ?( ( IsLower<MT5>::value )
3280  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3281  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3282  :( IsLower<MT5>::value ? j : 0UL ) );
3283  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
3284 
3285  IntrinsicType xmm1( (~C).load(i,j ) );
3286  IntrinsicType xmm2( (~C).load(i,j+IT::size) );
3287 
3288  for( size_t k=kbegin; k<kend; ++k ) {
3289  const IntrinsicType a1( set( A(i,k) ) );
3290  xmm1 = xmm1 - a1 * B.load(k,j );
3291  xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
3292  }
3293 
3294  (~C).store( i, j , xmm1 );
3295  (~C).store( i, j+IT::size, xmm2 );
3296  }
3297  }
3298 
3299  if( j < N )
3300  {
3301  size_t i( 0UL );
3302 
3303  for( ; (i+2UL) <= M; i+=2UL )
3304  {
3305  const size_t kbegin( ( IsUpper<MT4>::value )
3306  ?( ( IsLower<MT5>::value )
3307  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3308  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3309  :( IsLower<MT5>::value ? j : 0UL ) );
3310  const size_t kend( ( IsLower<MT4>::value )
3311  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
3312  :( K ) );
3313 
3314  IntrinsicType xmm1( (~C).load(i ,j) );
3315  IntrinsicType xmm2( (~C).load(i+1UL,j) );
3316 
3317  for( size_t k=kbegin; k<kend; ++k ) {
3318  const IntrinsicType b1( B.load(k,j) );
3319  xmm1 = xmm1 - set( A(i ,k) ) * b1;
3320  xmm2 = xmm2 - set( A(i+1UL,k) ) * b1;
3321  }
3322 
3323  (~C).store( i , j, xmm1 );
3324  (~C).store( i+1UL, j, xmm2 );
3325  }
3326 
3327  if( i < M )
3328  {
3329  const size_t kbegin( ( IsUpper<MT4>::value )
3330  ?( ( IsLower<MT5>::value )
3331  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3332  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3333  :( IsLower<MT5>::value ? j : 0UL ) );
3334 
3335  IntrinsicType xmm1( (~C).load(i,j) );
3336 
3337  for( size_t k=kbegin; k<K; ++k ) {
3338  xmm1 = xmm1 - set( A(i,k) ) * B.load(k,j);
3339  }
3340 
3341  (~C).store( i, j, xmm1 );
3342  }
3343  }
3344  }
3346  //**********************************************************************************************
3347 
3348  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
3363  template< typename MT3 // Type of the left-hand side target matrix
3364  , typename MT4 // Type of the left-hand side matrix operand
3365  , typename MT5 > // Type of the right-hand side matrix operand
3366  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3367  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3368  {
3373 
3374  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3375  const typename MT4::OppositeType tmp( serial( A ) );
3376  subAssign( ~C, tmp * B );
3377  }
3378  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3379  const typename MT5::OppositeType tmp( serial( B ) );
3380  subAssign( ~C, A * tmp );
3381  }
3382  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3383  const typename MT4::OppositeType tmp( serial( A ) );
3384  subAssign( ~C, tmp * B );
3385  }
3386  else {
3387  const typename MT5::OppositeType tmp( serial( B ) );
3388  subAssign( ~C, A * tmp );
3389  }
3390  }
3392  //**********************************************************************************************
3393 
3394  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Non-vectorized fallback of the large-matrix subtraction assignment kernel.
// The DisableIf return type restricts this overload to type combinations for which
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold. It performs no work of its own and
// forwards C, A and B unchanged to selectDefaultSubAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
3408  template< typename MT3 // Type of the left-hand side target matrix
3409  , typename MT4 // Type of the left-hand side matrix operand
3410  , typename MT5 > // Type of the right-hand side matrix operand
3411  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3412  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3413  {
3414  selectDefaultSubAssignKernel( C, A, B );
3415  }
3417  //**********************************************************************************************
3418 
3419  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
// Vectorized, blocked kernel for the subtraction assignment C -= A*B to a row-major target
// (DenseMatrix<MT3,false>), used for large matrices.
//
// The iteration space is tiled into blocks of 64 rows (iblock), 128 columns (jblock) and an
// inner dimension of 128 (kblock). Within each tile, small register blocks of C are loaded,
// updated with "xmm = xmm - a * b" for every k of the tile and stored back.
//
// \param C The target left-hand side dense matrix (loaded, updated and stored in place).
// \param A The left-hand side multiplication operand (accessed element-wise via A(i,k)).
// \param B The right-hand side multiplication operand (accessed via B.load(k,j)).
//
// NOTE(review): the column loops only check that the first lane of the last vector lies
// below jend, so vector loads/stores may reach past jend; this presumably relies on padded
// rows -- confirm against the matrix storage guarantees.
3434  template< typename MT3 // Type of the left-hand side target matrix
3435  , typename MT4 // Type of the left-hand side matrix operand
3436  , typename MT5 > // Type of the right-hand side matrix operand
3437  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3438  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3439  {
3440  typedef IntrinsicTrait<ElementType> IT;
3441 
// Problem dimensions: C is M x N, the inner (summation) dimension is K.
3442  const size_t M( A.rows() );
3443  const size_t N( B.columns() );
3444  const size_t K( A.columns() );
3445 
// Tile sizes for the row, column and inner dimension, respectively.
3446  const size_t iblock( 64UL );
3447  const size_t jblock( 128UL );
3448  const size_t kblock( 128UL );
3449 
// Outer tiling: column tiles [jj,jend), row tiles [ii,iend), inner tiles [kk,ktmp).
3450  for( size_t jj=0UL; jj<N; jj+=jblock )
3451  {
3452  const size_t jend( min( jj+jblock, N ) );
3453 
3454  for( size_t ii=0UL; ii<M; ii+=iblock )
3455  {
3456  const size_t iend( min( ii+iblock, M ) );
3457 
3458  for( size_t kk=0UL; kk<K; kk+=kblock )
3459  {
3460  const size_t ktmp( min( kk+kblock, K ) );
3461 
3462  size_t j( jj );
3463 
// Column strips of four vectors (IT::size*4 columns), starting at j, j1, j2 and j3.
3464  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
3465  {
3466  const size_t j1( j+IT::size );
3467  const size_t j2( j+IT::size*2UL );
3468  const size_t j3( j+IT::size*3UL );
3469 
3470  size_t i( ii );
3471 
// Two rows at a time: a 2 x 4 block of vectors (xmm1..xmm8).
3472  for( ; (i+2UL) <= iend; i+=2UL )
3473  {
// The k range of the tile is narrowed for upper/lower operand types (IsUpper/IsLower);
// the surrounding max/min keep it inside [kk,ktmp].
3474  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3475  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3476  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
3477  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
3478 
// Start from the current contents of C so that every k tile accumulates into it.
3479  IntrinsicType xmm1( (~C).load(i ,j ) );
3480  IntrinsicType xmm2( (~C).load(i ,j1) );
3481  IntrinsicType xmm3( (~C).load(i ,j2) );
3482  IntrinsicType xmm4( (~C).load(i ,j3) );
3483  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
3484  IntrinsicType xmm6( (~C).load(i+1UL,j1) );
3485  IntrinsicType xmm7( (~C).load(i+1UL,j2) );
3486  IntrinsicType xmm8( (~C).load(i+1UL,j3) );
3487 
// set() presumably broadcasts the scalar A(i,k) to all vector lanes -- confirm.
3488  for( size_t k=kbegin; k<kend; ++k ) {
3489  const IntrinsicType a1( set( A(i ,k) ) );
3490  const IntrinsicType a2( set( A(i+1UL,k) ) );
3491  const IntrinsicType b1( B.load(k,j ) );
3492  const IntrinsicType b2( B.load(k,j1) );
3493  const IntrinsicType b3( B.load(k,j2) );
3494  const IntrinsicType b4( B.load(k,j3) );
3495  xmm1 = xmm1 - a1 * b1;
3496  xmm2 = xmm2 - a1 * b2;
3497  xmm3 = xmm3 - a1 * b3;
3498  xmm4 = xmm4 - a1 * b4;
3499  xmm5 = xmm5 - a2 * b1;
3500  xmm6 = xmm6 - a2 * b2;
3501  xmm7 = xmm7 - a2 * b3;
3502  xmm8 = xmm8 - a2 * b4;
3503  }
3504 
3505  (~C).store( i , j , xmm1 );
3506  (~C).store( i , j1, xmm2 );
3507  (~C).store( i , j2, xmm3 );
3508  (~C).store( i , j3, xmm4 );
3509  (~C).store( i+1UL, j , xmm5 );
3510  (~C).store( i+1UL, j1, xmm6 );
3511  (~C).store( i+1UL, j2, xmm7 );
3512  (~C).store( i+1UL, j3, xmm8 );
3513  }
3514 
// Remaining single row of the tile (odd number of rows).
3515  if( i < iend )
3516  {
3517  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3518  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3519  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3520  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
3521 
3522  IntrinsicType xmm1( (~C).load(i,j ) );
3523  IntrinsicType xmm2( (~C).load(i,j1) );
3524  IntrinsicType xmm3( (~C).load(i,j2) );
3525  IntrinsicType xmm4( (~C).load(i,j3) );
3526 
3527  for( size_t k=kbegin; k<kend; ++k ) {
3528  const IntrinsicType a1( set( A(i,k) ) );
3529  xmm1 = xmm1 - a1 * B.load(k,j );
3530  xmm2 = xmm2 - a1 * B.load(k,j1);
3531  xmm3 = xmm3 - a1 * B.load(k,j2);
3532  xmm4 = xmm4 - a1 * B.load(k,j3);
3533  }
3534 
3535  (~C).store( i, j , xmm1 );
3536  (~C).store( i, j1, xmm2 );
3537  (~C).store( i, j2, xmm3 );
3538  (~C).store( i, j3, xmm4 );
3539  }
3540  }
3541 
// Column strips of two vectors (IT::size*2 columns), starting at j and j1.
3542  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
3543  {
3544  const size_t j1( j+IT::size );
3545 
3546  size_t i( ii );
3547 
// Four rows at a time: a 4 x 2 block of vectors (xmm1..xmm8).
3548  for( ; (i+4UL) <= iend; i+=4UL )
3549  {
3550  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3551  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3552  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
3553  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
3554 
3555  IntrinsicType xmm1( (~C).load(i ,j ) );
3556  IntrinsicType xmm2( (~C).load(i ,j1) );
3557  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
3558  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
3559  IntrinsicType xmm5( (~C).load(i+2UL,j ) );
3560  IntrinsicType xmm6( (~C).load(i+2UL,j1) );
3561  IntrinsicType xmm7( (~C).load(i+3UL,j ) );
3562  IntrinsicType xmm8( (~C).load(i+3UL,j1) );
3563 
3564  for( size_t k=kbegin; k<kend; ++k ) {
3565  const IntrinsicType a1( set( A(i ,k) ) );
3566  const IntrinsicType a2( set( A(i+1UL,k) ) );
3567  const IntrinsicType a3( set( A(i+2UL,k) ) );
3568  const IntrinsicType a4( set( A(i+3UL,k) ) );
3569  const IntrinsicType b1( B.load(k,j ) );
3570  const IntrinsicType b2( B.load(k,j1) );
3571  xmm1 = xmm1 - a1 * b1;
3572  xmm2 = xmm2 - a1 * b2;
3573  xmm3 = xmm3 - a2 * b1;
3574  xmm4 = xmm4 - a2 * b2;
3575  xmm5 = xmm5 - a3 * b1;
3576  xmm6 = xmm6 - a3 * b2;
3577  xmm7 = xmm7 - a4 * b1;
3578  xmm8 = xmm8 - a4 * b2;
3579  }
3580 
3581  (~C).store( i , j , xmm1 );
3582  (~C).store( i , j1, xmm2 );
3583  (~C).store( i+1UL, j , xmm3 );
3584  (~C).store( i+1UL, j1, xmm4 );
3585  (~C).store( i+2UL, j , xmm5 );
3586  (~C).store( i+2UL, j1, xmm6 );
3587  (~C).store( i+3UL, j , xmm7 );
3588  (~C).store( i+3UL, j1, xmm8 );
3589  }
3590 
// Two rows at a time for what the four-row loop left over.
3591  for( ; (i+2UL) <= iend; i+=2UL )
3592  {
3593  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3594  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3595  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
3596  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
3597 
3598  IntrinsicType xmm1( (~C).load(i ,j ) );
3599  IntrinsicType xmm2( (~C).load(i ,j1) );
3600  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
3601  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
3602 
3603  for( size_t k=kbegin; k<kend; ++k ) {
3604  const IntrinsicType a1( set( A(i ,k) ) );
3605  const IntrinsicType a2( set( A(i+1UL,k) ) );
3606  const IntrinsicType b1( B.load(k,j ) );
3607  const IntrinsicType b2( B.load(k,j1) );
3608  xmm1 = xmm1 - a1 * b1;
3609  xmm2 = xmm2 - a1 * b2;
3610  xmm3 = xmm3 - a2 * b1;
3611  xmm4 = xmm4 - a2 * b2;
3612  }
3613 
3614  (~C).store( i , j , xmm1 );
3615  (~C).store( i , j1, xmm2 );
3616  (~C).store( i+1UL, j , xmm3 );
3617  (~C).store( i+1UL, j1, xmm4 );
3618  }
3619 
// Remaining single row of the tile.
3620  if( i < iend )
3621  {
3622  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3623  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3624  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3625  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
3626 
3627  IntrinsicType xmm1( (~C).load(i,j ) );
3628  IntrinsicType xmm2( (~C).load(i,j1) );
3629 
3630  for( size_t k=kbegin; k<kend; ++k ) {
3631  const IntrinsicType a1( set( A(i,k) ) );
3632  xmm1 = xmm1 - a1 * B.load(k,j );
3633  xmm2 = xmm2 - a1 * B.load(k,j1);
3634  }
3635 
3636  (~C).store( i, j , xmm1 );
3637  (~C).store( i, j1, xmm2 );
3638  }
3639  }
3640 
// Last column strip of a single vector, processed row by row.
3641  if( j < jend )
3642  {
3643  for( size_t i=ii; i<iend; ++i )
3644  {
3645  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3646  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3647  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
3648  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
3649 
3650  IntrinsicType xmm1( (~C).load(i,j) );
3651 
3652  for( size_t k=kbegin; k<kend; ++k ) {
3653  const IntrinsicType a1( set( A(i,k) ) );
3654  xmm1 = xmm1 - a1 * B.load(k,j);
3655  }
3656 
3657  (~C).store( i, j, xmm1 );
3658  }
3659  }
3660  }
3661  }
3662  }
3663  }
3665  //**********************************************************************************************
3666 
3667  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
// Vectorized large-matrix subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). There is no dedicated large-matrix implementation here: the call
// is forwarded to selectSmallSubAssignKernel() with the unwrapped target (~C).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
3681  template< typename MT3 // Type of the left-hand side target matrix
3682  , typename MT4 // Type of the left-hand side matrix operand
3683  , typename MT5 > // Type of the right-hand side matrix operand
3684  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3685  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3686  {
3687  selectSmallSubAssignKernel( ~C, A, B );
3688  }
3690  //**********************************************************************************************
3691 
3692  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Default overload of the BLAS-based subtraction assignment kernel.
// Enabled (EnableIf) for type combinations satisfying UseDefaultKernel<MT3,MT4,MT5>; it does
// not call into BLAS and forwards C, A and B to selectLargeSubAssignKernel() instead.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
3706  template< typename MT3 // Type of the left-hand side target matrix
3707  , typename MT4 // Type of the left-hand side matrix operand
3708  , typename MT5 > // Type of the right-hand side matrix operand
3709  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
3710  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3711  {
3712  selectLargeSubAssignKernel( C, A, B );
3713  }
3715  //**********************************************************************************************
3716 
3717  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3718 #if BLAZE_BLAS_MODE
3719 
3732  template< typename MT3 // Type of the left-hand side target matrix
3733  , typename MT4 // Type of the left-hand side matrix operand
3734  , typename MT5 > // Type of the right-hand side matrix operand
3735  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
3736  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3737  {
3738  if( IsTriangular<MT4>::value ) {
3739  typename MT3::ResultType tmp( B );
3740  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3741  subAssign( C, tmp );
3742  }
3743  else if( IsTriangular<MT5>::value ) {
3744  typename MT3::ResultType tmp( A );
3745  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3746  subAssign( C, tmp );
3747  }
3748  else {
3749  sgemm( C, A, B, -1.0F, 1.0F );
3750  }
3751  }
3753 #endif
3754  //**********************************************************************************************
3755 
3756  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3757 #if BLAZE_BLAS_MODE
3758 
3771  template< typename MT3 // Type of the left-hand side target matrix
3772  , typename MT4 // Type of the left-hand side matrix operand
3773  , typename MT5 > // Type of the right-hand side matrix operand
3774  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
3775  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3776  {
3777  if( IsTriangular<MT4>::value ) {
3778  typename MT3::ResultType tmp( B );
3779  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3780  subAssign( C, tmp );
3781  }
3782  else if( IsTriangular<MT5>::value ) {
3783  typename MT3::ResultType tmp( A );
3784  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3785  subAssign( C, tmp );
3786  }
3787  else {
3788  dgemm( C, A, B, -1.0, 1.0 );
3789  }
3790  }
3792 #endif
3793  //**********************************************************************************************
3794 
3795  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3796 #if BLAZE_BLAS_MODE
3797 
3810  template< typename MT3 // Type of the left-hand side target matrix
3811  , typename MT4 // Type of the left-hand side matrix operand
3812  , typename MT5 > // Type of the right-hand side matrix operand
3813  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3814  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3815  {
3816  if( IsTriangular<MT4>::value ) {
3817  typename MT3::ResultType tmp( B );
3818  ctrmm( tmp, A, CblasLeft,
3819  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3820  complex<float>( 1.0F, 0.0F ) );
3821  subAssign( C, tmp );
3822  }
3823  else if( IsTriangular<MT5>::value ) {
3824  typename MT3::ResultType tmp( A );
3825  ctrmm( tmp, B, CblasRight,
3826  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3827  complex<float>( 1.0F, 0.0F ) );
3828  subAssign( C, tmp );
3829  }
3830  else {
3831  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3832  }
3833  }
3835 #endif
3836  //**********************************************************************************************
3837 
3838  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3839 #if BLAZE_BLAS_MODE
3840 
3853  template< typename MT3 // Type of the left-hand side target matrix
3854  , typename MT4 // Type of the left-hand side matrix operand
3855  , typename MT5 > // Type of the right-hand side matrix operand
3856  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3857  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3858  {
3859  if( IsTriangular<MT4>::value ) {
3860  typename MT3::ResultType tmp( B );
3861  ztrmm( tmp, A, CblasLeft,
3862  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3863  complex<float>( 1.0, 0.0 ) );
3864  subAssign( C, tmp );
3865  }
3866  else if( IsTriangular<MT5>::value ) {
3867  typename MT3::ResultType tmp( A );
3868  ztrmm( tmp, B, CblasRight,
3869  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3870  complex<float>( 1.0, 0.0 ) );
3871  subAssign( C, tmp );
3872  }
3873  else {
3874  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
3875  }
3876  }
3878 #endif
3879  //**********************************************************************************************
3880 
3881  //**Restructuring subtraction assignment to column-major matrices*******************************
3896  template< typename MT > // Type of the target matrix
3897  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3898  subAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
3899  {
3901 
3903 
3904  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3905  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3906 
3907  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3908  subAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
3909  else if( IsSymmetric<MT1>::value )
3910  subAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
3911  else
3912  subAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
3913  }
3915  //**********************************************************************************************
3916 
3917  //**Subtraction assignment to sparse matrices***************************************************
3918  // No special implementation for the subtraction assignment to sparse matrices.
3919  //**********************************************************************************************
3920 
3921  //**Multiplication assignment to dense matrices*************************************************
3922  // No special implementation for the multiplication assignment to dense matrices.
3923  //**********************************************************************************************
3924 
3925  //**Multiplication assignment to sparse matrices************************************************
3926  // No special implementation for the multiplication assignment to sparse matrices.
3927  //**********************************************************************************************
3928 
3929  //**SMP assignment to dense matrices************************************************************
3944  template< typename MT // Type of the target dense matrix
3945  , bool SO > // Storage order of the target dense matrix
3946  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
3947  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
3948  {
3950 
3951  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3952  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3953 
3954  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3955  return;
3956  }
3957  else if( rhs.lhs_.columns() == 0UL ) {
3958  reset( ~lhs );
3959  return;
3960  }
3961 
3962  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3963  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3964 
3965  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3966  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3967  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3968  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3969  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3970  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3971 
3972  smpAssign( ~lhs, A * B );
3973  }
3975  //**********************************************************************************************
3976 
3977  //**SMP assignment to sparse matrices***********************************************************
3992  template< typename MT // Type of the target sparse matrix
3993  , bool SO > // Storage order of the target sparse matrix
3994  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
3995  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
3996  {
3998 
3999  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
4000 
4007 
4008  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4009  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4010 
4011  const TmpType tmp( rhs );
4012  smpAssign( ~lhs, tmp );
4013  }
4015  //**********************************************************************************************
4016 
4017  //**Restructuring SMP assignment to column-major matrices***************************************
4032  template< typename MT > // Type of the target matrix
4033  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4034  smpAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
4035  {
4037 
4039 
4040  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4041  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4042 
4043  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4044  smpAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4045  else if( IsSymmetric<MT1>::value )
4046  smpAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4047  else
4048  smpAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4049  }
4051  //**********************************************************************************************
4052 
4053  //**SMP addition assignment to dense matrices***************************************************
4069  template< typename MT // Type of the target dense matrix
4070  , bool SO > // Storage order of the target dense matrix
4071  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4072  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
4073  {
4075 
4076  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4077  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4078 
4079  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4080  return;
4081  }
4082 
4083  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4084  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4085 
4086  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4087  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4088  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4089  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4090  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4091  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4092 
4093  smpAddAssign( ~lhs, A * B );
4094  }
4096  //**********************************************************************************************
4097 
4098  //**Restructuring SMP addition assignment to column-major matrices******************************
4113  template< typename MT > // Type of the target matrix
4114  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4115  smpAddAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
4116  {
4118 
4120 
4121  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4122  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4123 
4124  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4125  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4126  else if( IsSymmetric<MT1>::value )
4127  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4128  else
4129  smpAddAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4130  }
4132  //**********************************************************************************************
4133 
4134  //**SMP addition assignment to sparse matrices**************************************************
4135  // No special implementation for the SMP addition assignment to sparse matrices.
4136  //**********************************************************************************************
4137 
4138  //**SMP subtraction assignment to dense matrices************************************************
4154  template< typename MT // Type of the target dense matrix
4155  , bool SO > // Storage order of the target dense matrix
4156  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4157  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
4158  {
4160 
4161  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4162  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4163 
4164  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4165  return;
4166  }
4167 
4168  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4169  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4170 
4171  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4172  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4173  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4174  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4175  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4176  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4177 
4178  smpSubAssign( ~lhs, A * B );
4179  }
4181  //**********************************************************************************************
4182 
4183  //**Restructuring SMP subtraction assignment to column-major matrices***************************
4198  template< typename MT > // Type of the target matrix
4199  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4200  smpSubAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
4201  {
4203 
4205 
4206  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4207  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4208 
4209  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4210  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4211  else if( IsSymmetric<MT1>::value )
4212  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4213  else
4214  smpSubAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4215  }
4217  //**********************************************************************************************
4218 
4219  //**SMP subtraction assignment to sparse matrices***********************************************
4220  // No special implementation for the SMP subtraction assignment to sparse matrices.
4221  //**********************************************************************************************
4222 
4223  //**SMP multiplication assignment to dense matrices*********************************************
4224  // No special implementation for the SMP multiplication assignment to dense matrices.
4225  //**********************************************************************************************
4226 
4227  //**SMP multiplication assignment to sparse matrices********************************************
4228  // No special implementation for the SMP multiplication assignment to sparse matrices.
4229  //**********************************************************************************************
4230 
4231  //**Compile time checks*************************************************************************
4239  //**********************************************************************************************
4240 };
4241 //*************************************************************************************************
4242 
4243 
4244 
4245 
4246 //=================================================================================================
4247 //
4248 // DMATSCALARMULTEXPR SPECIALIZATION
4249 //
4250 //=================================================================================================
4251 
4252 //*************************************************************************************************
4260 template< typename MT1 // Type of the left-hand side dense matrix
4261  , typename MT2 // Type of the right-hand side dense matrix
4262  , typename ST > // Type of the right-hand side scalar value
4263 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >
4264  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
4265  , private MatScalarMultExpr
4266  , private Computation
4267 {
4268  private:
4269  //**Type definitions****************************************************************************
// Private shorthand aliases for the wrapped matrix multiplication expression (MMM) and
// the result, element and composite types of its two matrix operand types MT1 and MT2.
4270  typedef DMatDMatMultExpr<MT1,MT2> MMM;  // Type of the dense matrix multiplication expression.
4271  typedef typename MMM::ResultType RES;  // Result type of the dense matrix multiplication expression.
4272  typedef typename MT1::ResultType RT1;  // Result type of the left-hand side dense matrix expression.
4273  typedef typename MT2::ResultType RT2;  // Result type of the right-hand side dense matrix expression.
4274  typedef typename RT1::ElementType ET1;  // Element type of the left-hand side result type.
4275  typedef typename RT2::ElementType ET2;  // Element type of the right-hand side result type.
4276  typedef typename MT1::CompositeType CT1;  // Composite type of the left-hand side dense matrix expression.
4277  typedef typename MT2::CompositeType CT2;  // Composite type of the right-hand side dense matrix expression.
4278  //**********************************************************************************************
4279 
4280  //**********************************************************************************************
// Compile-time switch for the left-hand side operand: non-zero when MT1 is flagged by
// IsComputation or by RequiresEvaluation. It selects between 'const RT1' and 'CT1' in
// the LT typedef and contributes to smpAssignable and IsEvaluationRequired.
4282  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4283  //**********************************************************************************************
4284 
4285  //**********************************************************************************************
// Compile-time switch for the right-hand side operand: non-zero when MT2 is flagged by
// IsComputation or by RequiresEvaluation. It selects between 'const RT2' and 'CT2' in
// the RT typedef and contributes to smpAssignable and IsEvaluationRequired.
4287  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4288  //**********************************************************************************************
4289 
4290  //**********************************************************************************************
4292 
4297  template< typename T1, typename T2, typename T3 >
4298  struct CanExploitSymmetry {
4299  enum { value = IsColumnMajorMatrix<T1>::value &&
4300  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
4301  };
4302  //**********************************************************************************************
4303 
4304  //**********************************************************************************************
4306 
4309  template< typename T1, typename T2, typename T3 >
4310  struct IsEvaluationRequired {
4311  enum { value = ( evaluateLeft || evaluateRight ) &&
4312  !CanExploitSymmetry<T1,T2,T3>::value };
4313  };
4314  //**********************************************************************************************
4315 
4316  //**********************************************************************************************
4318 
4321  template< typename T1, typename T2, typename T3, typename T4 >
4322  struct UseSinglePrecisionKernel {
4323  enum { value = BLAZE_BLAS_MODE &&
4324  HasMutableDataAccess<T1>::value &&
4325  HasConstDataAccess<T2>::value &&
4326  HasConstDataAccess<T3>::value &&
4327  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4328  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4329  IsFloat<typename T1::ElementType>::value &&
4330  IsFloat<typename T2::ElementType>::value &&
4331  IsFloat<typename T3::ElementType>::value &&
4332  !IsComplex<T4>::value };
4333  };
4334  //**********************************************************************************************
4335 
4336  //**********************************************************************************************
4338 
4341  template< typename T1, typename T2, typename T3, typename T4 >
4342  struct UseDoublePrecisionKernel {
4343  enum { value = BLAZE_BLAS_MODE &&
4344  HasMutableDataAccess<T1>::value &&
4345  HasConstDataAccess<T2>::value &&
4346  HasConstDataAccess<T3>::value &&
4347  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4348  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4349  IsDouble<typename T1::ElementType>::value &&
4350  IsDouble<typename T2::ElementType>::value &&
4351  IsDouble<typename T3::ElementType>::value &&
4352  !IsComplex<T4>::value };
4353  };
4354  //**********************************************************************************************
4355 
4356  //**********************************************************************************************
4358 
4361  template< typename T1, typename T2, typename T3 >
4362  struct UseSinglePrecisionComplexKernel {
4363  typedef complex<float> Type;
4364  enum { value = BLAZE_BLAS_MODE &&
4365  HasMutableDataAccess<T1>::value &&
4366  HasConstDataAccess<T2>::value &&
4367  HasConstDataAccess<T3>::value &&
4368  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4369  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4370  IsSame<typename T1::ElementType,Type>::value &&
4371  IsSame<typename T2::ElementType,Type>::value &&
4372  IsSame<typename T3::ElementType,Type>::value };
4373  };
4374  //**********************************************************************************************
4375 
4376  //**********************************************************************************************
4378 
4381  template< typename T1, typename T2, typename T3 >
4382  struct UseDoublePrecisionComplexKernel {
4383  typedef complex<double> Type;
4384  enum { value = BLAZE_BLAS_MODE &&
4385  HasMutableDataAccess<T1>::value &&
4386  HasConstDataAccess<T2>::value &&
4387  HasConstDataAccess<T3>::value &&
4388  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4389  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4390  IsSame<typename T1::ElementType,Type>::value &&
4391  IsSame<typename T2::ElementType,Type>::value &&
4392  IsSame<typename T3::ElementType,Type>::value };
4393  };
4394  //**********************************************************************************************
4395 
4396  //**********************************************************************************************
4398 
4400  template< typename T1, typename T2, typename T3, typename T4 >
4401  struct UseDefaultKernel {
4402  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
4403  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
4404  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
4405  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
4406  };
4407  //**********************************************************************************************
4408 
4409  //**********************************************************************************************
4411 
4413  template< typename T1, typename T2, typename T3, typename T4 >
4414  struct UseVectorizedDefaultKernel {
4415  enum { value = !IsDiagonal<T3>::value &&
4416  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4417  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
4418  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
4419  IsSame<typename T1::ElementType,T4>::value &&
4420  IntrinsicTrait<typename T1::ElementType>::addition &&
4421  IntrinsicTrait<typename T1::ElementType>::subtraction &&
4422  IntrinsicTrait<typename T1::ElementType>::multiplication };
4423  };
4424  //**********************************************************************************************
4425 
4426  public:
4427  //**Type definitions****************************************************************************
// Public type definitions of the scaled matrix multiplication expression.
4428  typedef DMatScalarMultExpr<MMM,ST,false> This;  // Type of this DMatScalarMultExpr instance.
4429  typedef typename MultTrait<RES,ST>::Type ResultType;  // Result type as determined by MultTrait<RES,ST>.
4430  typedef typename ResultType::OppositeType OppositeType;  // OppositeType as declared by ResultType.
4431  typedef typename ResultType::TransposeType TransposeType;  // TransposeType as declared by ResultType.
4432  typedef typename ResultType::ElementType ElementType;  // Element type of ResultType.
4433  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType;  // Intrinsic type associated with ElementType.
4434  typedef const ElementType ReturnType;  // Return type of the access operator (by value).
4435  typedef const ResultType CompositeType;  // Composite type: a const ResultType.
4436 
      // Type of the left-hand side operand: the wrapped matrix multiplication expression.
4438  typedef const DMatDMatMultExpr<MT1,MT2> LeftOperand;
4439 
      // Type of the right-hand side operand: the scalar value.
4441  typedef ST RightOperand;
4442 
      // Type used for the left-hand side matrix operand inside the assignment kernels;
      // chosen via SelectType on evaluateLeft between 'const RT1' and 'CT1'
      // (presumably 'const RT1' when evaluateLeft is set -- confirm against SelectType).
4444  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
4445 
      // Type used for the right-hand side matrix operand inside the assignment kernels;
      // chosen via SelectType on evaluateRight between 'const RT2' and 'CT2'
      // (presumably 'const RT2' when evaluateRight is set -- confirm against SelectType).
4447  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
4448  //**********************************************************************************************
4449 
4450  //**Compilation flags***************************************************************************
// Compilation switch for the expression template evaluation strategy: non-zero when MT2 is
// not diagonal, both operand types are vectorizable, both element types and the scalar
// type ST are identical, and IntrinsicTrait reports addition and multiplication for ET1.
4452  enum { vectorizable = !IsDiagonal<MT2>::value &&
4453  MT1::vectorizable && MT2::vectorizable &&
4454  IsSame<ET1,ET2>::value &&
4455  IsSame<ET1,ST>::value &&
4456  IntrinsicTrait<ET1>::addition &&
4457  IntrinsicTrait<ET1>::multiplication };
4458 
      // Compilation switch for the expression template assignment strategy: non-zero when
      // neither operand is flagged by its evaluate switch and both operand types are
      // themselves smpAssignable.
4460  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4461  !evaluateRight && MT2::smpAssignable };
4462  //**********************************************************************************************
4463 
4464  //**Constructor*********************************************************************************
4470  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4471  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4472  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4473  {}
4474  //**********************************************************************************************
4475 
4476  //**Access operator*****************************************************************************
4483  inline ReturnType operator()( size_t i, size_t j ) const {
4484  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4485  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4486  return matrix_(i,j) * scalar_;
4487  }
4488  //**********************************************************************************************
4489 
4490  //**Rows function*******************************************************************************
4495  inline size_t rows() const {
4496  return matrix_.rows();
4497  }
4498  //**********************************************************************************************
4499 
4500  //**Columns function****************************************************************************
4505  inline size_t columns() const {
4506  return matrix_.columns();
4507  }
4508  //**********************************************************************************************
4509 
4510  //**Left operand access*************************************************************************
4515  inline LeftOperand leftOperand() const {
4516  return matrix_;
4517  }
4518  //**********************************************************************************************
4519 
4520  //**Right operand access************************************************************************
4525  inline RightOperand rightOperand() const {
4526  return scalar_;
4527  }
4528  //**********************************************************************************************
4529 
4530  //**********************************************************************************************
4536  template< typename T >
4537  inline bool canAlias( const T* alias ) const {
4538  return matrix_.canAlias( alias );
4539  }
4540  //**********************************************************************************************
4541 
4542  //**********************************************************************************************
4548  template< typename T >
4549  inline bool isAliased( const T* alias ) const {
4550  return matrix_.isAliased( alias );
4551  }
4552  //**********************************************************************************************
4553 
4554  //**********************************************************************************************
4559  inline bool isAligned() const {
4560  return matrix_.isAligned();
4561  }
4562  //**********************************************************************************************
4563 
4564  //**********************************************************************************************
4569  inline bool canSMPAssign() const {
4570  typename MMM::LeftOperand A( matrix_.leftOperand() );
4571  return ( !BLAZE_BLAS_IS_PARALLEL ||
4572  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
4573  ( A.rows() > SMP_DMATDMATMULT_THRESHOLD );
4574  }
4575  //**********************************************************************************************
4576 
4577  private:
4578  //**Member variables****************************************************************************
// Operands of the scaled multiplication, set once by the constructor.
4579  LeftOperand matrix_;  // Left-hand side dense matrix multiplication expression.
4580  RightOperand scalar_;  // Right-hand side scalar of the multiplication expression.
4581  //**********************************************************************************************
4582 
4583  //**Assignment to dense matrices****************************************************************
4595  template< typename MT // Type of the target dense matrix
4596  , bool SO > // Storage order of the target dense matrix
4597  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4598  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4599  {
4601 
4602  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4603  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4604 
4605  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4606  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4607 
4608  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4609  return;
4610  }
4611  else if( left.columns() == 0UL ) {
4612  reset( ~lhs );
4613  return;
4614  }
4615 
4616  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4617  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4618 
4619  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4620  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4621  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4622  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4623  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4624  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4625 
4626  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4627  }
4628  //**********************************************************************************************
4629 
4630  //**Assignment to dense matrices (kernel selection)*********************************************
4641  template< typename MT3 // Type of the left-hand side target matrix
4642  , typename MT4 // Type of the left-hand side matrix operand
4643  , typename MT5 // Type of the right-hand side matrix operand
4644  , typename ST2 > // Type of the scalar value
4645  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4646  {
4647  if( ( IsDiagonal<MT5>::value ) ||
4648  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
4649  selectSmallAssignKernel( C, A, B, scalar );
4650  else
4651  selectBlasAssignKernel( C, A, B, scalar );
4652  }
4653  //**********************************************************************************************
4654 
4655  //**Default assignment to dense matrices (general/general)**************************************
/*!\brief Default (non-vectorized) assignment kernel for C = ( A * B ) * scalar where neither
//        operand type is diagonal.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// The result is built row by row. For every row i the range [kbegin,kend) of the inner
// index is narrowed according to the IsUpper/IsLower (and strict) flags of A. The first
// inner index initializes the row by assignment, all following inner indices accumulate
// into it, and finally the computed part of the row is multiplied by the scalar.
*/
4669  template< typename MT3 // Type of the left-hand side target matrix
4670  , typename MT4 // Type of the left-hand side matrix operand
4671  , typename MT5 // Type of the right-hand side matrix operand
4672  , typename ST2 > // Type of the scalar value
4673  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
4674  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4675  {
4676  const size_t M( A.rows() );
4677  const size_t N( B.columns() );
4678  const size_t K( A.columns() );
4679 
4680  for( size_t i=0UL; i<M; ++i )
4681  {
      // Inner index range of row i as restricted by the triangular flags of A.
4682  const size_t kbegin( ( IsUpper<MT4>::value )
4683  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4684  :( 0UL ) );
4685  const size_t kend( ( IsLower<MT4>::value )
4686  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4687  :( K ) );
4688  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
4689 
      // Strictly triangular A with an empty inner range: the whole row of C is reset.
4690  if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
4691  for( size_t j=0UL; j<N; ++j ) {
4692  reset( (~C)(i,j) );
4693  }
4694  continue;
4695  }
4696 
      // First inner index (k == kbegin): initializes row i of C by plain assignment.
4697  {
      // Column range for k == kbegin as restricted by the triangular flags of B.
4698  const size_t jbegin( ( IsUpper<MT5>::value )
4699  ?( IsStrictlyUpper<MT5>::value ? kbegin+1UL : kbegin )
4700  :( 0UL ) );
4701  const size_t jend( ( IsLower<MT5>::value )
4702  ?( IsStrictlyLower<MT5>::value ? kbegin : kbegin+1UL )
4703  :( N ) );
4704  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4705 
      // Columns in front of the computed range: reset all of them if both operands are
      // upper, otherwise only column 0 if B is strictly upper.
4706  if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
4707  for( size_t j=0UL; j<jbegin; ++j ) {
4708  reset( C(i,j) );
4709  }
4710  }
4711  else if( IsStrictlyUpper<MT5>::value ) {
4712  reset( C(i,0UL) );
4713  }
4714  for( size_t j=jbegin; j<jend; ++j ) {
4715  C(i,j) = A(i,kbegin) * B(kbegin,j);
4716  }
      // Columns behind the computed range: reset all of them if both operands are
      // lower, otherwise only the last column if B is strictly lower.
4717  if( IsLower<MT4>::value && IsLower<MT5>::value ) {
4718  for( size_t j=jend; j<N; ++j ) {
4719  reset( C(i,j) );
4720  }
4721  }
4722  else if( IsStrictlyLower<MT5>::value ) {
4723  reset( C(i,N-1UL) );
4724  }
4725  }
4726 
      // Remaining inner indices: accumulate A(i,k) * B(k,j) into the row.
4727  for( size_t k=kbegin+1UL; k<kend; ++k )
4728  {
4729  const size_t jbegin( ( IsUpper<MT5>::value )
4730  ?( IsStrictlyUpper<MT5>::value ? k+1UL : k )
4731  :( 0UL ) );
4732  const size_t jend( ( IsLower<MT5>::value )
4733  ?( IsStrictlyLower<MT5>::value ? k-1UL : k )
4734  :( N ) );
4735  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4736 
4737  for( size_t j=jbegin; j<jend; ++j ) {
4738  C(i,j) += A(i,k) * B(k,j);
4739  }
      // For a lower B, column jend is written by assignment instead of accumulation
      // (presumably because no earlier k has contributed to it yet -- TODO confirm).
4740  if( IsLower<MT5>::value ) {
4741  C(i,jend) = A(i,k) * B(k,jend);
4742  }
4743  }
4744 
      // Final step: scale the part of row i selected by the combined triangular flags
      // of A and B (the full row if they are not both upper or both lower).
4745  {
4746  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4747  ?( IsStrictlyUpper<MT4>::value || IsStrictlyUpper<MT5>::value ? i+1UL : i )
4748  :( 0UL ) );
4749  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4750  ?( IsStrictlyLower<MT4>::value || IsStrictlyLower<MT5>::value ? i : i+1UL )
4751  :( N ) );
4752  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4753 
4754  for( size_t j=jbegin; j<jend; ++j ) {
4755  C(i,j) *= scalar;
4756  }
4757  }
4758  }
4759  }
4760  //**********************************************************************************************
4761 
4762  //**Default assignment to dense matrices (general/diagonal)*************************************
4776  template< typename MT3 // Type of the left-hand side target matrix
4777  , typename MT4 // Type of the left-hand side matrix operand
4778  , typename MT5 // Type of the right-hand side matrix operand
4779  , typename ST2 > // Type of the scalar value
4780  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
4781  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4782  {
4784 
4785  const size_t M( A.rows() );
4786  const size_t N( B.columns() );
4787 
4788  for( size_t i=0UL; i<M; ++i )
4789  {
4790  const size_t jbegin( ( IsUpper<MT4>::value )
4791  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4792  :( 0UL ) );
4793  const size_t jend( ( IsLower<MT4>::value )
4794  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4795  :( N ) );
4796  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4797 
4798  if( IsUpper<MT4>::value ) {
4799  for( size_t j=0UL; j<jbegin; ++j ) {
4800  reset( C(i,j) );
4801  }
4802  }
4803  for( size_t j=jbegin; j<jend; ++j ) {
4804  C(i,j) = A(i,j) * B(j,j) * scalar;
4805  }
4806  if( IsLower<MT4>::value ) {
4807  for( size_t j=jend; j<N; ++j ) {
4808  reset( C(i,j) );
4809  }
4810  }
4811  }
4812  }
4813  //**********************************************************************************************
4814 
4815  //**Default assignment to dense matrices (diagonal/general)*************************************
4829  template< typename MT3 // Type of the left-hand side target matrix
4830  , typename MT4 // Type of the left-hand side matrix operand
4831  , typename MT5 // Type of the right-hand side matrix operand
4832  , typename ST2 > // Type of the scalar value
4833  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
4834  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4835  {
4837 
4838  const size_t M( A.rows() );
4839  const size_t N( B.columns() );
4840 
4841  for( size_t i=0UL; i<M; ++i )
4842  {
4843  const size_t jbegin( ( IsUpper<MT5>::value )
4844  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
4845  :( 0UL ) );
4846  const size_t jend( ( IsLower<MT5>::value )
4847  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
4848  :( N ) );
4849  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4850 
4851  if( IsUpper<MT5>::value ) {
4852  for( size_t j=0UL; j<jbegin; ++j ) {
4853  reset( C(i,j) );
4854  }
4855  }
4856  for( size_t j=jbegin; j<jend; ++j ) {
4857  C(i,j) = A(i,i) * B(i,j) * scalar;
4858  }
4859  if( IsLower<MT5>::value ) {
4860  for( size_t j=jend; j<N; ++j ) {
4861  reset( C(i,j) );
4862  }
4863  }
4864  }
4865  }
4866  //**********************************************************************************************
4867 
4868  //**Default assignment to dense matrices (diagonal/diagonal)************************************
4882  template< typename MT3 // Type of the left-hand side target matrix
4883  , typename MT4 // Type of the left-hand side matrix operand
4884  , typename MT5 // Type of the right-hand side matrix operand
4885  , typename ST2 > // Type of the scalar value
4886  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
4887  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4888  {
4890 
4891  reset( C );
4892 
4893  for( size_t i=0UL; i<A.rows(); ++i ) {
4894  C(i,i) = A(i,i) * B(i,i) * scalar;
4895  }
4896  }
4897  //**********************************************************************************************
4898 
4899  //**Default assignment to dense matrices (small matrices)***************************************
4913  template< typename MT3 // Type of the left-hand side target matrix
4914  , typename MT4 // Type of the left-hand side matrix operand
4915  , typename MT5 // Type of the right-hand side matrix operand
4916  , typename ST2 > // Type of the scalar value
4917  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4918  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4919  {
4920  selectDefaultAssignKernel( C, A, B, scalar );
4921  }
4922  //**********************************************************************************************
4923 
4924  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
/*!\brief Vectorized default assignment kernel for C = ( A * B ) * scalar with a row-major
//        target matrix (small matrices).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Only enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. The columns of C are
// processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size elements. The
// 8-vector block handles one row per iteration; the 4-, 2- and 1-vector blocks handle two
// rows per iteration followed by a single-row remainder step. For every block the products
// are accumulated over the inner index k in intrinsic accumulators, multiplied by the
// broadcast scalar and stored to C. The k range is narrowed via the IsUpper/IsLower (and
// strict) flags of A and B.
//
// NOTE(review): the accumulators are default-constructed IntrinsicType objects and are
// assumed to start at zero. The block loops also load and store whole vectors that may
// extend past column N, which presumably relies on padded storage. Confirm both against
// the IntrinsicType and dense matrix implementations.
*/
4939  template< typename MT3 // Type of the left-hand side target matrix
4940  , typename MT4 // Type of the left-hand side matrix operand
4941  , typename MT5 // Type of the right-hand side matrix operand
4942  , typename ST2 > // Type of the scalar value
4943  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4944  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4945  {
4946  typedef IntrinsicTrait<ElementType> IT;
4947 
4948  const size_t M( A.rows() );
4949  const size_t N( B.columns() );
4950  const size_t K( A.columns() );
4951 
      // Scalar broadcast into an intrinsic vector; applied once per store.
4952  const IntrinsicType factor( set( scalar ) );
4953 
      // Column cursor shared by all block loops below; each loop continues where the
      // previous one stopped.
4954  size_t j( 0UL );
4955 
      // Column blocks of 8 intrinsic vectors, one row of C per iteration.
4956  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
4957  for( size_t i=0UL; i<M; ++i )
4958  {
4959  const size_t kbegin( ( IsUpper<MT4>::value )
4960  ?( ( IsLower<MT5>::value )
4961  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4962  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4963  :( IsLower<MT5>::value ? j : 0UL ) );
4964  const size_t kend( ( IsLower<MT4>::value )
4965  ?( ( IsUpper<MT5>::value )
4966  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
4967  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4968  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
4969 
4970  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4971 
4972  for( size_t k=kbegin; k<kend; ++k ) {
4973  const IntrinsicType a1( set( A(i,k) ) );
4974  xmm1 = xmm1 + a1 * B.load(k,j );
4975  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
4976  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
4977  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
4978  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
4979  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
4980  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
4981  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
4982  }
4983 
4984  (~C).store( i, j , xmm1 * factor );
4985  (~C).store( i, j+IT::size , xmm2 * factor );
4986  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
4987  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
4988  (~C).store( i, j+IT::size*4UL, xmm5 * factor );
4989  (~C).store( i, j+IT::size*5UL, xmm6 * factor );
4990  (~C).store( i, j+IT::size*6UL, xmm7 * factor );
4991  (~C).store( i, j+IT::size*7UL, xmm8 * factor );
4992  }
4993  }
4994 
      // Column blocks of 4 intrinsic vectors, two rows of C per iteration.
4995  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
4996  {
4997  size_t i( 0UL );
4998 
4999  for( ; (i+2UL) <= M; i+=2UL )
5000  {
5001  const size_t kbegin( ( IsUpper<MT4>::value )
5002  ?( ( IsLower<MT5>::value )
5003  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5004  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5005  :( IsLower<MT5>::value ? j : 0UL ) );
5006  const size_t kend( ( IsLower<MT4>::value )
5007  ?( ( IsUpper<MT5>::value )
5008  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
5009  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5010  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
5011 
      // xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1.
5012  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5013 
5014  for( size_t k=kbegin; k<kend; ++k ) {
5015  const IntrinsicType a1( set( A(i ,k) ) );
5016  const IntrinsicType a2( set( A(i+1UL,k) ) );
5017  const IntrinsicType b1( B.load(k,j ) );
5018  const IntrinsicType b2( B.load(k,j+IT::size ) );
5019  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
5020  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
5021  xmm1 = xmm1 + a1 * b1;
5022  xmm2 = xmm2 + a1 * b2;
5023  xmm3 = xmm3 + a1 * b3;
5024  xmm4 = xmm4 + a1 * b4;
5025  xmm5 = xmm5 + a2 * b1;
5026  xmm6 = xmm6 + a2 * b2;
5027  xmm7 = xmm7 + a2 * b3;
5028  xmm8 = xmm8 + a2 * b4;
5029  }
5030 
5031  (~C).store( i , j , xmm1 * factor );
5032  (~C).store( i , j+IT::size , xmm2 * factor );
5033  (~C).store( i , j+IT::size*2UL, xmm3 * factor );
5034  (~C).store( i , j+IT::size*3UL, xmm4 * factor );
5035  (~C).store( i+1UL, j , xmm5 * factor );
5036  (~C).store( i+1UL, j+IT::size , xmm6 * factor );
5037  (~C).store( i+1UL, j+IT::size*2UL, xmm7 * factor );
5038  (~C).store( i+1UL, j+IT::size*3UL, xmm8 * factor );
5039  }
5040 
      // Single remaining row (odd M). Here kend is limited only by the upper flag of B.
5041  if( i < M )
5042  {
5043  const size_t kbegin( ( IsUpper<MT4>::value )
5044  ?( ( IsLower<MT5>::value )
5045  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5046  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5047  :( IsLower<MT5>::value ? j : 0UL ) );
5048  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
5049 
5050  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5051 
5052  for( size_t k=kbegin; k<kend; ++k ) {
5053  const IntrinsicType a1( set( A(i,k) ) );
5054  xmm1 = xmm1 + a1 * B.load(k,j );
5055  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
5056  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
5057  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
5058  }
5059 
5060  (~C).store( i, j , xmm1 * factor );
5061  (~C).store( i, j+IT::size , xmm2 * factor );
5062  (~C).store( i, j+IT::size*2UL, xmm3 * factor );
5063  (~C).store( i, j+IT::size*3UL, xmm4 * factor );
5064  }
5065  }
5066 
      // Column blocks of 2 intrinsic vectors, two rows of C per iteration.
5067  for( ; (j+IT::size) < N; j+=IT::size*2UL )
5068  {
5069  size_t i( 0UL );
5070 
5071  for( ; (i+2UL) <= M; i+=2UL )
5072  {
5073  const size_t kbegin( ( IsUpper<MT4>::value )
5074  ?( ( IsLower<MT5>::value )
5075  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5076  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5077  :( IsLower<MT5>::value ? j : 0UL ) );
5078  const size_t kend( ( IsLower<MT4>::value )
5079  ?( ( IsUpper<MT5>::value )
5080  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
5081  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5082  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
5083 
      // xmm1/xmm2 accumulate row i, xmm3/xmm4 accumulate row i+1.
5084  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5085 
5086  for( size_t k=kbegin; k<kend; ++k ) {
5087  const IntrinsicType a1( set( A(i ,k) ) );
5088  const IntrinsicType a2( set( A(i+1UL,k) ) );
5089  const IntrinsicType b1( B.load(k,j ) );
5090  const IntrinsicType b2( B.load(k,j+IT::size) );
5091  xmm1 = xmm1 + a1 * b1;
5092  xmm2 = xmm2 + a1 * b2;
5093  xmm3 = xmm3 + a2 * b1;
5094  xmm4 = xmm4 + a2 * b2;
5095  }
5096 
5097  (~C).store( i , j , xmm1 * factor );
5098  (~C).store( i , j+IT::size, xmm2 * factor );
5099  (~C).store( i+1UL, j , xmm3 * factor );
5100  (~C).store( i+1UL, j+IT::size, xmm4 * factor );
5101  }
5102 
      // Single remaining row (odd M).
5103  if( i < M )
5104  {
5105  const size_t kbegin( ( IsUpper<MT4>::value )
5106  ?( ( IsLower<MT5>::value )
5107  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5108  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5109  :( IsLower<MT5>::value ? j : 0UL ) );
5110  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
5111 
5112  IntrinsicType xmm1, xmm2;
5113 
5114  for( size_t k=kbegin; k<kend; ++k ) {
5115  const IntrinsicType a1( set( A(i,k) ) );
5116  xmm1 = xmm1 + a1 * B.load(k,j );
5117  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
5118  }
5119 
5120  (~C).store( i, j , xmm1 * factor );
5121  (~C).store( i, j+IT::size, xmm2 * factor );
5122  }
5123  }
5124 
      // Last column block of a single intrinsic vector, two rows of C per iteration.
5125  if( j < N )
5126  {
5127  size_t i( 0UL );
5128 
5129  for( ; (i+2UL) <= M; i+=2UL )
5130  {
5131  const size_t kbegin( ( IsUpper<MT4>::value )
5132  ?( ( IsLower<MT5>::value )
5133  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5134  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5135  :( IsLower<MT5>::value ? j : 0UL ) );
5136  const size_t kend( ( IsLower<MT4>::value )
5137  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
5138  :( K ) );
5139 
5140  IntrinsicType xmm1, xmm2;
5141 
5142  for( size_t k=kbegin; k<kend; ++k ) {
5143  const IntrinsicType b1( B.load(k,j) );
5144  xmm1 = xmm1 + set( A(i ,k) ) * b1;
5145  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
5146  }
5147 
5148  (~C).store( i , j, xmm1 * factor );
5149  (~C).store( i+1UL, j, xmm2 * factor );
5150  }
5151 
      // Single remaining row (odd M); the inner loop always runs up to K here.
5152  if( i < M )
5153  {
5154  const size_t kbegin( ( IsUpper<MT4>::value )
5155  ?( ( IsLower<MT5>::value )
5156  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5157  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5158  :( IsLower<MT5>::value ? j : 0UL ) );
5159 
5160  IntrinsicType xmm1;
5161 
5162  for( size_t k=kbegin; k<K; ++k ) {
5163  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
5164  }
5165 
5166  (~C).store( i, j, xmm1 * factor );
5167  }
5168  }
5169  }
5170  //**********************************************************************************************
5171 
5172  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
5187  template< typename MT3 // Type of the left-hand side target matrix
5188  , typename MT4 // Type of the left-hand side matrix operand
5189  , typename MT5 // Type of the right-hand side matrix operand
5190  , typename ST2 > // Type of the scalar value
5191  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5192  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5193  {
5198 
5199  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
5200  const typename MT4::OppositeType tmp( serial( A ) );
5201  assign( ~C, tmp * B * scalar );
5202  }
5203  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
5204  const typename MT5::OppositeType tmp( serial( B ) );
5205  assign( ~C, A * tmp * scalar );
5206  }
5207  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
5208  const typename MT4::OppositeType tmp( serial( A ) );
5209  assign( ~C, tmp * B * scalar );
5210  }
5211  else {
5212  const typename MT5::OppositeType tmp( serial( B ) );
5213  assign( ~C, A * tmp * scalar );
5214  }
5215  }
5216  //**********************************************************************************************
5217 
5218  //**Default assignment to dense matrices (large matrices)***************************************
// Large-matrix assignment kernel for the case in which the vectorized default kernel is NOT
// applicable (the overload is enabled via DisableIf on UseVectorizedDefaultKernel).
//
// C      : the target matrix
// A, B   : the left-hand side and right-hand side multiplication operands
// scalar : the scaling factor
//
// No dedicated large-matrix algorithm is implemented here; all arguments are forwarded
// unchanged to selectDefaultAssignKernel().
5232  template< typename MT3 // Type of the left-hand side target matrix
5233  , typename MT4 // Type of the left-hand side matrix operand
5234  , typename MT5 // Type of the right-hand side matrix operand
5235  , typename ST2 > // Type of the scalar value
5236  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5237  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5238  {
5239  selectDefaultAssignKernel( C, A, B, scalar );
5240  }
5241  //**********************************************************************************************
5242 
5243  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
5258  template< typename MT3 // Type of the left-hand side target matrix
5259  , typename MT4 // Type of the left-hand side matrix operand
5260  , typename MT5 // Type of the right-hand side matrix operand
5261  , typename ST2 > // Type of the scalar value
5262  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5263  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5264  {
5265  typedef IntrinsicTrait<ElementType> IT;
5266 
5267  const size_t M( A.rows() );
5268  const size_t N( B.columns() );
5269  const size_t K( A.columns() );
5270 
5271  const size_t iblock( 64UL );
5272  const size_t jblock( 128UL );
5273  const size_t kblock( 128UL );
5274 
5275  const IntrinsicType factor( set( scalar ) );
5276 
5277  for( size_t jj=0UL; jj<N; jj+=jblock )
5278  {
5279  const size_t jend( min( jj+jblock, N ) );
5280 
5281  for( size_t ii=0UL; ii<M; ii+=iblock )
5282  {
5283  const size_t iend( min( ii+iblock, M ) );
5284 
5285  for( size_t i=ii; i<iend; ++i ) {
5286  for( size_t j=jj; j<jend; ++j ) {
5287  reset( (~C)(i,j) );
5288  }
5289  }
5290 
5291  for( size_t kk=0UL; kk<K; kk+=kblock )
5292  {
5293  const size_t ktmp( min( kk+kblock, K ) );
5294 
5295  size_t j( jj );
5296 
5297  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
5298  {
5299  const size_t j1( j+IT::size );
5300  const size_t j2( j+IT::size*2UL );
5301  const size_t j3( j+IT::size*3UL );
5302 
5303  size_t i( ii );
5304 
5305  for( ; (i+2UL) <= iend; i+=2UL )
5306  {
5307  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5308  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5309  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
5310  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
5311 
5312  IntrinsicType xmm1( (~C).load(i ,j ) );
5313  IntrinsicType xmm2( (~C).load(i ,j1) );
5314  IntrinsicType xmm3( (~C).load(i ,j2) );
5315  IntrinsicType xmm4( (~C).load(i ,j3) );
5316  IntrinsicType xmm5( (~C).load(i+1UL,j ) );
5317  IntrinsicType xmm6( (~C).load(i+1UL,j1) );
5318  IntrinsicType xmm7( (~C).load(i+1UL,j2) );
5319  IntrinsicType xmm8( (~C).load(i+1UL,j3) );
5320 
5321  for( size_t k=kbegin; k<kend; ++k ) {
5322  const IntrinsicType a1( set( A(i ,k) ) );
5323  const IntrinsicType a2( set( A(i+1UL,k) ) );
5324  const IntrinsicType b1( B.load(k,j ) );
5325  const IntrinsicType b2( B.load(k,j1) );
5326  const IntrinsicType b3( B.load(k,j2) );
5327  const IntrinsicType b4( B.load(k,j3) );
5328  xmm1 = xmm1 + a1 * b1;
5329  xmm2 = xmm2 + a1 * b2;
5330  xmm3 = xmm3 + a1 * b3;
5331  xmm4 = xmm4 + a1 * b4;
5332  xmm5 = xmm5 + a2 * b1;
5333  xmm6 = xmm6 + a2 * b2;
5334  xmm7 = xmm7 + a2 * b3;
5335  xmm8 = xmm8 + a2 * b4;
5336  }
5337 
5338  (~C).store( i , j , xmm1 * factor );
5339  (~C).store( i , j1, xmm2 * factor );
5340  (~C).store( i , j2, xmm3 * factor );
5341  (~C).store( i , j3, xmm4 * factor );
5342  (~C).store( i+1UL, j , xmm5 * factor );
5343  (~C).store( i+1UL, j1, xmm6 * factor );
5344  (~C).store( i+1UL, j2, xmm7 * factor );
5345  (~C).store( i+1UL, j3, xmm8 * factor );
5346  }
5347 
5348  if( i < iend )
5349  {
5350  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5351  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5352  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5353  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
5354 
5355  IntrinsicType xmm1( (~C).load(i,j ) );
5356  IntrinsicType xmm2( (~C).load(i,j1) );
5357  IntrinsicType xmm3( (~C).load(i,j2) );
5358  IntrinsicType xmm4( (~C).load(i,j3) );
5359 
5360  for( size_t k=kbegin; k<kend; ++k ) {
5361  const IntrinsicType a1( set( A(i,k) ) );
5362  xmm1 = xmm1 + a1 * B.load(k,j );
5363  xmm2 = xmm2 + a1 * B.load(k,j1);
5364  xmm3 = xmm3 + a1 * B.load(k,j2);
5365  xmm4 = xmm4 + a1 * B.load(k,j3);
5366  }
5367 
5368  (~C).store( i, j , xmm1 * factor );
5369  (~C).store( i, j1, xmm2 * factor );
5370  (~C).store( i, j2, xmm3 * factor );
5371  (~C).store( i, j3, xmm4 * factor );
5372  }
5373  }
5374 
5375  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
5376  {
5377  const size_t j1( j+IT::size );
5378 
5379  size_t i( ii );
5380 
5381  for( ; (i+4UL) <= iend; i+=4UL )
5382  {
5383  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5384  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5385  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
5386  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
5387 
5388  IntrinsicType xmm1( (~C).load(i ,j ) );
5389  IntrinsicType xmm2( (~C).load(i ,j1) );
5390  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
5391  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
5392  IntrinsicType xmm5( (~C).load(i+2UL,j ) );
5393  IntrinsicType xmm6( (~C).load(i+2UL,j1) );
5394  IntrinsicType xmm7( (~C).load(i+3UL,j ) );
5395  IntrinsicType xmm8( (~C).load(i+3UL,j1) );
5396 
5397  for( size_t k=kbegin; k<kend; ++k ) {
5398  const IntrinsicType a1( set( A(i ,k) ) );
5399  const IntrinsicType a2( set( A(i+1UL,k) ) );
5400  const IntrinsicType a3( set( A(i+2UL,k) ) );
5401  const IntrinsicType a4( set( A(i+3UL,k) ) );
5402  const IntrinsicType b1( B.load(k,j ) );
5403  const IntrinsicType b2( B.load(k,j1) );
5404  xmm1 = xmm1 + a1 * b1;
5405  xmm2 = xmm2 + a1 * b2;
5406  xmm3 = xmm3 + a2 * b1;
5407  xmm4 = xmm4 + a2 * b2;
5408  xmm5 = xmm5 + a3 * b1;
5409  xmm6 = xmm6 + a3 * b2;
5410  xmm7 = xmm7 + a4 * b1;
5411  xmm8 = xmm8 + a4 * b2;
5412  }
5413 
5414  (~C).store( i , j , xmm1 * factor );
5415  (~C).store( i , j1, xmm2 * factor );
5416  (~C).store( i+1UL, j , xmm3 * factor );
5417  (~C).store( i+1UL, j1, xmm4 * factor );
5418  (~C).store( i+2UL, j , xmm5 * factor );
5419  (~C).store( i+2UL, j1, xmm6 * factor );
5420  (~C).store( i+3UL, j , xmm7 * factor );
5421  (~C).store( i+3UL, j1, xmm8 * factor );
5422  }
5423 
5424  for( ; (i+2UL) <= iend; i+=2UL )
5425  {
5426  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5427  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5428  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
5429  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
5430 
5431  IntrinsicType xmm1( (~C).load(i ,j ) );
5432  IntrinsicType xmm2( (~C).load(i ,j1) );
5433  IntrinsicType xmm3( (~C).load(i+1UL,j ) );
5434  IntrinsicType xmm4( (~C).load(i+1UL,j1) );
5435 
5436  for( size_t k=kbegin; k<kend; ++k ) {
5437  const IntrinsicType a1( set( A(i ,k) ) );
5438  const IntrinsicType a2( set( A(i+1UL,k) ) );
5439  const IntrinsicType b1( B.load(k,j ) );
5440  const IntrinsicType b2( B.load(k,j1) );
5441  xmm1 = xmm1 + a1 * b1;
5442  xmm2 = xmm2 + a1 * b2;
5443  xmm3 = xmm3 + a2 * b1;
5444  xmm4 = xmm4 + a2 * b2;
5445  }
5446 
5447  (~C).store( i , j , xmm1 * factor );
5448  (~C).store( i , j1, xmm2 * factor );
5449  (~C).store( i+1UL, j , xmm3 * factor );
5450  (~C).store( i+1UL, j1, xmm4 * factor );
5451  }
5452 
5453  if( i < iend )
5454  {
5455  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5456  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5457  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5458  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
5459 
5460  IntrinsicType xmm1( (~C).load(i,j ) );
5461  IntrinsicType xmm2( (~C).load(i,j1) );
5462 
5463  for( size_t k=kbegin; k<kend; ++k ) {
5464  const IntrinsicType a1( set( A(i,k) ) );
5465  xmm1 = xmm1 + a1 * B.load(k,j );
5466  xmm2 = xmm2 + a1 * B.load(k,j1);
5467  }
5468 
5469  (~C).store( i, j , xmm1 * factor );
5470  (~C).store( i, j1, xmm2 * factor );
5471  }
5472  }
5473 
5474  if( j < jend )
5475  {
5476  for( size_t i=ii; i<iend; ++i )
5477  {
5478  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5479  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5480  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
5481  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
5482 
5483  IntrinsicType xmm1( (~C).load(i,j) );
5484 
5485  for( size_t k=kbegin; k<kend; ++k ) {
5486  const IntrinsicType a1( set( A(i,k) ) );
5487  xmm1 = xmm1 + a1 * B.load(k,j);
5488  }
5489 
5490  (~C).store( i, j, xmm1 * factor );
5491  }
5492  }
5493  }
5494  }
5495  }
5496  }
5497  //**********************************************************************************************
5498 
5499  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
// Large-matrix assignment kernel for column-major targets when the vectorized default kernel
// is applicable (enabled via EnableIf on UseVectorizedDefaultKernel).
//
// C      : the column-major target dense matrix
// A, B   : the left-hand side and right-hand side multiplication operands
// scalar : the scaling factor
//
// There is no dedicated blocked algorithm for this storage order; the call is forwarded to
// selectSmallAssignKernel() with the derived target type (~C).
5513  template< typename MT3 // Type of the left-hand side target matrix
5514  , typename MT4 // Type of the left-hand side matrix operand
5515  , typename MT5 // Type of the right-hand side matrix operand
5516  , typename ST2 > // Type of the scalar value
5517  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5518  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5519  {
5520  selectSmallAssignKernel( ~C, A, B, scalar );
5521  }
5522  //**********************************************************************************************
5523 
5524  //**BLAS-based assignment to dense matrices (default)*******************************************
// Default overload of the BLAS assignment kernel, enabled via EnableIf on UseDefaultKernel.
//
// C      : the target matrix
// A, B   : the left-hand side and right-hand side multiplication operands
// scalar : the scaling factor
//
// No BLAS routine is called in this overload; the arguments are forwarded unchanged to
// selectLargeAssignKernel().
5538  template< typename MT3 // Type of the left-hand side target matrix
5539  , typename MT4 // Type of the left-hand side matrix operand
5540  , typename MT5 // Type of the right-hand side matrix operand
5541  , typename ST2 > // Type of the scalar value
5542  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5543  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5544  {
5545  selectLargeAssignKernel( C, A, B, scalar );
5546  }
5547  //**********************************************************************************************
5548 
5549  //**BLAS-based assignment to dense matrices (single precision)**********************************
5550 #if BLAZE_BLAS_MODE
5551 
5564  template< typename MT3 // Type of the left-hand side target matrix
5565  , typename MT4 // Type of the left-hand side matrix operand
5566  , typename MT5 // Type of the right-hand side matrix operand
5567  , typename ST2 > // Type of the scalar value
5568  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5569  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5570  {
5571  if( IsTriangular<MT4>::value ) {
5572  assign( C, B );
5573  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5574  }
5575  else if( IsTriangular<MT5>::value ) {
5576  assign( C, A );
5577  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5578  }
5579  else {
5580  sgemm( C, A, B, scalar, 0.0F );
5581  }
5582  }
5583 #endif
5584  //**********************************************************************************************
5585 
5586  //**BLAS-based assignment to dense matrices (double precision)**********************************
5587 #if BLAZE_BLAS_MODE
5588 
5601  template< typename MT3 // Type of the left-hand side target matrix
5602  , typename MT4 // Type of the left-hand side matrix operand
5603  , typename MT5 // Type of the right-hand side matrix operand
5604  , typename ST2 > // Type of the scalar value
5605  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5606  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5607  {
5608  if( IsTriangular<MT4>::value ) {
5609  assign( C, B );
5610  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5611  }
5612  else if( IsTriangular<MT5>::value ) {
5613  assign( C, A );
5614  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5615  }
5616  else {
5617  dgemm( C, A, B, scalar, 0.0 );
5618  }
5619  }
5620 #endif
5621  //**********************************************************************************************
5622 
5623  //**BLAS-based assignment to dense matrices (single precision complex)**************************
5624 #if BLAZE_BLAS_MODE
5625 
5638  template< typename MT3 // Type of the left-hand side target matrix
5639  , typename MT4 // Type of the left-hand side matrix operand
5640  , typename MT5 // Type of the right-hand side matrix operand
5641  , typename ST2 > // Type of the scalar value
5642  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5643  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5644  {
5645  if( IsTriangular<MT4>::value ) {
5646  assign( C, B );
5647  ctrmm( C, A, CblasLeft,
5648  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5649  complex<float>( scalar, 0.0F ) );
5650  }
5651  else if( IsTriangular<MT5>::value ) {
5652  assign( C, A );
5653  ctrmm( C, B, CblasRight,
5654  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5655  complex<float>( scalar, 0.0F ) );
5656  }
5657  else {
5658  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
5659  }
5660  }
5661 #endif
5662  //**********************************************************************************************
5663 
5664  //**BLAS-based assignment to dense matrices (double precision complex)**************************
5665 #if BLAZE_BLAS_MODE
5666 
5679  template< typename MT3 // Type of the left-hand side target matrix
5680  , typename MT4 // Type of the left-hand side matrix operand
5681  , typename MT5 // Type of the right-hand side matrix operand
5682  , typename ST2 > // Type of the scalar
5683  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5684  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5685  {
5686  if( IsTriangular<MT4>::value ) {
5687  assign( C, B );
5688  ztrmm( C, A, CblasLeft,
5689  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5690  complex<double>( scalar, 0.0 ) );
5691  }
5692  else if( IsTriangular<MT5>::value ) {
5693  assign( C, A );
5694  ztrmm( C, B, CblasRight,
5695  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5696  complex<double>( scalar, 0.0 ) );
5697  }
5698  else {
5699  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
5700  }
5701  }
5702 #endif
5703  //**********************************************************************************************
5704 
5705  //**Assignment to sparse matrices***************************************************************
5717  template< typename MT // Type of the target sparse matrix
5718  , bool SO > // Storage order of the target sparse matrix
5719  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5720  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5721  {
5723 
5724  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
5725 
5732 
5733  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5734  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5735 
5736  const TmpType tmp( serial( rhs ) );
5737  assign( ~lhs, tmp );
5738  }
5739  //**********************************************************************************************
5740 
5741  //**Restructuring assignment to column-major matrices*******************************************
5755  template< typename MT > // Type of the target matrix
5756  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5757  assign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
5758  {
5760 
5762 
5763  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5764  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5765 
5766  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5767  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5768 
5769  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
5770  assign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
5771  else if( IsSymmetric<MT1>::value )
5772  assign( ~lhs, trans( left ) * right * rhs.scalar_ );
5773  else
5774  assign( ~lhs, left * trans( right ) * rhs.scalar_ );
5775  }
5776  //**********************************************************************************************
5777 
5778  //**Addition assignment to dense matrices*******************************************************
5790  template< typename MT // Type of the target dense matrix
5791  , bool SO > // Storage order of the target dense matrix
5792  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5793  addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5794  {
5796 
5797  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5798  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5799 
5800  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5801  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5802 
5803  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5804  return;
5805  }
5806 
5807  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5808  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5809 
5810  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5811  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5812  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5813  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5814  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5815  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5816 
5817  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5818  }
5819  //**********************************************************************************************
5820 
5821  //**Addition assignment to dense matrices (kernel selection)************************************
5832  template< typename MT3 // Type of the left-hand side target matrix
5833  , typename MT4 // Type of the left-hand side matrix operand
5834  , typename MT5 // Type of the right-hand side matrix operand
5835  , typename ST2 > // Type of the scalar value
5836  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5837  {
5838  if( ( IsDiagonal<MT5>::value ) ||
5839  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
5840  selectSmallAddAssignKernel( C, A, B, scalar );
5841  else
5842  selectBlasAddAssignKernel( C, A, B, scalar );
5843  }
5844  //**********************************************************************************************
5845 
5846  //**Default addition assignment to dense matrices (general/general)*****************************
5860  template< typename MT3 // Type of the left-hand side target matrix
5861  , typename MT4 // Type of the left-hand side matrix operand
5862  , typename MT5 // Type of the right-hand side matrix operand
5863  , typename ST2 > // Type of the scalar value
5864  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
5865  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5866  {
5867  const ResultType tmp( serial( A * B * scalar ) );
5868  addAssign( C, tmp );
5869  }
5870  //**********************************************************************************************
5871 
5872  //**Default addition assignment to dense matrices (general/diagonal)****************************
5886  template< typename MT3 // Type of the left-hand side target matrix
5887  , typename MT4 // Type of the left-hand side matrix operand
5888  , typename MT5 // Type of the right-hand side matrix operand
5889  , typename ST2 > // Type of the scalar value
5890  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
5891  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5892  {
5894 
5895  const size_t M( A.rows() );
5896  const size_t N( B.columns() );
5897 
5898  for( size_t i=0UL; i<M; ++i )
5899  {
5900  const size_t jbegin( ( IsUpper<MT4>::value )
5901  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5902  :( 0UL ) );
5903  const size_t jend( ( IsLower<MT4>::value )
5904  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
5905  :( N ) );
5906  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5907 
5908  const size_t jnum( jend - jbegin );
5909  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5910 
5911  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5912  C(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5913  C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5914  }
5915  if( jpos < jend ) {
5916  C(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
5917  }
5918  }
5919  }
5920  //**********************************************************************************************
5921 
5922  //**Default addition assignment to dense matrices (diagonal/general)****************************
5936  template< typename MT3 // Type of the left-hand side target matrix
5937  , typename MT4 // Type of the left-hand side matrix operand
5938  , typename MT5 // Type of the right-hand side matrix operand
5939  , typename ST2 > // Type of the scalar value
5940  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
5941  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5942  {
5944 
5945  const size_t M( A.rows() );
5946  const size_t N( B.columns() );
5947 
5948  for( size_t i=0UL; i<M; ++i )
5949  {
5950  const size_t jbegin( ( IsUpper<MT5>::value )
5951  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
5952  :( 0UL ) );
5953  const size_t jend( ( IsLower<MT5>::value )
5954  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
5955  :( N ) );
5956  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5957 
5958  const size_t jnum( jend - jbegin );
5959  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5960 
5961  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5962  C(i,j ) += A(i,i) * B(i,j ) * scalar;
5963  C(i,j+1UL) += A(i,i) * B(i,j+1UL) * scalar;
5964  }
5965  if( jpos < jend ) {
5966  C(i,jpos) += A(i,i) * B(i,jpos) * scalar;
5967  }
5968  }
5969  }
5970  //**********************************************************************************************
5971 
5972  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
5986  template< typename MT3 // Type of the left-hand side target matrix
5987  , typename MT4 // Type of the left-hand side matrix operand
5988  , typename MT5 // Type of the right-hand side matrix operand
5989  , typename ST2 > // Type of the scalar value
5990  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
5991  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5992  {
5994 
5995  for( size_t i=0UL; i<A.rows(); ++i ) {
5996  C(i,i) += A(i,i) * B(i,i) * scalar;
5997  }
5998  }
5999  //**********************************************************************************************
6000 
6001  //**Default addition assignment to dense matrices (small matrices)******************************
// Small-matrix addition assignment kernel for the case in which the vectorized default kernel
// is NOT applicable (the overload is enabled via DisableIf on UseVectorizedDefaultKernel).
//
// C      : the target matrix
// A, B   : the left-hand side and right-hand side multiplication operands
// scalar : the scaling factor
//
// All arguments are forwarded unchanged to selectDefaultAddAssignKernel(), which provides
// separate overloads depending on whether A and/or B are diagonal matrices.
6015  template< typename MT3 // Type of the left-hand side target matrix
6016  , typename MT4 // Type of the left-hand side matrix operand
6017  , typename MT5 // Type of the right-hand side matrix operand
6018  , typename ST2 > // Type of the scalar value
6019  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6020  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6021  {
6022  selectDefaultAddAssignKernel( C, A, B, scalar );
6023  }
6024  //**********************************************************************************************
6025 
6026  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
6041  template< typename MT3 // Type of the left-hand side target matrix
6042  , typename MT4 // Type of the left-hand side matrix operand
6043  , typename MT5 // Type of the right-hand side matrix operand
6044  , typename ST2 > // Type of the scalar value
6045  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6046  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6047  {
6048  typedef IntrinsicTrait<ElementType> IT;
6049 
6050  const size_t M( A.rows() );
6051  const size_t N( B.columns() );
6052  const size_t K( A.columns() );
6053 
6054  const IntrinsicType factor( set( scalar ) );
6055 
6056  size_t j( 0UL );
6057 
6058  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
6059  for( size_t i=0UL; i<M; ++i )
6060  {
6061  const size_t kbegin( ( IsUpper<MT4>::value )
6062  ?( ( IsLower<MT5>::value )
6063  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6064  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6065  :( IsLower<MT5>::value ? j : 0UL ) );
6066  const size_t kend( ( IsLower<MT4>::value )
6067  ?( ( IsUpper<MT5>::value )
6068  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
6069  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
6070  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
6071 
6072  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6073 
6074  for( size_t k=kbegin; k<kend; ++k ) {
6075  const IntrinsicType a1( set( A(i,k) ) );
6076  xmm1 = xmm1 + a1 * B.load(k,j );
6077  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
6078  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
6079  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
6080  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
6081  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
6082  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
6083  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
6084  }
6085 
6086  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6087  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
6088  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
6089  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
6090  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
6091  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
6092  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
6093  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
6094  }
6095  }
6096 
6097  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
6098  {
6099  size_t i( 0UL );
6100 
6101  for( ; (i+2UL) <= M; i+=2UL )
6102  {
6103  const size_t kbegin( ( IsUpper<MT4>::value )
6104  ?( ( IsLower<MT5>::value )
6105  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6106  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6107  :( IsLower<MT5>::value ? j : 0UL ) );
6108  const size_t kend( ( IsLower<MT4>::value )
6109  ?( ( IsUpper<MT5>::value )
6110  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
6111  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6112  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
6113 
6114  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6115 
6116  for( size_t k=kbegin; k<kend; ++k ) {
6117  const IntrinsicType a1( set( A(i ,k) ) );
6118  const IntrinsicType a2( set( A(i+1UL,k) ) );
6119  const IntrinsicType b1( B.load(k,j ) );
6120  const IntrinsicType b2( B.load(k,j+IT::size ) );
6121  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
6122  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
6123  xmm1 = xmm1 + a1 * b1;
6124  xmm2 = xmm2 + a1 * b2;
6125  xmm3 = xmm3 + a1 * b3;
6126  xmm4 = xmm4 + a1 * b4;
6127  xmm5 = xmm5 + a2 * b1;
6128  xmm6 = xmm6 + a2 * b2;
6129  xmm7 = xmm7 + a2 * b3;
6130  xmm8 = xmm8 + a2 * b4;
6131  }
6132 
6133  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6134  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
6135  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
6136  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
6137  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
6138  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
6139  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
6140  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
6141  }
6142 
6143  if( i < M )
6144  {
6145  const size_t kbegin( ( IsUpper<MT4>::value )
6146  ?( ( IsLower<MT5>::value )
6147  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6148  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6149  :( IsLower<MT5>::value ? j : 0UL ) );
6150  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
6151 
6152  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6153 
6154  for( size_t k=kbegin; k<kend; ++k ) {
6155  const IntrinsicType a1( set( A(i,k) ) );
6156  xmm1 = xmm1 + a1 * B.load(k,j );
6157  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
6158  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
6159  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
6160  }
6161 
6162  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6163  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
6164  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
6165  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
6166  }
6167  }
6168 
6169  for( ; (j+IT::size) < N; j+=IT::size*2UL )
6170  {
6171  size_t i( 0UL );
6172 
6173  for( ; (i+2UL) <= M; i+=2UL )
6174  {
6175  const size_t kbegin( ( IsUpper<MT4>::value )
6176  ?( ( IsLower<MT5>::value )
6177  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6178  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6179  :( IsLower<MT5>::value ? j : 0UL ) );
6180  const size_t kend( ( IsLower<MT4>::value )
6181  ?( ( IsUpper<MT5>::value )
6182  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
6183  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6184  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
6185 
6186  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6187 
6188  for( size_t k=kbegin; k<kend; ++k ) {
6189  const IntrinsicType a1( set( A(i ,k) ) );
6190  const IntrinsicType a2( set( A(i+1UL,k) ) );
6191  const IntrinsicType b1( B.load(k,j ) );
6192  const IntrinsicType b2( B.load(k,j+IT::size) );
6193  xmm1 = xmm1 + a1 * b1;
6194  xmm2 = xmm2 + a1 * b2;
6195  xmm3 = xmm3 + a2 * b1;
6196  xmm4 = xmm4 + a2 * b2;
6197  }
6198 
6199  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6200  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
6201  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
6202  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
6203  }
6204 
6205  if( i < M )
6206  {
6207  const size_t kbegin( ( IsUpper<MT4>::value )
6208  ?( ( IsLower<MT5>::value )
6209  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6210  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6211  :( IsLower<MT5>::value ? j : 0UL ) );
6212  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
6213 
6214  IntrinsicType xmm1, xmm2;
6215 
6216  for( size_t k=kbegin; k<kend; ++k ) {
6217  const IntrinsicType a1( set( A(i,k) ) );
6218  xmm1 = xmm1 + a1 * B.load(k,j );
6219  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
6220  }
6221 
6222  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6223  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
6224  }
6225  }
6226 
6227  if( j < N )
6228  {
6229  size_t i( 0UL );
6230 
6231  for( ; (i+2UL) <= M; i+=2UL )
6232  {
6233  const size_t kbegin( ( IsUpper<MT4>::value )
6234  ?( ( IsLower<MT5>::value )
6235  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6236  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6237  :( IsLower<MT5>::value ? j : 0UL ) );
6238  const size_t kend( ( IsLower<MT4>::value )
6239  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
6240  :( K ) );
6241 
6242  IntrinsicType xmm1, xmm2;
6243 
6244  for( size_t k=kbegin; k<kend; ++k ) {
6245  const IntrinsicType b1( B.load(k,j) );
6246  xmm1 = xmm1 + set( A(i ,k) ) * b1;
6247  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
6248  }
6249 
6250  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
6251  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
6252  }
6253 
6254  if( i < M )
6255  {
6256  const size_t kbegin( ( IsUpper<MT4>::value )
6257  ?( ( IsLower<MT5>::value )
6258  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6259  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6260  :( IsLower<MT5>::value ? j : 0UL ) );
6261 
6262  IntrinsicType xmm1;
6263 
6264  for( size_t k=kbegin; k<K; ++k ) {
6265  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
6266  }
6267 
6268  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
6269  }
6270  }
6271  }
6272  //**********************************************************************************************
6273 
6274  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
6289  template< typename MT3 // Type of the left-hand side target matrix
6290  , typename MT4 // Type of the left-hand side matrix operand
6291  , typename MT5 // Type of the right-hand side matrix operand
6292  , typename ST2 > // Type of the scalar value
6293  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6294  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6295  {
6300 
6301  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
6302  const typename MT4::OppositeType tmp( serial( A ) );
6303  addAssign( ~C, tmp * B * scalar );
6304  }
6305  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
6306  const typename MT5::OppositeType tmp( serial( B ) );
6307  addAssign( ~C, A * tmp * scalar );
6308  }
6309  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
6310  const typename MT4::OppositeType tmp( serial( A ) );
6311  addAssign( ~C, tmp * B * scalar );
6312  }
6313  else {
6314  const typename MT5::OppositeType tmp( serial( B ) );
6315  addAssign( ~C, A * tmp * scalar );
6316  }
6317  }
6318  //**********************************************************************************************
6319 
6320  //**Default addition assignment to dense matrices (large matrices)******************************
6334  template< typename MT3 // Type of the left-hand side target matrix
6335  , typename MT4 // Type of the left-hand side matrix operand
6336  , typename MT5 // Type of the right-hand side matrix operand
6337  , typename ST2 > // Type of the scalar value
6338  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6339  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6340  {
6341  selectDefaultAddAssignKernel( C, A, B, scalar );
6342  }
6343  //**********************************************************************************************
6344 
6345  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Vectorized, blocked kernel computing C += ( A * B ) * scalar for a row-major target
// (DenseMatrix<MT3,false>). Enabled via UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>.
//
//   C      - target matrix, updated in place through (~C).load() / (~C).store()
//   A      - left-hand side operand, read element-wise as A(i,k) and broadcast with set()
//   B      - right-hand side operand, read one vector at a time through B.load(k,j)
//   scalar - scaling factor, broadcast once into 'factor'
//
// The iteration space is tiled into 64 rows (iblock) x 128 columns (jblock) x 128 inner
// indices (kblock). Every (jj,ii,kk) tile adds its partial products to C, i.e. each vector
// of C is loaded and stored once per kk tile.
//
// NOTE(review): the accumulators xmm1..xmm8 are declared without an initializer; this
// presumably relies on IntrinsicType's default constructor producing zeros - confirm.
// NOTE(review): whole vectors are loaded and stored even if fewer than IT::size columns
// remain before jend; presumably the matrices are padded accordingly - confirm.
6360  template< typename MT3 // Type of the left-hand side target matrix
6361  , typename MT4 // Type of the left-hand side matrix operand
6362  , typename MT5 // Type of the right-hand side matrix operand
6363  , typename ST2 > // Type of the scalar value
6364  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6365  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6366  {
6367  typedef IntrinsicTrait<ElementType> IT;
6368 
6369  const size_t M( A.rows() );
6370  const size_t N( B.columns() );
6371  const size_t K( A.columns() );
6372 
      // Tile extents: rows of C, columns of C, and inner (summation) dimension
6373  const size_t iblock( 64UL );
6374  const size_t jblock( 128UL );
6375  const size_t kblock( 128UL );
6376 
      // Scalar broadcast to a full vector so it can be applied to the accumulators
6377  const IntrinsicType factor( set( scalar ) );
6378 
6379  for( size_t jj=0UL; jj<N; jj+=jblock )
6380  {
6381  const size_t jend( min( jj+jblock, N ) );
6382 
6383  for( size_t ii=0UL; ii<M; ii+=iblock )
6384  {
6385  const size_t iend( min( ii+iblock, M ) );
6386 
6387  for( size_t kk=0UL; kk<K; kk+=kblock )
6388  {
6389  const size_t ktmp( min( kk+kblock, K ) );
6390 
6391  size_t j( jj );
6392 
      // Strips of four vectors (IT::size*4 columns)
6393  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
6394  {
6395  const size_t j1( j+IT::size );
6396  const size_t j2( j+IT::size*2UL );
6397  const size_t j3( j+IT::size*3UL );
6398 
6399  size_t i( ii );
6400 
      // Two rows of C per iteration (2x4 vector accumulators)
6401  for( ; (i+2UL) <= iend; i+=2UL )
6402  {
      // The k range is the current kk tile, narrowed by the IsUpper/IsLower traits of the
      // operand types: IsUpper<MT4> starts at row i, IsLower<MT5> starts at column j,
      // IsLower<MT4> stops after the last processed row, IsUpper<MT5> stops after the
      // last processed column
6403  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6404  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6405  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
6406  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
6407 
6408  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6409 
6410  for( size_t k=kbegin; k<kend; ++k ) {
6411  const IntrinsicType a1( set( A(i ,k) ) );
6412  const IntrinsicType a2( set( A(i+1UL,k) ) );
6413  const IntrinsicType b1( B.load(k,j ) );
6414  const IntrinsicType b2( B.load(k,j1) );
6415  const IntrinsicType b3( B.load(k,j2) );
6416  const IntrinsicType b4( B.load(k,j3) );
6417  xmm1 = xmm1 + a1 * b1;
6418  xmm2 = xmm2 + a1 * b2;
6419  xmm3 = xmm3 + a1 * b3;
6420  xmm4 = xmm4 + a1 * b4;
6421  xmm5 = xmm5 + a2 * b1;
6422  xmm6 = xmm6 + a2 * b2;
6423  xmm7 = xmm7 + a2 * b3;
6424  xmm8 = xmm8 + a2 * b4;
6425  }
6426 
      // xmm1..xmm4 belong to row i, xmm5..xmm8 to row i+1
6427  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6428  (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6429  (~C).store( i , j2, (~C).load(i ,j2) + xmm3 * factor );
6430  (~C).store( i , j3, (~C).load(i ,j3) + xmm4 * factor );
6431  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
6432  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm6 * factor );
6433  (~C).store( i+1UL, j2, (~C).load(i+1UL,j2) + xmm7 * factor );
6434  (~C).store( i+1UL, j3, (~C).load(i+1UL,j3) + xmm8 * factor );
6435  }
6436 
      // Single leftover row of the tile (odd number of rows)
6437  if( i < iend )
6438  {
6439  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6440  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6441  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6442  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
6443 
6444  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6445 
6446  for( size_t k=kbegin; k<kend; ++k ) {
6447  const IntrinsicType a1( set( A(i,k) ) );
6448  xmm1 = xmm1 + a1 * B.load(k,j );
6449  xmm2 = xmm2 + a1 * B.load(k,j1);
6450  xmm3 = xmm3 + a1 * B.load(k,j2);
6451  xmm4 = xmm4 + a1 * B.load(k,j3);
6452  }
6453 
6454  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6455  (~C).store( i, j1, (~C).load(i,j1) + xmm2 * factor );
6456  (~C).store( i, j2, (~C).load(i,j2) + xmm3 * factor );
6457  (~C).store( i, j3, (~C).load(i,j3) + xmm4 * factor );
6458  }
6459  }
6460 
      // Strips of two vectors (IT::size*2 columns) for what the four-vector loop left over
6461  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
6462  {
6463  const size_t j1( j+IT::size );
6464 
6465  size_t i( ii );
6466 
      // Four rows of C per iteration (4x2 vector accumulators)
6467  for( ; (i+4UL) <= iend; i+=4UL )
6468  {
6469  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6470  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6471  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
6472  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
6473 
6474  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6475 
6476  for( size_t k=kbegin; k<kend; ++k ) {
6477  const IntrinsicType a1( set( A(i ,k) ) );
6478  const IntrinsicType a2( set( A(i+1UL,k) ) );
6479  const IntrinsicType a3( set( A(i+2UL,k) ) );
6480  const IntrinsicType a4( set( A(i+3UL,k) ) );
6481  const IntrinsicType b1( B.load(k,j ) );
6482  const IntrinsicType b2( B.load(k,j1) );
6483  xmm1 = xmm1 + a1 * b1;
6484  xmm2 = xmm2 + a1 * b2;
6485  xmm3 = xmm3 + a2 * b1;
6486  xmm4 = xmm4 + a2 * b2;
6487  xmm5 = xmm5 + a3 * b1;
6488  xmm6 = xmm6 + a3 * b2;
6489  xmm7 = xmm7 + a4 * b1;
6490  xmm8 = xmm8 + a4 * b2;
6491  }
6492 
      // Accumulator pairs (xmm1,xmm2) .. (xmm7,xmm8) map to rows i .. i+3
6493  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6494  (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6495  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
6496  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
6497  (~C).store( i+2UL, j , (~C).load(i+2UL,j ) + xmm5 * factor );
6498  (~C).store( i+2UL, j1, (~C).load(i+2UL,j1) + xmm6 * factor );
6499  (~C).store( i+3UL, j , (~C).load(i+3UL,j ) + xmm7 * factor );
6500  (~C).store( i+3UL, j1, (~C).load(i+3UL,j1) + xmm8 * factor );
6501  }
6502 
      // Two rows of C per iteration for the rows the four-row loop left over
6503  for( ; (i+2UL) <= iend; i+=2UL )
6504  {
6505  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6506  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6507  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
6508  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
6509 
6510  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6511 
6512  for( size_t k=kbegin; k<kend; ++k ) {
6513  const IntrinsicType a1( set( A(i ,k) ) );
6514  const IntrinsicType a2( set( A(i+1UL,k) ) );
6515  const IntrinsicType b1( B.load(k,j ) );
6516  const IntrinsicType b2( B.load(k,j1) );
6517  xmm1 = xmm1 + a1 * b1;
6518  xmm2 = xmm2 + a1 * b2;
6519  xmm3 = xmm3 + a2 * b1;
6520  xmm4 = xmm4 + a2 * b2;
6521  }
6522 
6523  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6524  (~C).store( i , j1, (~C).load(i ,j1) + xmm2 * factor );
6525  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
6526  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) + xmm4 * factor );
6527  }
6528 
      // Single leftover row of the tile
6529  if( i < iend )
6530  {
6531  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6532  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6533  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6534  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
6535 
6536  IntrinsicType xmm1, xmm2;
6537 
6538  for( size_t k=kbegin; k<kend; ++k ) {
6539  const IntrinsicType a1( set( A(i,k) ) );
6540  xmm1 = xmm1 + a1 * B.load(k,j );
6541  xmm2 = xmm2 + a1 * B.load(k,j1);
6542  }
6543 
6544  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6545  (~C).store( i, j1, (~C).load(i,j1) + xmm2 * factor );
6546  }
6547  }
6548 
      // At most one vector-wide strip is left at this point; it is processed row by row
6549  if( j < jend )
6550  {
6551  for( size_t i=ii; i<iend; ++i )
6552  {
6553  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6554  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6555  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
6556  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
6557 
6558  IntrinsicType xmm1;
6559 
6560  for( size_t k=kbegin; k<kend; ++k ) {
6561  const IntrinsicType a1( set( A(i,k) ) );
6562  xmm1 = xmm1 + a1 * B.load(k,j);
6563  }
6564 
6565  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
6566  }
6567  }
6568  }
6569  }
6570  }
6571  }
6572  //**********************************************************************************************
6573 
6574  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
6588  template< typename MT3 // Type of the left-hand side target matrix
6589  , typename MT4 // Type of the left-hand side matrix operand
6590  , typename MT5 // Type of the right-hand side matrix operand
6591  , typename ST2 > // Type of the scalar value
6592  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6593  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6594  {
6595  selectSmallAddAssignKernel( ~C, A, B, scalar );
6596  }
6597  //**********************************************************************************************
6598 
6599  //**BLAS-based addition assignment to dense matrices (default)**********************************
6613  template< typename MT3 // Type of the left-hand side target matrix
6614  , typename MT4 // Type of the left-hand side matrix operand
6615  , typename MT5 // Type of the right-hand side matrix operand
6616  , typename ST2 > // Type of the scalar value
6617  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6618  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6619  {
6620  selectLargeAddAssignKernel( C, A, B, scalar );
6621  }
6622  //**********************************************************************************************
6623 
6624  //**BLAS-based addition assignment to dense matrices (single precision)*************************
6625 #if BLAZE_BLAS_MODE
6626 
6639  template< typename MT3 // Type of the left-hand side target matrix
6640  , typename MT4 // Type of the left-hand side matrix operand
6641  , typename MT5 // Type of the right-hand side matrix operand
6642  , typename ST2 > // Type of the scalar value
6643  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6644  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6645  {
6646  if( IsTriangular<MT4>::value ) {
6647  typename MT3::ResultType tmp( B );
6648  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
6649  addAssign( C, tmp );
6650  }
6651  else if( IsTriangular<MT5>::value ) {
6652  typename MT3::ResultType tmp( A );
6653  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
6654  addAssign( C, tmp );
6655  }
6656  else {
6657  sgemm( C, A, B, scalar, 1.0F );
6658  }
6659  }
6660 #endif
6661  //**********************************************************************************************
6662 
6663  //**BLAS-based addition assignment to dense matrices (double precision)*************************
6664 #if BLAZE_BLAS_MODE
6665 
6678  template< typename MT3 // Type of the left-hand side target matrix
6679  , typename MT4 // Type of the left-hand side matrix operand
6680  , typename MT5 // Type of the right-hand side matrix operand
6681  , typename ST2 > // Type of the scalar value
6682  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6683  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6684  {
6685  if( IsTriangular<MT4>::value ) {
6686  typename MT3::ResultType tmp( B );
6687  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
6688  addAssign( C, tmp );
6689  }
6690  else if( IsTriangular<MT5>::value ) {
6691  typename MT3::ResultType tmp( A );
6692  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
6693  addAssign( C, tmp );
6694  }
6695  else {
6696  dgemm( C, A, B, scalar, 1.0 );
6697  }
6698  }
6699 #endif
6700  //**********************************************************************************************
6701 
6702  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
6703 #if BLAZE_BLAS_MODE
6704 
6717  template< typename MT3 // Type of the left-hand side target matrix
6718  , typename MT4 // Type of the left-hand side matrix operand
6719  , typename MT5 // Type of the right-hand side matrix operand
6720  , typename ST2 > // Type of the scalar value
6721  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6722  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6723  {
6724  if( IsTriangular<MT4>::value ) {
6725  typename MT3::ResultType tmp( B );
6726  ctrmm( tmp, A, CblasLeft,
6727  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6728  complex<float>( scalar, 0.0F ) );
6729  addAssign( C, tmp );
6730  }
6731  else if( IsTriangular<MT5>::value ) {
6732  typename MT3::ResultType tmp( A );
6733  ctrmm( tmp, B, CblasRight,
6734  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6735  complex<float>( scalar, 0.0F ) );
6736  addAssign( C, tmp );
6737  }
6738  else {
6739  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
6740  }
6741  }
6742 #endif
6743  //**********************************************************************************************
6744 
6745  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
6746 #if BLAZE_BLAS_MODE
6747 
6760  template< typename MT3 // Type of the left-hand side target matrix
6761  , typename MT4 // Type of the left-hand side matrix operand
6762  , typename MT5 // Type of the right-hand side matrix operand
6763  , typename ST2 > // Type of the scalar value
6764  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6765  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6766  {
6767  if( IsTriangular<MT4>::value ) {
6768  typename MT3::ResultType tmp( B );
6769  ztrmm( tmp, A, CblasLeft,
6770  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6771  complex<double>( scalar, 0.0 ) );
6772  addAssign( C, tmp );
6773  }
6774  else if( IsTriangular<MT5>::value ) {
6775  typename MT3::ResultType tmp( A );
6776  ztrmm( tmp, B, CblasRight,
6777  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6778  complex<double>( scalar, 0.0 ) );
6779  addAssign( C, tmp );
6780  }
6781  else {
6782  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
6783  }
6784  }
6785 #endif
6786  //**********************************************************************************************
6787 
6788  //**Restructuring addition assignment to column-major matrices**********************************
6802  template< typename MT > // Type of the target matrix
6803  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6804  addAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
6805  {
6807 
6809 
6810  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6811  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6812 
6813  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6814  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6815 
6816  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
6817  addAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
6818  else if( IsSymmetric<MT1>::value )
6819  addAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
6820  else
6821  addAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
6822  }
6823  //**********************************************************************************************
6824 
6825  //**Addition assignment to sparse matrices******************************************************
6826  // No special implementation for the addition assignment to sparse matrices.
6827  //**********************************************************************************************
6828 
6829  //**Subtraction assignment to dense matrices****************************************************
6841  template< typename MT // Type of the target dense matrix
6842  , bool SO > // Storage order of the target dense matrix
6843  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6844  subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6845  {
6847 
6848  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6849  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6850 
6851  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6852  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6853 
6854  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6855  return;
6856  }
6857 
6858  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6859  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6860 
6861  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6862  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6863  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6864  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6865  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6866  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6867 
6868  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6869  }
6870  //**********************************************************************************************
6871 
6872  //**Subtraction assignment to dense matrices (kernel selection)*********************************
6883  template< typename MT3 // Type of the left-hand side target matrix
6884  , typename MT4 // Type of the left-hand side matrix operand
6885  , typename MT5 // Type of the right-hand side matrix operand
6886  , typename ST2 > // Type of the scalar value
6887  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6888  {
6889  if( ( IsDiagonal<MT5>::value ) ||
6890  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
6891  selectSmallSubAssignKernel( C, A, B, scalar );
6892  else
6893  selectBlasSubAssignKernel( C, A, B, scalar );
6894  }
6895  //**********************************************************************************************
6896 
6897  //**Default subtraction assignment to dense matrices (general/general)**************************
6911  template< typename MT3 // Type of the left-hand side target matrix
6912  , typename MT4 // Type of the left-hand side matrix operand
6913  , typename MT5 // Type of the right-hand side matrix operand
6914  , typename ST2 > // Type of the scalar value
6915  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
6916  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6917  {
6918  const ResultType tmp( serial( A * B * scalar ) );
6919  subAssign( C, tmp );
6920  }
6921  //**********************************************************************************************
6922 
6923  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
6937  template< typename MT3 // Type of the left-hand side target matrix
6938  , typename MT4 // Type of the left-hand side matrix operand
6939  , typename MT5 // Type of the right-hand side matrix operand
6940  , typename ST2 > // Type of the scalar value
6941  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
6942  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6943  {
6945 
6946  const size_t M( A.rows() );
6947  const size_t N( B.columns() );
6948 
6949  for( size_t i=0UL; i<M; ++i )
6950  {
6951  const size_t jbegin( ( IsUpper<MT4>::value )
6952  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6953  :( 0UL ) );
6954  const size_t jend( ( IsLower<MT4>::value )
6955  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6956  :( N ) );
6957  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6958 
6959  const size_t jnum( jend - jbegin );
6960  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6961 
6962  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6963  C(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6964  C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6965  }
6966  if( jpos < jend ) {
6967  C(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
6968  }
6969  }
6970  }
6971  //**********************************************************************************************
6972 
6973  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
6987  template< typename MT3 // Type of the left-hand side target matrix
6988  , typename MT4 // Type of the left-hand side matrix operand
6989  , typename MT5 // Type of the right-hand side matrix operand
6990  , typename ST2 > // Type of the scalar value
6991  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
6992  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6993  {
6995 
6996  const size_t M( A.rows() );
6997  const size_t N( B.columns() );
6998 
6999  for( size_t i=0UL; i<M; ++i )
7000  {
7001  const size_t jbegin( ( IsUpper<MT5>::value )
7002  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
7003  :( 0UL ) );
7004  const size_t jend( ( IsLower<MT5>::value )
7005  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
7006  :( N ) );
7007  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
7008 
7009  const size_t jnum( jend - jbegin );
7010  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
7011 
7012  for( size_t j=jbegin; j<jpos; j+=2UL ) {
7013  C(i,j ) -= A(i,i) * B(i,j ) * scalar;
7014  C(i,j+1UL) -= A(i,i) * B(i,j+1UL) * scalar;
7015  }
7016  if( jpos < jend ) {
7017  C(i,jpos) -= A(i,i) * B(i,jpos) * scalar;
7018  }
7019  }
7020  }
7021  //**********************************************************************************************
7022 
7023  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
7037  template< typename MT3 // Type of the left-hand side target matrix
7038  , typename MT4 // Type of the left-hand side matrix operand
7039  , typename MT5 // Type of the right-hand side matrix operand
7040  , typename ST2 > // Type of the scalar value
7041  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
7042  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7043  {
7045 
7046  for( size_t i=0UL; i<A.rows(); ++i ) {
7047  C(i,i) -= A(i,i) * B(i,i) * scalar;
7048  }
7049  }
7050  //**********************************************************************************************
7051 
7052  //**Default subtraction assignment to dense matrices (small matrices)***************************
7066  template< typename MT3 // Type of the left-hand side target matrix
7067  , typename MT4 // Type of the left-hand side matrix operand
7068  , typename MT5 // Type of the right-hand side matrix operand
7069  , typename ST2 > // Type of the scalar value
7070  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7071  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7072  {
7073  selectDefaultSubAssignKernel( C, A, B, scalar );
7074  }
7075  //**********************************************************************************************
7076 
7077  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
7092  template< typename MT3 // Type of the left-hand side target matrix
7093  , typename MT4 // Type of the left-hand side matrix operand
7094  , typename MT5 // Type of the right-hand side matrix operand
7095  , typename ST2 > // Type of the scalar value
7096  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7097  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7098  {
7099  typedef IntrinsicTrait<ElementType> IT;
7100 
7101  const size_t M( A.rows() );
7102  const size_t N( B.columns() );
7103  const size_t K( A.columns() );
7104 
7105  const IntrinsicType factor( set( scalar ) );
7106 
7107  size_t j( 0UL );
7108 
7109  for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
7110  for( size_t i=0UL; i<M; ++i )
7111  {
7112  const size_t kbegin( ( IsUpper<MT4>::value )
7113  ?( ( IsLower<MT5>::value )
7114  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7115  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7116  :( IsLower<MT5>::value ? j : 0UL ) );
7117  const size_t kend( ( IsLower<MT4>::value )
7118  ?( ( IsUpper<MT5>::value )
7119  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+IT::size*8UL, K ) )
7120  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
7121  :( IsUpper<MT5>::value ? min( j+IT::size*8UL, K ) : K ) );
7122 
7123  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7124 
7125  for( size_t k=kbegin; k<kend; ++k ) {
7126  const IntrinsicType a1( set( A(i,k) ) );
7127  xmm1 = xmm1 + a1 * B.load(k,j );
7128  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
7129  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
7130  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
7131  xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
7132  xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
7133  xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
7134  xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
7135  }
7136 
7137  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7138  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
7139  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
7140  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
7141  (~C).store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
7142  (~C).store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
7143  (~C).store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
7144  (~C).store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
7145  }
7146  }
7147 
7148  for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL )
7149  {
7150  size_t i( 0UL );
7151 
7152  for( ; (i+2UL) <= M; i+=2UL )
7153  {
7154  const size_t kbegin( ( IsUpper<MT4>::value )
7155  ?( ( IsLower<MT5>::value )
7156  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7157  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7158  :( IsLower<MT5>::value ? j : 0UL ) );
7159  const size_t kend( ( IsLower<MT4>::value )
7160  ?( ( IsUpper<MT5>::value )
7161  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*4UL, K ) )
7162  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
7163  :( IsUpper<MT5>::value ? min( j+IT::size*4UL, K ) : K ) );
7164 
7165  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7166 
7167  for( size_t k=kbegin; k<kend; ++k ) {
7168  const IntrinsicType a1( set( A(i ,k) ) );
7169  const IntrinsicType a2( set( A(i+1UL,k) ) );
7170  const IntrinsicType b1( B.load(k,j ) );
7171  const IntrinsicType b2( B.load(k,j+IT::size ) );
7172  const IntrinsicType b3( B.load(k,j+IT::size*2UL) );
7173  const IntrinsicType b4( B.load(k,j+IT::size*3UL) );
7174  xmm1 = xmm1 + a1 * b1;
7175  xmm2 = xmm2 + a1 * b2;
7176  xmm3 = xmm3 + a1 * b3;
7177  xmm4 = xmm4 + a1 * b4;
7178  xmm5 = xmm5 + a2 * b1;
7179  xmm6 = xmm6 + a2 * b2;
7180  xmm7 = xmm7 + a2 * b3;
7181  xmm8 = xmm8 + a2 * b4;
7182  }
7183 
7184  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7185  (~C).store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
7186  (~C).store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
7187  (~C).store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
7188  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
7189  (~C).store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
7190  (~C).store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
7191  (~C).store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
7192  }
7193 
7194  if( i < M )
7195  {
7196  const size_t kbegin( ( IsUpper<MT4>::value )
7197  ?( ( IsLower<MT5>::value )
7198  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7199  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7200  :( IsLower<MT5>::value ? j : 0UL ) );
7201  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, K ) ):( K ) );
7202 
7203  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7204 
7205  for( size_t k=kbegin; k<kend; ++k ) {
7206  const IntrinsicType a1( set( A(i,k) ) );
7207  xmm1 = xmm1 + a1 * B.load(k,j );
7208  xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
7209  xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
7210  xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
7211  }
7212 
7213  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7214  (~C).store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
7215  (~C).store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
7216  (~C).store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
7217  }
7218  }
7219 
7220  for( ; (j+IT::size) < N; j+=IT::size*2UL )
7221  {
7222  size_t i( 0UL );
7223 
7224  for( ; (i+2UL) <= M; i+=2UL )
7225  {
7226  const size_t kbegin( ( IsUpper<MT4>::value )
7227  ?( ( IsLower<MT5>::value )
7228  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7229  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7230  :( IsLower<MT5>::value ? j : 0UL ) );
7231  const size_t kend( ( IsLower<MT4>::value )
7232  ?( ( IsUpper<MT5>::value )
7233  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+IT::size*2UL, K ) )
7234  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
7235  :( IsUpper<MT5>::value ? min( j+IT::size*2UL, K ) : K ) );
7236 
7237  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7238 
7239  for( size_t k=kbegin; k<kend; ++k ) {
7240  const IntrinsicType a1( set( A(i ,k) ) );
7241  const IntrinsicType a2( set( A(i+1UL,k) ) );
7242  const IntrinsicType b1( B.load(k,j ) );
7243  const IntrinsicType b2( B.load(k,j+IT::size) );
7244  xmm1 = xmm1 + a1 * b1;
7245  xmm2 = xmm2 + a1 * b2;
7246  xmm3 = xmm3 + a2 * b1;
7247  xmm4 = xmm4 + a2 * b2;
7248  }
7249 
7250  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7251  (~C).store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
7252  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
7253  (~C).store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
7254  }
7255 
7256  if( i < M )
7257  {
7258  const size_t kbegin( ( IsUpper<MT4>::value )
7259  ?( ( IsLower<MT5>::value )
7260  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7261  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7262  :( IsLower<MT5>::value ? j : 0UL ) );
7263  const size_t kend( ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, K ) ):( K ) );
7264 
7265  IntrinsicType xmm1, xmm2;
7266 
7267  for( size_t k=kbegin; k<kend; ++k ) {
7268  const IntrinsicType a1( set( A(i,k) ) );
7269  xmm1 = xmm1 + a1 * B.load(k,j );
7270  xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
7271  }
7272 
7273  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7274  (~C).store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
7275  }
7276  }
7277 
7278  if( j < N )
7279  {
7280  size_t i( 0UL );
7281 
7282  for( ; (i+2UL) <= M; i+=2UL )
7283  {
7284  const size_t kbegin( ( IsUpper<MT4>::value )
7285  ?( ( IsLower<MT5>::value )
7286  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7287  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7288  :( IsLower<MT5>::value ? j : 0UL ) );
7289  const size_t kend( ( IsLower<MT4>::value )
7290  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
7291  :( K ) );
7292 
7293  IntrinsicType xmm1, xmm2;
7294 
7295  for( size_t k=kbegin; k<kend; ++k ) {
7296  const IntrinsicType b1( B.load(k,j) );
7297  xmm1 = xmm1 + set( A(i ,k) ) * b1;
7298  xmm2 = xmm2 + set( A(i+1UL,k) ) * b1;
7299  }
7300 
7301  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
7302  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
7303  }
7304 
7305  if( i < M )
7306  {
7307  const size_t kbegin( ( IsUpper<MT4>::value )
7308  ?( ( IsLower<MT5>::value )
7309  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
7310  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
7311  :( IsLower<MT5>::value ? j : 0UL ) );
7312 
7313  IntrinsicType xmm1;
7314 
7315  for( size_t k=kbegin; k<K; ++k ) {
7316  xmm1 = xmm1 + set( A(i,k) ) * B.load(k,j);
7317  }
7318 
7319  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
7320  }
7321  }
7322  }
7323  //**********************************************************************************************
7324 
7325  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
7339  template< typename MT3 // Type of the left-hand side target matrix
7340  , typename MT4 // Type of the left-hand side matrix operand
7341  , typename MT5 // Type of the right-hand side matrix operand
7342  , typename ST2 > // Type of the scalar value
7343  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7344  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7345  {
7350 
7351  if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
7352  const typename MT4::OppositeType tmp( serial( A ) );
7353  subAssign( ~C, tmp * B * scalar );
7354  }
7355  else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
7356  const typename MT5::OppositeType tmp( serial( B ) );
7357  subAssign( ~C, A * tmp * scalar );
7358  }
7359  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
7360  const typename MT4::OppositeType tmp( serial( A ) );
7361  subAssign( ~C, tmp * B * scalar );
7362  }
7363  else {
7364  const typename MT5::OppositeType tmp( serial( B ) );
7365  subAssign( ~C, A * tmp * scalar );
7366  }
7367  }
7368  //**********************************************************************************************
7369 
7370  //**Default subtraction assignment to dense matrices (large matrices)***************************
7384  template< typename MT3 // Type of the left-hand side target matrix
7385  , typename MT4 // Type of the left-hand side matrix operand
7386  , typename MT5 // Type of the right-hand side matrix operand
7387  , typename ST2 > // Type of the scalar value
7388  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7389  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7390  {
7391  selectDefaultSubAssignKernel( C, A, B, scalar );
7392  }
7393  //**********************************************************************************************
7394 
7395  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
/*!\brief Vectorized default subtraction assignment of a scaled dense matrix-dense matrix
//        multiplication to a target of type DenseMatrix<MT3,false> (large matrices).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// This overload is enabled via UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>. It updates
// C(i,j..) = C(i,j..) - ( sum over k of A(i,k) * B(k,j..) ) * scalar with intrinsic vectors
// of IT::size elements. The iteration space is tiled into blocks of 64 rows (i), 128 columns
// (j) and 128 inner indices (k); the partial sum of every k tile is scaled and subtracted
// from C directly. Within a tile the columns are processed in strips of four vectors, then
// two vectors, then a single vector. The k range of each micro kernel is narrowed further
// depending on IsUpper/IsLower of the operand types.
*/
7410  template< typename MT3 // Type of the left-hand side target matrix
7411  , typename MT4 // Type of the left-hand side matrix operand
7412  , typename MT5 // Type of the right-hand side matrix operand
7413  , typename ST2 > // Type of the scalar value
7414  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7415  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7416  {
7417  typedef IntrinsicTrait<ElementType> IT;
7418 
7419  const size_t M( A.rows() );
7420  const size_t N( B.columns() );
7421  const size_t K( A.columns() );
7422 
      // Tile extents for the i (row), j (column) and k (inner) loops
7423  const size_t iblock( 64UL );
7424  const size_t jblock( 128UL );
7425  const size_t kblock( 128UL );
7426 
      // The scaling factor broadcast into an intrinsic vector; applied once per store
7427  const IntrinsicType factor( set( scalar ) );
7428 
7429  for( size_t jj=0UL; jj<N; jj+=jblock )
7430  {
7431  const size_t jend( min( jj+jblock, N ) );
7432 
7433  for( size_t ii=0UL; ii<M; ii+=iblock )
7434  {
7435  const size_t iend( min( ii+iblock, M ) );
7436 
7437  for( size_t kk=0UL; kk<K; kk+=kblock )
7438  {
7439  const size_t ktmp( min( kk+kblock, K ) );
7440 
7441  size_t j( jj );
7442 
      // Column strips of four intrinsic vectors: rows in pairs, then one leftover row
7443  for( ; (j+IT::size*3UL) < jend; j+=IT::size*4UL )
7444  {
7445  const size_t j1( j+IT::size );
7446  const size_t j2( j+IT::size*2UL );
7447  const size_t j3( j+IT::size*3UL );
7448 
7449  size_t i( ii );
7450 
7451  for( ; (i+2UL) <= iend; i+=2UL )
7452  {
      // The k range is limited to the current k tile [kk,ktmp). It starts no earlier than
      // i if MT4 is upper and no earlier than j if MT5 is lower; it ends no later than the
      // last processed row + 1 if MT4 is lower and no later than the end of the column
      // strip if MT5 is upper.
7453  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7454  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7455  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
7456  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
7457 
      // NOTE(review): the accumulators are default-constructed and then summed into;
      // presumably IntrinsicType default-initializes to zero - confirm.
7458  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7459 
7460  for( size_t k=kbegin; k<kend; ++k ) {
7461  const IntrinsicType a1( set( A(i ,k) ) );
7462  const IntrinsicType a2( set( A(i+1UL,k) ) );
7463  const IntrinsicType b1( B.load(k,j ) );
7464  const IntrinsicType b2( B.load(k,j1) );
7465  const IntrinsicType b3( B.load(k,j2) );
7466  const IntrinsicType b4( B.load(k,j3) );
7467  xmm1 = xmm1 + a1 * b1;
7468  xmm2 = xmm2 + a1 * b2;
7469  xmm3 = xmm3 + a1 * b3;
7470  xmm4 = xmm4 + a1 * b4;
7471  xmm5 = xmm5 + a2 * b1;
7472  xmm6 = xmm6 + a2 * b2;
7473  xmm7 = xmm7 + a2 * b3;
7474  xmm8 = xmm8 + a2 * b4;
7475  }
7476 
      // xmm1-xmm4 belong to row i, xmm5-xmm8 to row i+1
7477  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7478  (~C).store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7479  (~C).store( i , j2, (~C).load(i ,j2) - xmm3 * factor );
7480  (~C).store( i , j3, (~C).load(i ,j3) - xmm4 * factor );
7481  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
7482  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm6 * factor );
7483  (~C).store( i+1UL, j2, (~C).load(i+1UL,j2) - xmm7 * factor );
7484  (~C).store( i+1UL, j3, (~C).load(i+1UL,j3) - xmm8 * factor );
7485  }
7486 
      // Single leftover row of the four-vector strip
7487  if( i < iend )
7488  {
7489  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7490  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7491  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7492  ( IsUpper<MT5>::value )?( min( j+IT::size*4UL, ktmp ) ):( ktmp ) ) );
7493 
7494  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7495 
7496  for( size_t k=kbegin; k<kend; ++k ) {
7497  const IntrinsicType a1( set( A(i,k) ) );
7498  xmm1 = xmm1 + a1 * B.load(k,j );
7499  xmm2 = xmm2 + a1 * B.load(k,j1);
7500  xmm3 = xmm3 + a1 * B.load(k,j2);
7501  xmm4 = xmm4 + a1 * B.load(k,j3);
7502  }
7503 
7504  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7505  (~C).store( i, j1, (~C).load(i,j1) - xmm2 * factor );
7506  (~C).store( i, j2, (~C).load(i,j2) - xmm3 * factor );
7507  (~C).store( i, j3, (~C).load(i,j3) - xmm4 * factor );
7508  }
7509  }
7510 
      // Column strips of two intrinsic vectors: rows in groups of four, then two, then one
7511  for( ; (j+IT::size) < jend; j+=IT::size*2UL )
7512  {
7513  const size_t j1( j+IT::size );
7514 
7515  size_t i( ii );
7516 
7517  for( ; (i+4UL) <= iend; i+=4UL )
7518  {
7519  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7520  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7521  const size_t kend ( min( ( IsLower<MT4>::value )?( i+4UL ):( ktmp ),
7522  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
7523 
7524  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7525 
7526  for( size_t k=kbegin; k<kend; ++k ) {
7527  const IntrinsicType a1( set( A(i ,k) ) );
7528  const IntrinsicType a2( set( A(i+1UL,k) ) );
7529  const IntrinsicType a3( set( A(i+2UL,k) ) );
7530  const IntrinsicType a4( set( A(i+3UL,k) ) );
7531  const IntrinsicType b1( B.load(k,j ) );
7532  const IntrinsicType b2( B.load(k,j1) );
7533  xmm1 = xmm1 + a1 * b1;
7534  xmm2 = xmm2 + a1 * b2;
7535  xmm3 = xmm3 + a2 * b1;
7536  xmm4 = xmm4 + a2 * b2;
7537  xmm5 = xmm5 + a3 * b1;
7538  xmm6 = xmm6 + a3 * b2;
7539  xmm7 = xmm7 + a4 * b1;
7540  xmm8 = xmm8 + a4 * b2;
7541  }
7542 
      // Two accumulators per row: (xmm1,xmm2) row i ... (xmm7,xmm8) row i+3
7543  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7544  (~C).store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7545  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
7546  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm4 * factor );
7547  (~C).store( i+2UL, j , (~C).load(i+2UL,j ) - xmm5 * factor );
7548  (~C).store( i+2UL, j1, (~C).load(i+2UL,j1) - xmm6 * factor );
7549  (~C).store( i+3UL, j , (~C).load(i+3UL,j ) - xmm7 * factor );
7550  (~C).store( i+3UL, j1, (~C).load(i+3UL,j1) - xmm8 * factor );
7551  }
7552 
7553  for( ; (i+2UL) <= iend; i+=2UL )
7554  {
7555  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7556  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7557  const size_t kend ( min( ( IsLower<MT4>::value )?( i+2UL ):( ktmp ),
7558  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
7559 
7560  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7561 
7562  for( size_t k=kbegin; k<kend; ++k ) {
7563  const IntrinsicType a1( set( A(i ,k) ) );
7564  const IntrinsicType a2( set( A(i+1UL,k) ) );
7565  const IntrinsicType b1( B.load(k,j ) );
7566  const IntrinsicType b2( B.load(k,j1) );
7567  xmm1 = xmm1 + a1 * b1;
7568  xmm2 = xmm2 + a1 * b2;
7569  xmm3 = xmm3 + a2 * b1;
7570  xmm4 = xmm4 + a2 * b2;
7571  }
7572 
7573  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7574  (~C).store( i , j1, (~C).load(i ,j1) - xmm2 * factor );
7575  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
7576  (~C).store( i+1UL, j1, (~C).load(i+1UL,j1) - xmm4 * factor );
7577  }
7578 
7579  if( i < iend )
7580  {
7581  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7582  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7583  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7584  ( IsUpper<MT5>::value )?( min( j+IT::size*2UL, ktmp ) ):( ktmp ) ) );
7585 
7586  IntrinsicType xmm1, xmm2;
7587 
7588  for( size_t k=kbegin; k<kend; ++k ) {
7589  const IntrinsicType a1( set( A(i,k) ) );
7590  xmm1 = xmm1 + a1 * B.load(k,j );
7591  xmm2 = xmm2 + a1 * B.load(k,j1);
7592  }
7593 
7594  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7595  (~C).store( i, j1, (~C).load(i,j1) - xmm2 * factor );
7596  }
7597  }
7598 
      // At most IT::size columns are left at this point; they are handled with a single
      // vector per row. NOTE(review): the full-width load/store at column j may reach
      // beyond jend - presumably this relies on padded rows; confirm.
7599  if( j < jend )
7600  {
7601  for( size_t i=ii; i<iend; ++i )
7602  {
7603  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7604  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7605  const size_t kend ( min( ( IsLower<MT4>::value )?( i+1UL ):( ktmp ),
7606  ( IsUpper<MT5>::value )?( min( j+IT::size, ktmp ) ):( ktmp ) ) );
7607 
7608  IntrinsicType xmm1;
7609 
7610  for( size_t k=kbegin; k<kend; ++k ) {
7611  const IntrinsicType a1( set( A(i,k) ) );
7612  xmm1 = xmm1 + a1 * B.load(k,j);
7613  }
7614 
7615  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
7616  }
7617  }
7618  }
7619  }
7620  }
7621  }
7622  //**********************************************************************************************
7623 
7624  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
7638  template< typename MT3 // Type of the left-hand side target matrix
7639  , typename MT4 // Type of the left-hand side matrix operand
7640  , typename MT5 // Type of the right-hand side matrix operand
7641  , typename ST2 > // Type of the scalar value
7642  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7643  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7644  {
7645  selectSmallSubAssignKernel( ~C, A, B, scalar );
7646  }
7647  //**********************************************************************************************
7648 
7649  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
7663  template< typename MT3 // Type of the left-hand side target matrix
7664  , typename MT4 // Type of the left-hand side matrix operand
7665  , typename MT5 // Type of the right-hand side matrix operand
7666  , typename ST2 > // Type of the scalar value
7667  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7668  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7669  {
7670  selectLargeSubAssignKernel( C, A, B, scalar );
7671  }
7672  //**********************************************************************************************
7673 
7674  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
7675 #if BLAZE_BLAS_MODE
7676 
7689  template< typename MT3 // Type of the left-hand side target matrix
7690  , typename MT4 // Type of the left-hand side matrix operand
7691  , typename MT5 // Type of the right-hand side matrix operand
7692  , typename ST2 > // Type of the scalar value
7693  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7694  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7695  {
7696  if( IsTriangular<MT4>::value ) {
7697  typename MT3::ResultType tmp( B );
7698  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7699  subAssign( C, tmp );
7700  }
7701  else if( IsTriangular<MT5>::value ) {
7702  typename MT3::ResultType tmp( A );
7703  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7704  subAssign( C, tmp );
7705  }
7706  else {
7707  sgemm( C, A, B, -scalar, 1.0F );
7708  }
7709  }
7710 #endif
7711  //**********************************************************************************************
7712 
7713  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
7714 #if BLAZE_BLAS_MODE
7715 
7728  template< typename MT3 // Type of the left-hand side target matrix
7729  , typename MT4 // Type of the left-hand side matrix operand
7730  , typename MT5 // Type of the right-hand side matrix operand
7731  , typename ST2 > // Type of the scalar value
7732  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7733  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7734  {
7735  if( IsTriangular<MT4>::value ) {
7736  typename MT3::ResultType tmp( B );
7737  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7738  subAssign( C, tmp );
7739  }
7740  else if( IsTriangular<MT5>::value ) {
7741  typename MT3::ResultType tmp( A );
7742  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7743  subAssign( C, tmp );
7744  }
7745  else {
7746  dgemm( C, A, B, -scalar, 1.0 );
7747  }
7748  }
7749 #endif
7750  //**********************************************************************************************
7751 
7752  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
7753 #if BLAZE_BLAS_MODE
7754 
7767  template< typename MT3 // Type of the left-hand side target matrix
7768  , typename MT4 // Type of the left-hand side matrix operand
7769  , typename MT5 // Type of the right-hand side matrix operand
7770  , typename ST2 > // Type of the scalar value
7771  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7772  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7773  {
7774  if( IsTriangular<MT4>::value ) {
7775  typename MT3::ResultType tmp( B );
7776  ctrmm( tmp, A, CblasLeft,
7777  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7778  complex<float>( scalar, 0.0F ) );
7779  subAssign( C, tmp );
7780  }
7781  else if( IsTriangular<MT5>::value ) {
7782  typename MT3::ResultType tmp( A );
7783  ctrmm( tmp, B, CblasRight,
7784  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7785  complex<float>( scalar, 0.0F ) );
7786  subAssign( C, tmp );
7787  }
7788  else {
7789  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
7790  }
7791  }
7792 #endif
7793  //**********************************************************************************************
7794 
7795  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
7796 #if BLAZE_BLAS_MODE
7797 
7810  template< typename MT3 // Type of the left-hand side target matrix
7811  , typename MT4 // Type of the left-hand side matrix operand
7812  , typename MT5 // Type of the right-hand side matrix operand
7813  , typename ST2 > // Type of the scalar value
7814  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7815  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7816  {
7817  if( IsTriangular<MT4>::value ) {
7818  typename MT3::ResultType tmp( B );
7819  ztrmm( tmp, A, CblasLeft,
7820  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7821  complex<float>( scalar, 0.0 ) );
7822  subAssign( C, tmp );
7823  }
7824  else if( IsTriangular<MT5>::value ) {
7825  typename MT3::ResultType tmp( A );
7826  ztrmm( tmp, B, CblasRight,
7827  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7828  complex<float>( scalar, 0.0 ) );
7829  subAssign( C, tmp );
7830  }
7831  else {
7832  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
7833  }
7834  }
7835 #endif
7836  //**********************************************************************************************
7837 
7838  //**Restructuring subtraction assignment to column-major matrices*******************************
7852  template< typename MT > // Type of the target matrix
7853  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
7854  subAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7855  {
7857 
7859 
7860  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7861  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7862 
7863  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7864  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7865 
7866  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
7867  subAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
7868  else if( IsSymmetric<MT1>::value )
7869  subAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
7870  else
7871  subAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
7872  }
7873  //**********************************************************************************************
7874 
7875  //**Subtraction assignment to sparse matrices***************************************************
7876  // No special implementation for the subtraction assignment to sparse matrices.
7877  //**********************************************************************************************
7878 
7879  //**Multiplication assignment to dense matrices*************************************************
7880  // No special implementation for the multiplication assignment to dense matrices.
7881  //**********************************************************************************************
7882 
7883  //**Multiplication assignment to sparse matrices************************************************
7884  // No special implementation for the multiplication assignment to sparse matrices.
7885  //**********************************************************************************************
7886 
7887  //**SMP assignment to dense matrices************************************************************
7902  template< typename MT // Type of the target dense matrix
7903  , bool SO > // Storage order of the target dense matrix
7904  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7905  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7906  {
7908 
7909  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7910  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7911 
7912  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7913  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7914 
7915  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7916  return;
7917  }
7918  else if( left.columns() == 0UL ) {
7919  reset( ~lhs );
7920  return;
7921  }
7922 
7923  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7924  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7925 
7926  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7927  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7928  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7929  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7930  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7931  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7932 
7933  smpAssign( ~lhs, A * B * rhs.scalar_ );
7934  }
7935  //**********************************************************************************************
7936 
7937  //**SMP assignment to sparse matrices***********************************************************
7952  template< typename MT // Type of the target sparse matrix
7953  , bool SO > // Storage order of the target sparse matrix
7954  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7955  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7956  {
7958 
7959  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
7960 
7967 
7968  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7969  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7970 
7971  const TmpType tmp( rhs );
7972  smpAssign( ~lhs, tmp );
7973  }
7974  //**********************************************************************************************
7975 
7976  //**Restructuring SMP assignment to column-major matrices***************************************
7990  template< typename MT > // Type of the target matrix
7991  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
7992  smpAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7993  {
7995 
7997 
7998  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7999  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8000 
8001  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8002  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8003 
8004  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8005  smpAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
8006  else if( IsSymmetric<MT1>::value )
8007  smpAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
8008  else
8009  smpAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8010  }
8011  //**********************************************************************************************
8012 
8013  //**SMP addition assignment to dense matrices***************************************************
8028  template< typename MT // Type of the target dense matrix
8029  , bool SO > // Storage order of the target dense matrix
8030  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8031  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8032  {
8034 
8035  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8036  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8037 
8038  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8039  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8040 
8041  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8042  return;
8043  }
8044 
8045  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8046  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8047 
8048  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8049  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8050  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8051  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8052  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8053  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8054 
8055  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
8056  }
8057  //**********************************************************************************************
8058 
8059  //**Restructuring SMP addition assignment to column-major matrices******************************
 // Restructuring SMP addition assignment of a scaled dense matrix product to a column-major
 // matrix. The overload only participates when CanExploitSymmetry<MT,MT1,MT2> enables it.
 //
 //   lhs  Target matrix the scaled product is added to.
 //   rhs  Scaled product expression; rhs.matrix_ holds the product, rhs.scalar_ the scalar.
 //
 // Nothing is computed here: the call is forwarded to smpAddAssign() with the symmetric
 // operand(s) wrapped in trans().
 //
 // NOTE(review): the embedded listing numbers skip 8077 and 8079, so statements of the original
 // function body may be missing from this extract - confirm against the upstream header.
8073  template< typename MT > // Type of the target matrix
8074  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8075  smpAddAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
8076  {
8078 
8080 
 // The target must already have the dimensions of the expression.
8081  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8082  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8083 
 // Operands of the wrapped matrix product.
8084  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8085  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8086 
 // Both operand types symmetric: transpose both operands.
8087  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8088  smpAddAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
 // Only the left-hand side operand type is symmetric: transpose the left operand.
8089  else if( IsSymmetric<MT1>::value )
8090  smpAddAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
 // MT1 is not symmetric: transpose the right operand.
 // NOTE(review): presumably CanExploitSymmetry guarantees MT2 is symmetric here - confirm.
8091  else
8092  smpAddAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8093  }
8094  //**********************************************************************************************
8095 
8096  //**SMP addition assignment to sparse matrices**************************************************
8097  // No special implementation for the SMP addition assignment to sparse matrices.
8098  //**********************************************************************************************
8099 
8100  //**SMP subtraction assignment to dense matrices************************************************
 // SMP subtraction assignment of a scaled dense matrix product to a dense matrix. The overload
 // only participates when IsEvaluationRequired<MT,MT1,MT2> enables it.
 //
 //   lhs  Target dense matrix the scaled product is subtracted from.
 //   rhs  Scaled product expression; rhs.matrix_ holds the product, rhs.scalar_ the scalar.
 //
 // Both operands are first bound to objects of type LT and RT; the subtraction is then
 // forwarded to smpSubAssign() with the product of those objects.
 //
 // NOTE(review): the embedded listing numbers skip 8120, so a statement of the original
 // function body may be missing from this extract - confirm against the upstream header.
8115  template< typename MT // Type of the target dense matrix
8116  , bool SO > // Storage order of the target dense matrix
8117  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8118  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8119  {
8121 
 // The target must already have the dimensions of the expression.
8122  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8123  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8124 
 // Operands of the wrapped matrix product.
8125  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8126  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8127 
 // Nothing to subtract if the target is empty or the inner dimension of the product is zero.
8128  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8129  return;
8130  }
8131 
8132  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8133  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8134 
 // A and B must have the dimensions of their source operands and fit the target.
8135  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8136  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8137  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8138  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8139  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8140  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8141 
 // Forward the scaled product of A and B to smpSubAssign().
8142  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
8143  }
8144  //**********************************************************************************************
8145 
8146  //**Restructuring SMP subtraction assignment to column-major matrices***************************
 // Restructuring SMP subtraction assignment of a scaled dense matrix product to a column-major
 // matrix. The overload only participates when CanExploitSymmetry<MT,MT1,MT2> enables it.
 //
 //   lhs  Target matrix the scaled product is subtracted from.
 //   rhs  Scaled product expression; rhs.matrix_ holds the product, rhs.scalar_ the scalar.
 //
 // Nothing is computed here: the call is forwarded to smpSubAssign() with the symmetric
 // operand(s) wrapped in trans().
 //
 // NOTE(review): the embedded listing numbers skip 8164 and 8166, so statements of the original
 // function body may be missing from this extract - confirm against the upstream header.
8160  template< typename MT > // Type of the target matrix
8161  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8162  smpSubAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
8163  {
8165 
8167 
 // The target must already have the dimensions of the expression.
8168  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8169  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8170 
 // Operands of the wrapped matrix product.
8171  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8172  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8173 
 // Both operand types symmetric: transpose both operands.
8174  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8175  smpSubAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
 // Only the left-hand side operand type is symmetric: transpose the left operand.
8176  else if( IsSymmetric<MT1>::value )
8177  smpSubAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
 // MT1 is not symmetric: transpose the right operand.
 // NOTE(review): presumably CanExploitSymmetry guarantees MT2 is symmetric here - confirm.
8178  else
8179  smpSubAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8180  }
8181  //**********************************************************************************************
8182 
8183  //**SMP subtraction assignment to sparse matrices***********************************************
8184  // No special implementation for the SMP subtraction assignment to sparse matrices.
8185  //**********************************************************************************************
8186 
8187  //**SMP multiplication assignment to dense matrices*********************************************
8188  // No special implementation for the SMP multiplication assignment to dense matrices.
8189  //**********************************************************************************************
8190 
8191  //**SMP multiplication assignment to sparse matrices********************************************
8192  // No special implementation for the SMP multiplication assignment to sparse matrices.
8193  //**********************************************************************************************
8194 
8195  //**Compile time checks*************************************************************************
 // Compile-time check: the scalar type ST and the RightOperand type must be the same type.
 // NOTE(review): the embedded listing numbers skip 8196-8202, so further compile-time checks
 // may be missing from this extract - confirm against the upstream header.
8203  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
8204  //**********************************************************************************************
8205 };
8207 //*************************************************************************************************
8208 
8209 
8210 
8211 
8212 //=================================================================================================
8213 //
8214 // GLOBAL BINARY ARITHMETIC OPERATORS
8215 //
8216 //=================================================================================================
8217 
8218 //*************************************************************************************************
8244 template< typename T1 // Type of the left-hand side dense matrix
8245  , typename T2 > // Type of the right-hand side dense matrix
8246 inline const DMatDMatMultExpr<T1,T2>
8248 {
8250 
8251  if( (~lhs).columns() != (~rhs).rows() )
8252  throw std::invalid_argument( "Matrix sizes do not match" );
8253 
8254  return DMatDMatMultExpr<T1,T2>( ~lhs, ~rhs );
8255 }
8256 //*************************************************************************************************
8257 
8258 
8259 
8260 
8261 //=================================================================================================
8262 //
8263 // ROWS SPECIALIZATIONS
8264 //
8265 //=================================================================================================
8266 
8267 //*************************************************************************************************
8269 template< typename MT1, typename MT2 >
8270 struct Rows< DMatDMatMultExpr<MT1,MT2> >
8271  : public Rows<MT1>
8272 {};
8274 //*************************************************************************************************
8275 
8276 
8277 
8278 
8279 //=================================================================================================
8280 //
8281 // COLUMNS SPECIALIZATIONS
8282 //
8283 //=================================================================================================
8284 
8285 //*************************************************************************************************
8287 template< typename MT1, typename MT2 >
8288 struct Columns< DMatDMatMultExpr<MT1,MT2> >
8289  : public Columns<MT2>
8290 {};
8292 //*************************************************************************************************
8293 
8294 
8295 
8296 
8297 //=================================================================================================
8298 //
8299 // ISLOWER SPECIALIZATIONS
8300 //
8301 //=================================================================================================
8302 
8303 //*************************************************************************************************
8305 template< typename MT1, typename MT2 >
8306 struct IsLower< DMatDMatMultExpr<MT1,MT2> >
8307  : public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
8308 {};
8310 //*************************************************************************************************
8311 
8312 
8313 
8314 
8315 //=================================================================================================
8316 //
8317 // ISUNILOWER SPECIALIZATIONS
8318 //
8319 //=================================================================================================
8320 
8321 //*************************************************************************************************
8323 template< typename MT1, typename MT2 >
8324 struct IsUniLower< DMatDMatMultExpr<MT1,MT2> >
8325  : public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
8326 {};
8328 //*************************************************************************************************
8329 
8330 
8331 
8332 
8333 //=================================================================================================
8334 //
8335 // ISSTRICTLYLOWER SPECIALIZATIONS
8336 //
8337 //=================================================================================================
8338 
8339 //*************************************************************************************************
8341 template< typename MT1, typename MT2 >
8342 struct IsStrictlyLower< DMatDMatMultExpr<MT1,MT2> >
8343  : public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8344  , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
8345 {};
8347 //*************************************************************************************************
8348 
8349 
8350 
8351 
8352 //=================================================================================================
8353 //
8354 // ISUPPER SPECIALIZATIONS
8355 //
8356 //=================================================================================================
8357 
8358 //*************************************************************************************************
8360 template< typename MT1, typename MT2 >
8361 struct IsUpper< DMatDMatMultExpr<MT1,MT2> >
8362  : public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
8363 {};
8365 //*************************************************************************************************
8366 
8367 
8368 
8369 
8370 //=================================================================================================
8371 //
8372 // ISUNIUPPER SPECIALIZATIONS
8373 //
8374 //=================================================================================================
8375 
8376 //*************************************************************************************************
8378 template< typename MT1, typename MT2 >
8379 struct IsUniUpper< DMatDMatMultExpr<MT1,MT2> >
8380  : public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
8381 {};
8383 //*************************************************************************************************
8384 
8385 
8386 
8387 
8388 //=================================================================================================
8389 //
8390 // ISSTRICTLYUPPER SPECIALIZATIONS
8391 //
8392 //=================================================================================================
8393 
8394 //*************************************************************************************************
8396 template< typename MT1, typename MT2 >
8397 struct IsStrictlyUpper< DMatDMatMultExpr<MT1,MT2> >
8398  : public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8399  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
8400 {};
8402 //*************************************************************************************************
8403 
8404 
8405 
8406 
8407 //=================================================================================================
8408 //
8409 // EXPRESSION TRAIT SPECIALIZATIONS
8410 //
8411 //=================================================================================================
8412 
8413 //*************************************************************************************************
8415 template< typename MT1, typename MT2, typename VT >
8416 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
8417 {
8418  public:
8419  //**********************************************************************************************
8420  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8421  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
8422  IsDenseVector<VT>::value && IsColumnVector<VT>::value
8423  , typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
8424  , INVALID_TYPE >::Type Type;
8425  //**********************************************************************************************
8426 };
8428 //*************************************************************************************************
8429 
8430 
8431 //*************************************************************************************************
8433 template< typename MT1, typename MT2, typename VT >
8434 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2>, VT >
8435 {
8436  public:
8437  //**********************************************************************************************
8438  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8439  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
8440  IsSparseVector<VT>::value && IsColumnVector<VT>::value
8441  , typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
8442  , INVALID_TYPE >::Type Type;
8443  //**********************************************************************************************
8444 };
8446 //*************************************************************************************************
8447 
8448 
8449 //*************************************************************************************************
8451 template< typename VT, typename MT1, typename MT2 >
8452 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
8453 {
8454  public:
8455  //**********************************************************************************************
8456  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
8457  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8458  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
8459  , typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8460  , INVALID_TYPE >::Type Type;
8461  //**********************************************************************************************
8462 };
8464 //*************************************************************************************************
8465 
8466 
8467 //*************************************************************************************************
8469 template< typename VT, typename MT1, typename MT2 >
8470 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2> >
8471 {
8472  public:
8473  //**********************************************************************************************
8474  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
8475  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8476  IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
8477  , typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8478  , INVALID_TYPE >::Type Type;
8479  //**********************************************************************************************
8480 };
8482 //*************************************************************************************************
8483 
8484 
8485 //*************************************************************************************************
8487 template< typename MT1, typename MT2, bool AF >
8488 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2>, AF >
8489 {
8490  public:
8491  //**********************************************************************************************
8492  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
8493  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
8494  //**********************************************************************************************
8495 };
8497 //*************************************************************************************************
8498 
8499 
8500 //*************************************************************************************************
8502 template< typename MT1, typename MT2 >
8503 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2> >
8504 {
8505  public:
8506  //**********************************************************************************************
8507  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
8508  //**********************************************************************************************
8509 };
8511 //*************************************************************************************************
8512 
8513 
8514 //*************************************************************************************************
8516 template< typename MT1, typename MT2 >
8517 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2> >
8518 {
8519  public:
8520  //**********************************************************************************************
8521  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
8522  //**********************************************************************************************
8523 };
8525 //*************************************************************************************************
8526 
8527 } // namespace blaze
8528 
8529 #endif
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:145
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for mathematical functions.
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B * C$ ).
Definition: DMatDMatMultExpr.h:8247
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:142
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:320
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:416
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:307
Header file for the IsSame and IsStrictlySame type traits.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:311
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the And class template.
Header file for the DenseVector base class.
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:259
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:317
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
Expression object for dense matrix-dense matrix multiplications.The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:134
Header file for the IsUniLower type trait.
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2503
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:143
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:436
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
Header file for the Or class template.
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > > >::Type store(T *address, const sse_int16_t &value)
Aligned store of a vector of 2-byte integral values.
Definition: Store.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:312
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:480
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
Header file for the Not class template.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold.This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold.This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
Header file for BLAS level 3 functions.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Header file for the IsStrictlyTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type.In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:348
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:490
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:309
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:458
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type load(const T *address)
Loads a vector of 2-byte integral values.
Definition: Load.h:79
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:446
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:363
System settings for the BLAS mode.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:499
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:308
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:314
Constraint on the data type.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:426
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:310
Header file for the HasMutableDataAccess type trait.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:326
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:323
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:313
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:937
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:140
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
Header file for the IsTrue value trait.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:144
Header file for the IsUpper type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:141
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:500
Header file for the IsResizable type trait.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks.In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:470
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849