DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemm.h>
44 #include <blaze/math/blas/trmm.h>
45 #include <blaze/math/Aliases.h>
52 #include <blaze/math/dense/MMM.h>
53 #include <blaze/math/Exception.h>
66 #include <blaze/math/shims/Reset.h>
68 #include <blaze/math/SIMD.h>
91 #include <blaze/system/BLAS.h>
92 #include <blaze/system/Blocking.h>
97 #include <blaze/util/Assert.h>
98 #include <blaze/util/Complex.h>
101 #include <blaze/util/DisableIf.h>
102 #include <blaze/util/EnableIf.h>
105 #include <blaze/util/InvalidType.h>
106 #include <blaze/util/mpl/And.h>
107 #include <blaze/util/mpl/Bool.h>
108 #include <blaze/util/mpl/If.h>
109 #include <blaze/util/mpl/Not.h>
110 #include <blaze/util/mpl/Or.h>
111 #include <blaze/util/TrueType.h>
112 #include <blaze/util/Types.h>
120 
121 
122 namespace blaze {
123 
124 //=================================================================================================
125 //
126 // CLASS DMATTDMATMULTEXPR
127 //
128 //=================================================================================================
129 
130 //*************************************************************************************************
137 template< typename MT1 // Type of the left-hand side dense matrix
138  , typename MT2 // Type of the right-hand side dense matrix
139  , bool SF // Symmetry flag
140  , bool HF // Hermitian flag
141  , bool LF // Lower flag
142  , bool UF > // Upper flag
144  : public MatMatMultExpr< DenseMatrix< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false > >
145  , private Computation
146 {
147  private:
148  //**Type definitions****************************************************************************
155  //**********************************************************************************************
156 
157  //**********************************************************************************************
159  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
160  //**********************************************************************************************
161 
162  //**********************************************************************************************
164  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
165  //**********************************************************************************************
166 
167  //**********************************************************************************************
169  enum : bool {
170  SYM = ( SF && !( HF || LF || UF ) ),
171  HERM = ( HF && !( LF || UF ) ),
172  LOW = ( LF || ( ( SF || HF ) && UF ) ),
173  UPP = ( UF || ( ( SF || HF ) && LF ) )
174  };
175  //**********************************************************************************************
176 
177  //**********************************************************************************************
179 
183  template< typename T1, typename T2, typename T3 >
184  struct IsEvaluationRequired {
185  enum : bool { value = ( evaluateLeft || evaluateRight ) };
186  };
188  //**********************************************************************************************
189 
190  //**********************************************************************************************
192 
// Helper structure for the compile-time selection of the BLAS kernel.
// From the visible lines, 'value' requires that BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are both set, that none of the SYM, HERM, LOW and
// UPP flags is set, that T1, T2 and T3 are all SIMD-enabled, and that T1 and T3 have the same
// element type.
// NOTE(review): the listing skips original lines 199-202 and 204-207 inside this expression, so
// additional conditions are probably missing here -- confirm against the original header.
195 template< typename T1, typename T2, typename T3 >
196 struct UseBlasKernel {
197 enum : bool { value = BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION &&
198 !SYM && !HERM && !LOW && !UPP &&
203 T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
208 IsSame< ElementType_<T1>, ElementType_<T3> >::value };
209 };
211  //**********************************************************************************************
212 
213  //**********************************************************************************************
215 
// Helper structure for the compile-time selection of the vectorized default kernel.
// The visible part of 'value' requires useOptimizedKernels and SIMD-enabled T1, T2 and T3.
// NOTE(review): the listing skips original lines 221, 223-224 and 226-227; as shown the
// expression is incomplete (it starts a template argument list mid-way and ends on '&&').
// The missing conditions must be restored from the original header before this compiles.
218 template< typename T1, typename T2, typename T3 >
219 struct UseVectorizedDefaultKernel {
220 enum : bool { value = useOptimizedKernels &&
222 T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
225 , ElementType_<T3> >::value &&
228 };
230  //**********************************************************************************************
231 
232  //**********************************************************************************************
234 
237  using ForwardFunctor = IfTrue_< HERM
238  , DeclHerm
239  , IfTrue_< SYM
240  , DeclSym
241  , IfTrue_< LOW
242  , IfTrue_< UPP
243  , DeclDiag
244  , DeclLow >
245  , IfTrue_< UPP
246  , DeclUpp
247  , Noop > > > >;
249  //**********************************************************************************************
250 
251  public:
252  //**Type definitions****************************************************************************
255 
// Public type aliases of the expression.
// NOTE(review): ElementType and ResultType are declared on lines this listing skips
// (original lines 256-260); only their use is visible here.

// Type returned by the element access functions: a const element value.
261 using ReturnType = const ElementType;
// Type used when this expression appears as an operand of another expression: a const result.
262 using CompositeType = const ResultType;
263 

// Left operand storage/return type: held by value if MT1 is an expression, by const reference otherwise.
265 using LeftOperand = If_< IsExpression<MT1>, const MT1, const MT1& >;
266 

// Right operand storage/return type: held by value if MT2 is an expression, by const reference otherwise.
268 using RightOperand = If_< IsExpression<MT2>, const MT2, const MT2& >;
269 
272 
275  //**********************************************************************************************
276 
277  //**Compilation flags***************************************************************************
// Compilation flag for SIMD optimization. The visible conditions require that neither operand
// type is diagonal and that both operand types are SIMD-enabled.
// NOTE(review): the expression continues on original lines 281-282, which this listing skips;
// the remaining conditions and the closing '};' are not visible here.
279 enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
280 MT1::simdEnabled && MT2::simdEnabled &&
283 
285  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
286  !evaluateRight && MT2::smpAssignable };
287  //**********************************************************************************************
288 
289  //**SIMD properties*****************************************************************************
// SIMDSIZE is taken from SIMDTrait<ElementType>::size; the vectorized kernels use it as the
// step width of their SIMD loops and for rounding loop bounds down to a multiple of it.
291 enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
292  //**********************************************************************************************
293 
294  //**Constructor*********************************************************************************
// Constructor: stores the two operands of the multiplication expression.
//   lhs : left-hand side dense matrix operand
//   rhs : right-hand side dense matrix operand
// The operand sizes are only checked via BLAZE_INTERNAL_ASSERT (lhs.columns() must equal
// rhs.rows()); the constructor is declared noexcept and performs no other validation.
300 explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
301 : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
302 , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
303 {
304 BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
305 }
306  //**********************************************************************************************
307 
308  //**Access operator*****************************************************************************
// Element access: returns element (i,j) of the matrix product.
//   i : row index, asserted to be smaller than lhs_.rows()
//   j : column index, asserted to be smaller than rhs_.columns()
// The indices are only checked via BLAZE_INTERNAL_ASSERT; see at() for the throwing variant.
315 inline ReturnType operator()( size_t i, size_t j ) const {
316 BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
317 BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
318 
// Diagonal left operand: only the diagonal element of row i is used.
319 if( IsDiagonal<MT1>::value ) {
320 return lhs_(i,i) * rhs_(i,j);
321 }
// Diagonal right operand: only the diagonal element of column j is used.
322 else if( IsDiagonal<MT2>::value ) {
323 return lhs_(i,j) * rhs_(j,j);
324 }
// NOTE(review): original line 325 is missing from this listing. The block below is closed by
// '}' and followed by 'else', so a branch header ('else if( ... ) {') evidently stood here --
// restore it from the original header.
// 'begin' is the first inner index k used: it is raised to i (i+1 if strictly upper) for an
// upper left operand and to j (j+1 if strictly lower) for a lower right operand.
326 const size_t begin( ( IsUpper<MT1>::value )
327 ?( ( IsLower<MT2>::value )
328 ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
329 , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
330 :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
331 :( ( IsLower<MT2>::value )
332 ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
333 :( 0UL ) ) );
// 'end' is one past the last inner index k used: it is lowered to i+1 (i if strictly lower)
// for a lower left operand and to j+1 (j if strictly upper) for an upper right operand.
334 const size_t end( ( IsLower<MT1>::value )
335 ?( ( IsUpper<MT2>::value )
336 ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
337 , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
338 :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
339 :( ( IsUpper<MT2>::value )
340 ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
341 :( lhs_.columns() ) ) );
342 
// Empty index range: the result is a default-constructed element.
343 if( begin >= end ) return ElementType();
344 
345 const size_t n( end - begin );
346 
// Product of the restricted parts of row i of lhs_ and column j of rhs_.
347 return subvector( row( lhs_, i ), begin, n ) * subvector( column( rhs_, j ), begin, n );
348 }
// Remaining case: product of the full row i of lhs_ with the full column j of rhs_.
349 else {
350 return row( lhs_, i ) * column( rhs_, j );
351 }
352 }
353  //**********************************************************************************************
354 
355  //**At function*********************************************************************************
363  inline ReturnType at( size_t i, size_t j ) const {
364  if( i >= lhs_.rows() ) {
365  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
366  }
367  if( j >= rhs_.columns() ) {
368  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
369  }
370  return (*this)(i,j);
371  }
372  //**********************************************************************************************
373 
374  //**Rows function*******************************************************************************
379  inline size_t rows() const noexcept {
380  return lhs_.rows();
381  }
382  //**********************************************************************************************
383 
384  //**Columns function****************************************************************************
389  inline size_t columns() const noexcept {
390  return rhs_.columns();
391  }
392  //**********************************************************************************************
393 
394  //**Left operand access*************************************************************************
399  inline LeftOperand leftOperand() const noexcept {
400  return lhs_;
401  }
402  //**********************************************************************************************
403 
404  //**Right operand access************************************************************************
409  inline RightOperand rightOperand() const noexcept {
410  return rhs_;
411  }
412  //**********************************************************************************************
413 
414  //**********************************************************************************************
420  template< typename T >
421  inline bool canAlias( const T* alias ) const noexcept {
422  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
423  }
424  //**********************************************************************************************
425 
426  //**********************************************************************************************
432  template< typename T >
433  inline bool isAliased( const T* alias ) const noexcept {
434  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
435  }
436  //**********************************************************************************************
437 
438  //**********************************************************************************************
443  inline bool isAligned() const noexcept {
444  return lhs_.isAligned() && rhs_.isAligned();
445  }
446  //**********************************************************************************************
447 
448  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// From the visible conditions: the first group holds if BLAZE_BLAS_MODE or
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION is unset, or if the number of result elements
// (rows() * columns()) is below DMATTDMATMULT_THRESHOLD; in addition the number of result
// elements must reach SMP_DMATTDMATMULT_THRESHOLD.
// NOTE(review): original lines 456 and 459 are missing from this listing. As shown, the
// expression ends on '&&' without a final operand or ';' -- at least one further condition in
// the first group and the trailing condition(s) must be restored from the original header.
453 inline bool canSMPAssign() const noexcept {
454 return ( !BLAZE_BLAS_MODE ||
455 !BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION ||
457 ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
458 ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD ) &&
460 }
461  //**********************************************************************************************
462 
463  private:
464  //**Member variables****************************************************************************
467  //**********************************************************************************************
468 
469  //**Assignment to dense matrices****************************************************************
// Assignment of the multiplication expression to a dense matrix.
//   lhs : target dense matrix (any storage order); its size must already match the expression
//   rhs : the multiplication expression to be assigned
// Evaluates both operands serially and forwards to selectAssignKernel().
// NOTE(review): original line 486 is missing from this listing, and the types LT and RT used
// below are declared outside the visible lines -- their exact definition is not shown here.
482 template< typename MT // Type of the target dense matrix
483 , bool SO > // Storage order of the target dense matrix
484 friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
485 {
487 
488 BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
489 BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
490 
// Empty target: nothing to do.
491 if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
492 return;
493 }
// Inner dimension is zero: every element of the product is an empty sum, so the target is reset.
494 else if( rhs.lhs_.columns() == 0UL ) {
495 reset( ~lhs );
496 return;
497 }
498 
499 LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
500 RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
501 
// The evaluated operands must have the same sizes as the stored operands and fit the target.
502 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
503 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
504 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
505 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
506 BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
507 BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
508 
509 DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
510 }
512  //**********************************************************************************************
513 
514  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = A * B.
//   C : target matrix, A : left operand, B : right operand
// Calls selectSmallAssignKernel() if the visible condition holds and selectBlasAssignKernel()
// otherwise.
// NOTE(review): original line 530 (the start of the 'if' condition) is missing from this
// listing; only its last operand is visible, which compares the number of elements of C
// against DMATTDMATMULT_THRESHOLD. Restore the full condition from the original header.
525 template< typename MT3 // Type of the left-hand side target matrix
526 , typename MT4 // Type of the left-hand side matrix operand
527 , typename MT5 > // Type of the right-hand side matrix operand
528 static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
529 {
531 ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
532 selectSmallAssignKernel( C, A, B );
533 else
534 selectBlasAssignKernel( C, A, B );
535 }
537  //**********************************************************************************************
538 
539  //**Default assignment to row-major dense matrices (general/general)****************************
// Default (scalar) assignment kernel C = A * B for a row-major target.
//   C : row-major target matrix, A : left operand, B : right operand
// Rows are traversed in the outer loop. Elements outside the computed index ranges are reset;
// for SYM/HERM results only the part from the diagonal to the right is computed and the part
// below the diagonal is mirrored at the end.
// NOTE(review): this listing skips original line 556 (the line between the template header and
// the function name, i.e. the return type) and lines 581, 584, 588 and 591 inside the
// jbegin/jend expressions. The code is reproduced as listed; restore the missing lines from the
// original header.
553 template< typename MT3 // Type of the left-hand side target matrix
554 , typename MT4 // Type of the left-hand side matrix operand
555 , typename MT5 > // Type of the right-hand side matrix operand
557 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
558 {
559 const size_t M( A.rows() );
560 const size_t N( B.columns() );
561 const size_t K( A.columns() );
562 
// Any structure flag implies a square result.
563 BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
564 
// Rows [0,ibegin) and [iend,M) are reset without computation: a strictly lower A skips the
// first row (two rows if B is strictly lower as well), a strictly upper A skips the last row
// (two rows if B is strictly upper as well).
565 const size_t ibegin( ( IsStrictlyLower<MT4>::value )
566 ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
567 :( 0UL ) );
568 const size_t iend( ( IsStrictlyUpper<MT4>::value )
569 ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
570 :( M ) );
571 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
572 
573 for( size_t i=0UL; i<ibegin; ++i ) {
574 for( size_t j=0UL; j<N; ++j ) {
575 reset( (~C)(i,j) );
576 }
577 }
578 for( size_t i=ibegin; i<iend; ++i )
579 {
// Column range [jbegin,jend) of row i that is actually computed.
// NOTE(review): parts of both expressions are missing (see the note at the top of the function).
580 const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
582 ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
583 :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
585 ?( SYM || HERM || UPP ? max( i, 1UL ) : 1UL )
586 :( SYM || HERM || UPP ? i : 0UL ) ) );
587 const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
589 ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
590 :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
592 ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
593 :( LOW ? i+1UL : N ) ) );
594 
// With structure flags the range can be empty or inverted; the whole row is reset in that case.
595 if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) {
596 for( size_t j=0UL; j<N; ++j ) {
597 reset( (~C)(i,j) );
598 }
599 continue;
600 }
601 
602 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
603 
// For SYM/HERM the elements left of the diagonal are not touched here; they are written by
// the mirroring step at the end of the function.
604 for( size_t j=( SYM || HERM ? i : 0UL ); j<jbegin; ++j ) {
605 reset( (~C)(i,j) );
606 }
607 for( size_t j=jbegin; j<jend; ++j )
608 {
// Inner index range [kbegin,kend), restricted according to the triangular structure of A and B.
609 const size_t kbegin( ( IsUpper<MT4>::value )
610 ?( ( IsLower<MT5>::value )
611 ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
612 , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
613 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
614 :( ( IsLower<MT5>::value )
615 ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
616 :( 0UL ) ) );
617 const size_t kend( ( IsLower<MT4>::value )
618 ?( ( IsUpper<MT5>::value )
619 ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
620 , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
621 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
622 :( ( IsUpper<MT5>::value )
623 ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
624 :( K ) ) );
625 BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
626 
// The first product initializes the element by assignment, the remaining ones are accumulated.
627 (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
628 for( size_t k=kbegin+1UL; k<kend; ++k ) {
629 (~C)(i,j) += A(i,k) * B(k,j);
630 }
631 }
632 for( size_t j=jend; j<N; ++j ) {
633 reset( (~C)(i,j) );
634 }
635 }
636 for( size_t i=iend; i<M; ++i ) {
637 for( size_t j=0UL; j<N; ++j ) {
638 reset( (~C)(i,j) );
639 }
640 }
641 
// Mirroring step: fill the part below the diagonal from the computed part above it
// (complex conjugate for HERM).
642 if( SYM || HERM ) {
643 for( size_t i=1UL; i<M; ++i ) {
644 for( size_t j=0UL; j<i; ++j ) {
645 (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
646 }
647 }
648 }
649 }
651  //**********************************************************************************************
652 
653  //**Default assignment to column-major dense matrices (general/general)*************************
// Default (scalar) assignment kernel C = A * B for a column-major target; enabled only if
// neither A nor B is diagonal.
//   C : column-major target matrix, A : left operand, B : right operand
// Columns are traversed in the outer loop. Elements outside the computed index ranges are
// reset; for SYM/HERM results only the part from the diagonal downwards is computed and the
// part above the diagonal is mirrored at the end.
// NOTE(review): this listing skips original lines 695, 698, 702 and 705 inside the ibegin/iend
// expressions. The code is reproduced as listed; restore the missing lines from the original
// header.
667 template< typename MT3 // Type of the left-hand side target matrix
668 , typename MT4 // Type of the left-hand side matrix operand
669 , typename MT5 > // Type of the right-hand side matrix operand
670 static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
671 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
672 {
673 const size_t M( A.rows() );
674 const size_t N( B.columns() );
675 const size_t K( A.columns() );
676 
// Any structure flag implies a square result.
677 BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
678 
// Columns [0,jbegin) and [jend,N) are reset without computation: a strictly upper B skips the
// first column (two columns if A is strictly upper as well), a strictly lower B skips the last
// column (two columns if A is strictly lower as well).
679 const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
680 ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
681 :( 0UL ) );
682 const size_t jend( ( IsStrictlyLower<MT5>::value )
683 ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
684 :( N ) );
685 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
686 
687 for( size_t j=0UL; j<jbegin; ++j ) {
688 for( size_t i=0UL; i<M; ++i ) {
689 reset( (~C)(i,j) );
690 }
691 }
692 for( size_t j=jbegin; j<jend; ++j )
693 {
// Row range [ibegin,iend) of column j that is actually computed.
// NOTE(review): parts of both expressions are missing (see the note at the top of the function).
694 const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
696 ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
697 :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
699 ?( SYM || HERM || LOW ? max( j, 1UL ) : 1UL )
700 :( SYM || HERM || LOW ? j : 0UL ) ) );
701 const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
703 ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
704 :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
706 ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
707 :( UPP ? j+1UL : M ) ) );
708 
// With structure flags the range can be empty or inverted; the whole column is reset in that case.
709 if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) {
710 for( size_t i=0UL; i<M; ++i ) {
711 reset( (~C)(i,j) );
712 }
713 continue;
714 }
715 
716 BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
717 
// For SYM/HERM the elements above the diagonal are not touched here; they are written by the
// mirroring step at the end of the function.
718 for( size_t i=( SYM || HERM ? j : 0UL ); i<ibegin; ++i ) {
719 reset( (~C)(i,j) );
720 }
721 for( size_t i=ibegin; i<iend; ++i )
722 {
// Inner index range [kbegin,kend), restricted according to the triangular structure of A and B.
723 const size_t kbegin( ( IsUpper<MT4>::value )
724 ?( ( IsLower<MT5>::value )
725 ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
726 , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
727 :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
728 :( ( IsLower<MT5>::value )
729 ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
730 :( 0UL ) ) );
731 const size_t kend( ( IsLower<MT4>::value )
732 ?( ( IsUpper<MT5>::value )
733 ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
734 , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
735 :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
736 :( ( IsUpper<MT5>::value )
737 ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
738 :( K ) ) );
739 BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
740 
// The first product initializes the element by assignment, the remaining ones are accumulated.
741 (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
742 for( size_t k=kbegin+1UL; k<kend; ++k ) {
743 (~C)(i,j) += A(i,k) * B(k,j);
744 }
745 }
746 for( size_t i=iend; i<M; ++i ) {
747 reset( (~C)(i,j) );
748 }
749 }
750 for( size_t j=jend; j<N; ++j ) {
751 for( size_t i=0UL; i<M; ++i ) {
752 reset( (~C)(i,j) );
753 }
754 }
755 
// Mirroring step: fill the part above the diagonal from the computed part below it
// (complex conjugate for HERM).
756 if( SYM || HERM ) {
757 for( size_t j=1UL; j<N; ++j ) {
758 for( size_t i=0UL; i<j; ++i ) {
759 (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
760 }
761 }
762 }
763 }
765  //**********************************************************************************************
766 
767  //**Default assignment to row-major dense matrices (general/diagonal)***************************
781  template< typename MT3 // Type of the left-hand side target matrix
782  , typename MT4 // Type of the left-hand side matrix operand
783  , typename MT5 > // Type of the right-hand side matrix operand
784  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
785  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
786  {
787  const size_t M( A.rows() );
788  const size_t N( B.columns() );
789 
790  for( size_t i=0UL; i<M; ++i )
791  {
792  const size_t jbegin( ( IsUpper<MT4>::value )
793  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
794  :( 0UL ) );
795  const size_t jend( ( IsLower<MT4>::value )
796  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
797  :( N ) );
798  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
799 
800  if( IsUpper<MT4>::value ) {
801  for( size_t j=0UL; j<jbegin; ++j ) {
802  reset( (~C)(i,j) );
803  }
804  }
805  for( size_t j=jbegin; j<jend; ++j ) {
806  (~C)(i,j) = A(i,j) * B(j,j);
807  }
808  if( IsLower<MT4>::value ) {
809  for( size_t j=jend; j<N; ++j ) {
810  reset( (~C)(i,j) );
811  }
812  }
813  }
814  }
816  //**********************************************************************************************
817 
818  //**Default assignment to column-major dense matrices (general/diagonal)************************
832  template< typename MT3 // Type of the left-hand side target matrix
833  , typename MT4 // Type of the left-hand side matrix operand
834  , typename MT5 > // Type of the right-hand side matrix operand
835  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
836  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
837  {
838  constexpr size_t block( BLOCK_SIZE );
839 
840  const size_t M( A.rows() );
841  const size_t N( B.columns() );
842 
843  for( size_t jj=0UL; jj<N; jj+=block ) {
844  const size_t jend( min( N, jj+block ) );
845  for( size_t ii=0UL; ii<M; ii+=block ) {
846  const size_t iend( min( M, ii+block ) );
847  for( size_t j=jj; j<jend; ++j )
848  {
849  const size_t ibegin( ( IsLower<MT4>::value )
850  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
851  :( ii ) );
852  const size_t ipos( ( IsUpper<MT4>::value )
853  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
854  :( iend ) );
855 
856  if( IsLower<MT4>::value ) {
857  for( size_t i=ii; i<ibegin; ++i ) {
858  reset( (~C)(i,j) );
859  }
860  }
861  for( size_t i=ibegin; i<ipos; ++i ) {
862  (~C)(i,j) = A(i,j) * B(j,j);
863  }
864  if( IsUpper<MT4>::value ) {
865  for( size_t i=ipos; i<iend; ++i ) {
866  reset( (~C)(i,j) );
867  }
868  }
869  }
870  }
871  }
872  }
874  //**********************************************************************************************
875 
876  //**Default assignment to row-major dense matrices (diagonal/general)***************************
// Default assignment kernel C = A * B for a row-major target where every computed element is
// formed as A(i,i) * B(i,j), i.e. only the diagonal of A is read.
//   C : row-major target matrix, A : left operand, B : right operand
// The target is processed in tiles of BLOCK_SIZE x BLOCK_SIZE elements.
// NOTE(review): original line 893 (the line between the template header and the function name,
// i.e. the return type / enabling condition) is missing from this listing -- restore it from
// the original header.
890 template< typename MT3 // Type of the left-hand side target matrix
891 , typename MT4 // Type of the left-hand side matrix operand
892 , typename MT5 > // Type of the right-hand side matrix operand
894 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
895 {
896 constexpr size_t block( BLOCK_SIZE );
897 
898 const size_t M( A.rows() );
899 const size_t N( B.columns() );
900 
// Outer loops walk over the row tiles [ii,iend) and column tiles [jj,jend).
901 for( size_t ii=0UL; ii<M; ii+=block ) {
902 const size_t iend( min( M, ii+block ) );
903 for( size_t jj=0UL; jj<N; jj+=block ) {
904 const size_t jend( min( N, jj+block ) );
905 for( size_t i=ii; i<iend; ++i )
906 {
// Column range [jbegin,jpos) of row i that is computed: for an upper B it starts at the
// diagonal (one past it if strictly upper), for a lower B it ends at the diagonal (before it
// if strictly lower); both bounds are limited to the current tile.
907 const size_t jbegin( ( IsUpper<MT5>::value )
908 ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
909 :( jj ) );
910 const size_t jpos( ( IsLower<MT5>::value )
911 ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
912 :( jend ) );
913 
// Columns in front of the computed range are reset for an upper B.
914 if( IsUpper<MT5>::value ) {
915 for( size_t j=jj; j<jbegin; ++j ) {
916 reset( (~C)(i,j) );
917 }
918 }
919 for( size_t j=jbegin; j<jpos; ++j ) {
920 (~C)(i,j) = A(i,i) * B(i,j);
921 }
// Columns behind the computed range are reset for a lower B.
922 if( IsLower<MT5>::value ) {
923 for( size_t j=jpos; j<jend; ++j ) {
924 reset( (~C)(i,j) );
925 }
926 }
927 }
928 }
929 }
930 }
932  //**********************************************************************************************
933 
934  //**Default assignment to column-major dense matrices (diagonal/general)************************
948  template< typename MT3 // Type of the left-hand side target matrix
949  , typename MT4 // Type of the left-hand side matrix operand
950  , typename MT5 > // Type of the right-hand side matrix operand
951  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
952  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
953  {
954  const size_t M( A.rows() );
955  const size_t N( B.columns() );
956 
957  for( size_t j=0UL; j<N; ++j )
958  {
959  const size_t ibegin( ( IsLower<MT5>::value )
960  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
961  :( 0UL ) );
962  const size_t iend( ( IsUpper<MT5>::value )
963  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
964  :( M ) );
965  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
966 
967  if( IsLower<MT5>::value ) {
968  for( size_t i=0UL; i<ibegin; ++i ) {
969  reset( (~C)(i,j) );
970  }
971  }
972  for( size_t i=ibegin; i<iend; ++i ) {
973  (~C)(i,j) = A(i,i) * B(i,j);
974  }
975  if( IsUpper<MT5>::value ) {
976  for( size_t i=iend; i<M; ++i ) {
977  reset( (~C)(i,j) );
978  }
979  }
980  }
981  }
983  //**********************************************************************************************
984 
985  //**Default assignment to dense matrices (diagonal/diagonal)************************************
999  template< typename MT3 // Type of the left-hand side target matrix
1000  , typename MT4 // Type of the left-hand side matrix operand
1001  , typename MT5 > // Type of the right-hand side matrix operand
1002  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
1003  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
1004  {
1005  reset( C );
1006 
1007  for( size_t i=0UL; i<A.rows(); ++i ) {
1008  C(i,i) = A(i,i) * B(i,i);
1009  }
1010  }
1012  //**********************************************************************************************
1013 
1014  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel C = A * B that simply forwards to selectDefaultAssignKernel().
//   C : target matrix, A : left operand, B : right operand
// NOTE(review): original line 1031 (the line between the template header and the function
// name, i.e. the return type / enabling condition) is missing from this listing -- restore it
// from the original header.
1028 template< typename MT3 // Type of the left-hand side target matrix
1029 , typename MT4 // Type of the left-hand side matrix operand
1030 , typename MT5 > // Type of the right-hand side matrix operand
1032 selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
1033 {
1034 selectDefaultAssignKernel( C, A, B );
1035 }
1037  //**********************************************************************************************
1038 
1039  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
1054  template< typename MT3 // Type of the left-hand side target matrix
1055  , typename MT4 // Type of the left-hand side matrix operand
1056  , typename MT5 > // Type of the right-hand side matrix operand
1058  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1059  {
1060  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
1061 
1062  const size_t M( A.rows() );
1063  const size_t N( B.columns() );
1064  const size_t K( A.columns() );
1065 
1066  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1067 
1068  if( LOW && UPP ) {
1069  reset( ~C );
1070  }
1071 
1072  {
1073  size_t i( 0UL );
1074 
1075  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
1076  {
1077  const size_t jend( LOW ? i+2UL : N );
1078  size_t j( SYM || HERM || UPP ? i : 0UL );
1079 
1080  for( ; (j+4UL) <= jend; j+=4UL )
1081  {
1082  const size_t kbegin( ( IsUpper<MT4>::value )
1083  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1084  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1085  const size_t kend( ( IsLower<MT4>::value )
1086  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
1087  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
1088 
1089  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1090  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1091 
1092  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1093  size_t k( kbegin );
1094 
1095  for( ; k<kpos; k+=SIMDSIZE ) {
1096  const SIMDType a1( A.load(i ,k) );
1097  const SIMDType a2( A.load(i+1UL,k) );
1098  const SIMDType b1( B.load(k,j ) );
1099  const SIMDType b2( B.load(k,j+1UL) );
1100  const SIMDType b3( B.load(k,j+2UL) );
1101  const SIMDType b4( B.load(k,j+3UL) );
1102  xmm1 += a1 * b1;
1103  xmm2 += a1 * b2;
1104  xmm3 += a1 * b3;
1105  xmm4 += a1 * b4;
1106  xmm5 += a2 * b1;
1107  xmm6 += a2 * b2;
1108  xmm7 += a2 * b3;
1109  xmm8 += a2 * b4;
1110  }
1111 
1112  (~C)(i ,j ) = sum( xmm1 );
1113  (~C)(i ,j+1UL) = sum( xmm2 );
1114  (~C)(i ,j+2UL) = sum( xmm3 );
1115  (~C)(i ,j+3UL) = sum( xmm4 );
1116  (~C)(i+1UL,j ) = sum( xmm5 );
1117  (~C)(i+1UL,j+1UL) = sum( xmm6 );
1118  (~C)(i+1UL,j+2UL) = sum( xmm7 );
1119  (~C)(i+1UL,j+3UL) = sum( xmm8 );
1120 
1121  for( ; remainder && k<kend; ++k ) {
1122  (~C)(i ,j ) += A(i ,k) * B(k,j );
1123  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1124  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
1125  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
1126  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1127  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1128  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
1129  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
1130  }
1131  }
1132 
1133  for( ; (j+2UL) <= jend; j+=2UL )
1134  {
1135  const size_t kbegin( ( IsUpper<MT4>::value )
1136  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1137  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1138  const size_t kend( ( IsLower<MT4>::value )
1139  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1140  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1141 
1142  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1143  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1144 
1145  SIMDType xmm1, xmm2, xmm3, xmm4;
1146  size_t k( kbegin );
1147 
1148  for( ; k<kpos; k+=SIMDSIZE ) {
1149  const SIMDType a1( A.load(i ,k) );
1150  const SIMDType a2( A.load(i+1UL,k) );
1151  const SIMDType b1( B.load(k,j ) );
1152  const SIMDType b2( B.load(k,j+1UL) );
1153  xmm1 += a1 * b1;
1154  xmm2 += a1 * b2;
1155  xmm3 += a2 * b1;
1156  xmm4 += a2 * b2;
1157  }
1158 
1159  (~C)(i ,j ) = sum( xmm1 );
1160  (~C)(i ,j+1UL) = sum( xmm2 );
1161  (~C)(i+1UL,j ) = sum( xmm3 );
1162  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1163 
1164  for( ; remainder && k<kend; ++k ) {
1165  (~C)(i ,j ) += A(i ,k) * B(k,j );
1166  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1167  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1168  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1169  }
1170  }
1171 
1172  if( j < jend )
1173  {
1174  const size_t kbegin( ( IsUpper<MT4>::value )
1175  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1176  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1177  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1178 
1179  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1180  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1181 
1182  SIMDType xmm1, xmm2;
1183  size_t k( kbegin );
1184 
1185  for( ; k<kpos; k+=SIMDSIZE ) {
1186  const SIMDType b1( B.load(k,j) );
1187  xmm1 += A.load(i ,k) * b1;
1188  xmm2 += A.load(i+1UL,k) * b1;
1189  }
1190 
1191  (~C)(i ,j) = sum( xmm1 );
1192  (~C)(i+1UL,j) = sum( xmm2 );
1193 
1194  for( ; remainder && k<kend; ++k ) {
1195  (~C)(i ,j) += A(i ,k) * B(k,j);
1196  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1197  }
1198  }
1199  }
1200 
1201  for( ; i<M; ++i )
1202  {
1203  const size_t jend( LOW ? i+1UL : N );
1204  size_t j( SYM || HERM || UPP ? i : 0UL );
1205 
1206  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
1207  {
1208  const size_t kbegin( ( IsUpper<MT4>::value )
1209  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1210  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1211  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
1212 
1213  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1214  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1215 
1216  SIMDType xmm1, xmm2, xmm3, xmm4;
1217  size_t k( kbegin );
1218 
1219  for( ; k<kpos; k+=SIMDSIZE ) {
1220  const SIMDType a1( A.load(i,k) );
1221  xmm1 += a1 * B.load(k,j );
1222  xmm2 += a1 * B.load(k,j+1UL);
1223  xmm3 += a1 * B.load(k,j+2UL);
1224  xmm4 += a1 * B.load(k,j+3UL);
1225  }
1226 
1227  (~C)(i,j ) = sum( xmm1 );
1228  (~C)(i,j+1UL) = sum( xmm2 );
1229  (~C)(i,j+2UL) = sum( xmm3 );
1230  (~C)(i,j+3UL) = sum( xmm4 );
1231 
1232  for( ; remainder && k<kend; ++k ) {
1233  (~C)(i,j ) += A(i,k) * B(k,j );
1234  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1235  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
1236  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
1237  }
1238  }
1239 
1240  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
1241  {
1242  const size_t kbegin( ( IsUpper<MT4>::value )
1243  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1244  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1245  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1246 
1247  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1248  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1249 
1250  SIMDType xmm1, xmm2;
1251  size_t k( kbegin );
1252 
1253  for( ; k<kpos; k+=SIMDSIZE ) {
1254  const SIMDType a1( A.load(i,k) );
1255  xmm1 += a1 * B.load(k,j );
1256  xmm2 += a1 * B.load(k,j+1UL);
1257  }
1258 
1259  (~C)(i,j ) = sum( xmm1 );
1260  (~C)(i,j+1UL) = sum( xmm2 );
1261 
1262  for( ; remainder && k<kend; ++k ) {
1263  (~C)(i,j ) += A(i,k) * B(k,j );
1264  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1265  }
1266  }
1267 
1268  for( ; j<jend; ++j )
1269  {
1270  const size_t kbegin( ( IsUpper<MT4>::value )
1271  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1272  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1273 
1274  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1275  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1276 
1277  SIMDType xmm1;
1278  size_t k( kbegin );
1279 
1280  for( ; k<kpos; k+=SIMDSIZE ) {
1281  xmm1 += A.load(i,k) * B.load(k,j);
1282  }
1283 
1284  (~C)(i,j) = sum( xmm1 );
1285 
1286  for( ; remainder && k<K; ++k ) {
1287  (~C)(i,j) += A(i,k) * B(k,j);
1288  }
1289  }
1290  }
1291  }
1292 
1293  if( SYM || HERM ) {
1294  for( size_t i=2UL; i<M; ++i ) {
1295  const size_t jend( 2UL * ( i/2UL ) );
1296  for( size_t j=0UL; j<jend; ++j ) {
1297  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1298  }
1299  }
1300  }
1301  else if( LOW && !UPP ) {
1302  for( size_t j=2UL; j<N; ++j ) {
1303  const size_t iend( 2UL * ( j/2UL ) );
1304  for( size_t i=0UL; i<iend; ++i ) {
1305  reset( (~C)(i,j) );
1306  }
1307  }
1308  }
1309  else if( !LOW && UPP ) {
1310  for( size_t i=2UL; i<M; ++i ) {
1311  const size_t jend( 2UL * ( i/2UL ) );
1312  for( size_t j=0UL; j<jend; ++j ) {
1313  reset( (~C)(i,j) );
1314  }
1315  }
1316  }
1317  }
1319  //**********************************************************************************************
1320 
1321  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for a column-major target matrix.
//
// Computes C = A * B: every target element is accumulated with SIMD loads along the shared
// dimension k (A.load(i,k) * B.load(k,j)), reduced with sum(), and, if required, completed
// by a scalar remainder loop.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side operand, read via A.load(i,k) and A(i,k)
//   B  right-hand side operand, read via B.load(k,j) and B(k,j)
//
// Rows are processed in tiles of 4, then 2, then single rows; columns in tiles of 2, then 1.
// The SYM/HERM/LOW/UPP flags restrict which part of C the 4-row tiles compute; the part that
// was skipped is mirrored or reset in the post-processing step at the end of the function.
1336  template< typename MT3 // Type of the left-hand side target matrix
1337  , typename MT4 // Type of the left-hand side matrix operand
1338  , typename MT5 > // Type of the right-hand side matrix operand
// NOTE(review): listing line 1339 (presumably the return type of the function) is not
// present in this view -- confirm against the original header.
1340  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1341  {
// The scalar remainder loops are only required if at least one operand is not padded.
1342  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
1343 
1344  const size_t M( A.rows() );
1345  const size_t N( B.columns() );
1346  const size_t K( A.columns() );
1347 
1348  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1349 
// With both LOW and UPP set only the diagonal elements are computed below (see the
// single-row loop), therefore the whole target is cleared up front.
1350  if( LOW && UPP ) {
1351  reset( ~C );
1352  }
1353 
1354  {
1355  size_t i( 0UL );
1356 
// Tiles of 4 rows. For SYM/HERM/LOW the columns stop at i+4, for UPP they start at i.
1357  for( ; !( LOW && UPP ) && (i+4UL) <= M; i+=4UL )
1358  {
1359  const size_t jend( SYM || HERM || LOW ? i+4UL : N );
1360  size_t j( UPP ? i : 0UL );
1361 
// 4x2 tile: eight SIMD accumulators.
1362  for( ; (j+2UL) <= jend; j+=2UL )
1363  {
// The k-range is narrowed by the triangular structure of the operands: an upper A and/or
// lower B raises the start, a lower A and/or upper B lowers the end. The start is masked
// with size_t(-SIMDSIZE) so that it falls on a SIMDSIZE boundary.
1364  const size_t kbegin( ( IsUpper<MT4>::value )
1365  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1366  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1367  const size_t kend( ( IsLower<MT4>::value )
1368  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
1369  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1370 
// kpos marks the end of the SIMD loop: kend rounded down to a multiple of SIMDSIZE if a
// remainder loop exists, kend itself otherwise.
1371  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1372  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1373 
1374  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1375  size_t k( kbegin );
1376 
1377  for( ; k<kpos; k+=SIMDSIZE ) {
1378  const SIMDType a1( A.load(i ,k) );
1379  const SIMDType a2( A.load(i+1UL,k) );
1380  const SIMDType a3( A.load(i+2UL,k) );
1381  const SIMDType a4( A.load(i+3UL,k) );
1382  const SIMDType b1( B.load(k,j ) );
1383  const SIMDType b2( B.load(k,j+1UL) );
1384  xmm1 += a1 * b1;
1385  xmm2 += a1 * b2;
1386  xmm3 += a2 * b1;
1387  xmm4 += a2 * b2;
1388  xmm5 += a3 * b1;
1389  xmm6 += a3 * b2;
1390  xmm7 += a4 * b1;
1391  xmm8 += a4 * b2;
1392  }
1393 
// Horizontal reduction: this is a plain assignment, i.e. previous values of C are overwritten.
1394  (~C)(i ,j ) = sum( xmm1 );
1395  (~C)(i ,j+1UL) = sum( xmm2 );
1396  (~C)(i+1UL,j ) = sum( xmm3 );
1397  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1398  (~C)(i+2UL,j ) = sum( xmm5 );
1399  (~C)(i+2UL,j+1UL) = sum( xmm6 );
1400  (~C)(i+3UL,j ) = sum( xmm7 );
1401  (~C)(i+3UL,j+1UL) = sum( xmm8 );
1402 
// Scalar tail for the k values in [kpos,kend) that the SIMD loop did not cover.
1403  for( ; remainder && k<kend; ++k ) {
1404  (~C)(i ,j ) += A(i ,k) * B(k,j );
1405  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1406  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1407  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1408  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
1409  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
1410  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
1411  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
1412  }
1413  }
1414 
// At most one column is left over for this row tile: 4x1 tile.
1415  if( j < jend )
1416  {
1417  const size_t kbegin( ( IsUpper<MT4>::value )
1418  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1419  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1420  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
1421 
1422  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1423  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1424 
1425  SIMDType xmm1, xmm2, xmm3, xmm4;
1426  size_t k( kbegin );
1427 
1428  for( ; k<kpos; k+=SIMDSIZE ) {
1429  const SIMDType b1( B.load(k,j) );
1430  xmm1 += A.load(i ,k) * b1;
1431  xmm2 += A.load(i+1UL,k) * b1;
1432  xmm3 += A.load(i+2UL,k) * b1;
1433  xmm4 += A.load(i+3UL,k) * b1;
1434  }
1435 
1436  (~C)(i ,j) = sum( xmm1 );
1437  (~C)(i+1UL,j) = sum( xmm2 );
1438  (~C)(i+2UL,j) = sum( xmm3 );
1439  (~C)(i+3UL,j) = sum( xmm4 );
1440 
1441  for( ; remainder && k<kend; ++k ) {
1442  (~C)(i ,j) += A(i ,k) * B(k,j);
1443  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1444  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
1445  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
1446  }
1447  }
1448  }
1449 
// Tiles of 2 rows for what the 4-row loop left over. In contrast to the 4-row loop the
// column range is not restricted by the flags here: j always runs from 0 to N.
1450  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
1451  {
1452  size_t j( 0UL );
1453 
// 2x2 tile.
1454  for( ; (j+2UL) <= N; j+=2UL )
1455  {
1456  const size_t kbegin( ( IsUpper<MT4>::value )
1457  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1458  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1459  const size_t kend( ( IsLower<MT4>::value )
1460  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1461  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1462 
1463  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1464  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1465 
1466  SIMDType xmm1, xmm2, xmm3, xmm4;
1467  size_t k( kbegin );
1468 
1469  for( ; k<kpos; k+=SIMDSIZE ) {
1470  const SIMDType a1( A.load(i ,k) );
1471  const SIMDType a2( A.load(i+1UL,k) );
1472  const SIMDType b1( B.load(k,j ) );
1473  const SIMDType b2( B.load(k,j+1UL) );
1474  xmm1 += a1 * b1;
1475  xmm2 += a1 * b2;
1476  xmm3 += a2 * b1;
1477  xmm4 += a2 * b2;
1478  }
1479 
1480  (~C)(i ,j ) = sum( xmm1 );
1481  (~C)(i ,j+1UL) = sum( xmm2 );
1482  (~C)(i+1UL,j ) = sum( xmm3 );
1483  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1484 
1485  for( ; remainder && k<kend; ++k ) {
1486  (~C)(i ,j ) += A(i ,k) * B(k,j );
1487  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1488  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1489  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1490  }
1491  }
1492 
// Last column in case N is odd: 2x1 tile.
1493  if( j < N )
1494  {
1495  const size_t kbegin( ( IsUpper<MT4>::value )
1496  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1497  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1498  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1499 
1500  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1501  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1502 
1503  SIMDType xmm1, xmm2;
1504  size_t k( kbegin );
1505 
1506  for( ; k<kpos; k+=SIMDSIZE ) {
1507  const SIMDType b1( B.load(k,j) );
1508  xmm1 += A.load(i ,k) * b1;
1509  xmm2 += A.load(i+1UL,k) * b1;
1510  }
1511 
1512  (~C)(i ,j) = sum( xmm1 );
1513  (~C)(i+1UL,j) = sum( xmm2 );
1514 
1515  for( ; remainder && k<kend; ++k ) {
1516  (~C)(i ,j) += A(i ,k) * B(k,j);
1517  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1518  }
1519  }
1520  }
1521 
// Remaining single rows. With LOW && UPP the two tiled loops above are skipped entirely,
// so this loop visits every row and computes only the diagonal element (j == i);
// otherwise it computes the complete row.
1522  for( ; i<M; ++i )
1523  {
1524  const size_t jend( LOW && UPP ? i+1UL : N );
1525  size_t j( LOW && UPP ? i : 0UL );
1526 
// 1x2 tile.
1527  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
1528  {
1529  const size_t kbegin( ( IsUpper<MT4>::value )
1530  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1531  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1532  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1533 
1534  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1535  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1536 
1537  SIMDType xmm1, xmm2;
1538  size_t k( kbegin );
1539 
1540  for( ; k<kpos; k+=SIMDSIZE ) {
1541  const SIMDType a1( A.load(i,k) );
1542  xmm1 += a1 * B.load(k,j );
1543  xmm2 += a1 * B.load(k,j+1UL);
1544  }
1545 
1546  (~C)(i,j ) = sum( xmm1 );
1547  (~C)(i,j+1UL) = sum( xmm2 );
1548 
1549  for( ; remainder && k<kend; ++k ) {
1550  (~C)(i,j ) += A(i,k) * B(k,j );
1551  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1552  }
1553  }
1554 
// Single elements; here the k-range always extends to K (no upper bound narrowing).
1555  for( ; j<jend; ++j )
1556  {
1557  const size_t kbegin( ( IsUpper<MT4>::value )
1558  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1559  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1560 
1561  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1562  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1563 
1564  SIMDType xmm1;
1565  size_t k( kbegin );
1566 
1567  for( ; k<kpos; k+=SIMDSIZE ) {
1568  xmm1 += A.load(i,k) * B.load(k,j);
1569  }
1570 
1571  (~C)(i,j) = sum( xmm1 );
1572 
1573  for( ; remainder && k<K; ++k ) {
1574  (~C)(i,j) += A(i,k) * B(k,j);
1575  }
1576  }
1577  }
1578  }
1579 
// Post-processing of the elements outside the 4x4 diagonal blocks.
// SYM/HERM: element (i,j) with i < 4*(j/4) is copied from the mirrored position (j,i),
// conjugated in the HERM case.
1580  if( ( SYM || HERM ) && ( N > 4UL ) ) {
1581  for( size_t j=4UL; j<N; ++j ) {
1582  const size_t iend( 4UL * ( j/4UL ) );
1583  for( size_t i=0UL; i<iend; ++i ) {
1584  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1585  }
1586  }
1587  }
// LOW only: the elements (i,j) with i < 4*(j/4) are reset.
1588  else if( LOW && !UPP ) {
1589  for( size_t j=4UL; j<N; ++j ) {
1590  const size_t iend( 4UL * ( j/4UL ) );
1591  for( size_t i=0UL; i<iend; ++i ) {
1592  reset( (~C)(i,j) );
1593  }
1594  }
1595  }
// UPP only: the elements (i,j) with j < 4*(i/4) are reset.
1596  else if( !LOW && UPP ) {
1597  for( size_t i=4UL; i<N; ++i ) {
1598  const size_t jend( 4UL * ( i/4UL ) );
1599  for( size_t j=0UL; j<jend; ++j ) {
1600  reset( (~C)(i,j) );
1601  }
1602  }
1603  }
1604  }
1606  //**********************************************************************************************
1607 
1608  //**Default assignment to dense matrices (large matrices)***************************************
// Large-matrix assignment kernel (default variant): performs no work of its own and
// forwards the target C and both operands A and B to selectDefaultAssignKernel().
1622  template< typename MT3 // Type of the left-hand side target matrix
1623  , typename MT4 // Type of the left-hand side matrix operand
1624  , typename MT5 > // Type of the right-hand side matrix operand
// NOTE(review): listing line 1625 (presumably the return type / enabling condition that
// distinguishes this overload from the vectorized one) is not present in this view.
1626  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1627  {
1628  selectDefaultAssignKernel( C, A, B );
1629  }
1631  //**********************************************************************************************
1632 
1633  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Large-matrix assignment kernel (vectorized variant): selects one of the multiplication
// routines smmm/hmmm/lmmm/ummm/mmm depending on the SYM, HERM, LOW and UPP flags.
// The flags are tested in this order, so SYM takes precedence over HERM, HERM over LOW,
// and LOW over UPP; mmm() is the fallback when no flag is set.
1648  template< typename MT3 // Type of the left-hand side target matrix
1649  , typename MT4 // Type of the left-hand side matrix operand
1650  , typename MT5 > // Type of the right-hand side matrix operand
// NOTE(review): listing line 1651 (presumably the return type / enabling condition) is
// not present in this view.
1652  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1653  {
// NOTE(review): ElementType(1) / ElementType(0) are presumably the scaling factors for
// the product and for the previous content of C -- confirm against blaze/math/dense/MMM.h.
1654  if( SYM )
1655  smmm( C, A, B, ElementType(1) );
1656  else if( HERM )
1657  hmmm( C, A, B, ElementType(1) );
1658  else if( LOW )
1659  lmmm( C, A, B, ElementType(1), ElementType(0) );
1660  else if( UPP )
1661  ummm( C, A, B, ElementType(1), ElementType(0) );
1662  else
1663  mmm( C, A, B, ElementType(1), ElementType(0) );
1664  }
1666  //**********************************************************************************************
1667 
1668  //**BLAS-based assignment to dense matrices (default)*******************************************
// BLAS assignment kernel (default variant): does not call any BLAS routine itself but
// forwards C, A and B to selectLargeAssignKernel().
1682  template< typename MT3 // Type of the left-hand side target matrix
1683  , typename MT4 // Type of the left-hand side matrix operand
1684  , typename MT5 > // Type of the right-hand side matrix operand
// NOTE(review): listing line 1685 (presumably the return type / enabling condition) is
// not present in this view.
1686  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1687  {
1688  selectLargeAssignKernel( C, A, B );
1689  }
1691  //**********************************************************************************************
1692 
1693  //**BLAS-based assignment to dense matrices*****************************************************
1694 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
1695 
// BLAS assignment kernel: computes C = A * B by means of the trmm()/gemm() wrappers.
// This overload is only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are set (see the enclosing #if).
//
//   - A triangular: C is first assigned B, then trmm() is called with A on the left side.
//   - B triangular: C is first assigned A, then trmm() is called with B on the right side.
//   - otherwise   : gemm() is called with both operands.
1708  template< typename MT3 // Type of the left-hand side target matrix
1709  , typename MT4 // Type of the left-hand side matrix operand
1710  , typename MT5 > // Type of the right-hand side matrix operand
// NOTE(review): listing line 1711 (presumably the return type / enabling condition) is
// not present in this view.
1712  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1713  {
1714  using ET = ElementType_<MT3>;
1715 
// A is checked first, i.e. if both operands are triangular the left-hand side variant is used.
1716  if( IsTriangular<MT4>::value ) {
1717  assign( C, B );
// NOTE(review): trmm() presumably multiplies C in place with the triangular matrix,
// scaled by ET(1) -- confirm against blaze/math/blas/trmm.h.
1718  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1719  }
1720  else if( IsTriangular<MT5>::value ) {
1721  assign( C, A );
1722  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1723  }
1724  else {
1725  gemm( C, A, B, ET(1), ET(0) );
1726  }
1727  }
1729 #endif
1730  //**********************************************************************************************
1731 
1732  //**Assignment to sparse matrices***************************************************************
// Assignment of the multiplication expression to a sparse matrix.
//
// The expression is not evaluated directly into the sparse target: it is first evaluated
// serially into a temporary of type TmpType, and that temporary (passed through the
// ForwardFunctor) is then assigned to the target.
//
// Parameters:
//   lhs  target sparse matrix
//   rhs  multiplication expression to be assigned
1745  template< typename MT // Type of the target sparse matrix
1746  , bool SO > // Storage order of the target sparse matrix
1747  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1748  {
// NOTE(review): several listing lines between 1748 and 1760 are not present in this view;
// presumably they declare TmpType and ForwardFunctor and contain compile-time checks.
1750 
1752 
1759 
1760  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1761  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1762 
1763  const ForwardFunctor fwd;
1764 
// Evaluate the complete product serially into a temporary before touching the target.
1765  const TmpType tmp( serial( rhs ) );
1766  assign( ~lhs, fwd( tmp ) );
1767  }
1769  //**********************************************************************************************
1770 
1771  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the multiplication expression to a dense matrix (lhs += A * B).
//
// Returns immediately if the target has no rows or no columns, or if the left-hand side
// operand has no columns (empty inner dimension). Otherwise both operands are evaluated
// serially and the work is handed to selectAddAssignKernel().
//
// Parameters:
//   lhs  target dense matrix
//   rhs  multiplication expression to be added
1784  template< typename MT // Type of the target dense matrix
1785  , bool SO > // Storage order of the target dense matrix
1786  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1787  {
1789 
1790  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1791  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1792 
// Nothing to add for an empty target or an empty inner dimension.
1793  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1794  return;
1795  }
1796 
1797  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1798  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1799 
1800  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1801  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1802  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1803  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1804  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1805  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1806 
1807  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1808  }
1810  //**********************************************************************************************
1811 
1812  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment: chooses between
// selectSmallAddAssignKernel() and selectBlasAddAssignKernel().
// From the visible part of the condition, the small kernel is used when the number of
// elements of C (rows * columns) is below DMATTDMATMULT_THRESHOLD.
1823  template< typename MT3 // Type of the left-hand side target matrix
1824  , typename MT4 // Type of the left-hand side matrix operand
1825  , typename MT5 > // Type of the right-hand side matrix operand
1826  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1827  {
// NOTE(review): listing line 1828 -- the opening `if(` and the first part of the
// condition -- is not present in this view; only the trailing size test is visible.
1829  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
1830  selectSmallAddAssignKernel( C, A, B );
1831  else
1832  selectBlasAddAssignKernel( C, A, B );
1833  }
1835  //**********************************************************************************************
1836 
1837  //**Default addition assignment to row-major dense matrices (general/general)*******************
// Default (scalar) addition assignment kernel for a row-major target; selected when
// neither operand is diagonal. Performs C(i,j) += sum over k of A(i,k) * B(k,j).
//
// The i-, j- and k-ranges are narrowed at compile time according to the (strictly)
// lower/upper structure of A and B and the LOW/UPP flags, so that only the relevant
// index ranges are visited. The k-loop is unrolled by a factor of two.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side operand
//   B  right-hand side operand
1851  template< typename MT3 // Type of the left-hand side target matrix
1852  , typename MT4 // Type of the left-hand side matrix operand
1853  , typename MT5 > // Type of the right-hand side matrix operand
1854  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1855  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1856  {
1857  const size_t M( A.rows() );
1858  const size_t N( B.columns() );
1859  const size_t K( A.columns() );
1860 
1861  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1862 
// Row range: leading rows are skipped for a strictly lower A (two rows if B is strictly
// lower as well), trailing rows for a strictly upper A.
1863  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
1864  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
1865  :( 0UL ) );
1866  const size_t iend( ( IsStrictlyUpper<MT4>::value )
1867  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
1868  :( M ) );
1869  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1870 
1871  for( size_t i=ibegin; i<iend; ++i )
1872  {
// NOTE(review): listing lines 1874, 1877, 1881 and 1884 (inner conditions of the nested
// ternaries for jbegin/jend) are not present in this view; the expressions below are
// therefore incomplete as shown.
1873  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1875  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
1876  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
1878  ?( UPP ? max( i, 1UL ) : 1UL )
1879  :( UPP ? i : 0UL ) ) );
1880  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
1882  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
1883  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
1885  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
1886  :( LOW ? i+1UL : N ) ) );
1887 
// With LOW or UPP set the column range of a row may be empty or inverted; skip such rows.
1888  if( ( LOW || UPP ) && ( jbegin > jend ) ) continue;
1889  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1890 
1891  for( size_t j=jbegin; j<jend; ++j )
1892  {
// k-range: an upper A and/or a lower B raise the start, a lower A and/or an upper B lower
// the end; strictly triangular operands additionally exclude the diagonal position.
1893  const size_t kbegin( ( IsUpper<MT4>::value )
1894  ?( ( IsLower<MT5>::value )
1895  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1896  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1897  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1898  :( ( IsLower<MT5>::value )
1899  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1900  :( 0UL ) ) );
1901  const size_t kend( ( IsLower<MT4>::value )
1902  ?( ( IsUpper<MT5>::value )
1903  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1904  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1905  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1906  :( ( IsUpper<MT5>::value )
1907  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1908  :( K ) ) );
1909  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1910 
// kpos: end of the unrolled part, i.e. kbegin plus the largest even count <= knum.
1911  const size_t knum( kend - kbegin );
1912  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1913 
1914  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1915  (~C)(i,j) += A(i,k ) * B(k ,j);
1916  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1917  }
// Odd number of k values: handle the single remaining one.
1918  if( kpos < kend ) {
1919  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1920  }
1921  }
1922  }
1923  }
1925  //**********************************************************************************************
1926 
1927  //**Default addition assignment to column-major dense matrices (general/general)****************
// Default (scalar) addition assignment kernel for a column-major target; selected when
// neither operand is diagonal. Performs C(i,j) += sum over k of A(i,k) * B(k,j).
//
// The loop nest is column-first (j outer, i inner). The j-, i- and k-ranges are narrowed
// at compile time according to the (strictly) lower/upper structure of A and B and the
// LOW/UPP flags. The k-loop is unrolled by a factor of two.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side operand
//   B  right-hand side operand
1941  template< typename MT3 // Type of the left-hand side target matrix
1942  , typename MT4 // Type of the left-hand side matrix operand
1943  , typename MT5 > // Type of the right-hand side matrix operand
1944  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1945  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1946  {
1947  const size_t M( A.rows() );
1948  const size_t N( B.columns() );
1949  const size_t K( A.columns() );
1950 
1951  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1952 
// Column range: leading columns are skipped for a strictly upper B (two columns if A is
// strictly upper as well), trailing columns for a strictly lower B.
1953  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
1954  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
1955  :( 0UL ) );
1956  const size_t jend( ( IsStrictlyLower<MT5>::value )
1957  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
1958  :( N ) );
1959  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1960 
1961  for( size_t j=jbegin; j<jend; ++j )
1962  {
// NOTE(review): listing lines 1964, 1967, 1971 and 1974 (inner conditions of the nested
// ternaries for ibegin/iend) are not present in this view; the expressions below are
// therefore incomplete as shown.
1963  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
1965  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
1966  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1968  ?( LOW ? max( j, 1UL ) : 1UL )
1969  :( LOW ? j : 0UL ) ) );
1970  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1972  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
1973  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
1975  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
1976  :( UPP ? j+1UL : M ) ) );
1977 
// With LOW or UPP set the row range of a column may be empty or inverted; skip such columns.
1978  if( ( LOW || UPP ) && ( ibegin > iend ) ) continue;
1979  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1980 
1981  for( size_t i=ibegin; i<iend; ++i )
1982  {
// k-range: an upper A and/or a lower B raise the start, a lower A and/or an upper B lower
// the end; strictly triangular operands additionally exclude the diagonal position.
1983  const size_t kbegin( ( IsUpper<MT4>::value )
1984  ?( ( IsLower<MT5>::value )
1985  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1986  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1987  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1988  :( ( IsLower<MT5>::value )
1989  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1990  :( 0UL ) ) );
1991  const size_t kend( ( IsLower<MT4>::value )
1992  ?( ( IsUpper<MT5>::value )
1993  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1994  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1995  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1996  :( ( IsUpper<MT5>::value )
1997  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1998  :( K ) ) );
1999  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
2000 
// kpos: end of the unrolled part, i.e. kbegin plus the largest even count <= knum.
2001  const size_t knum( kend - kbegin );
2002  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
2003 
2004  for( size_t k=kbegin; k<kpos; k+=2UL ) {
2005  (~C)(i,j) += A(i,k ) * B(k ,j);
2006  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
2007  }
// Odd number of k values: handle the single remaining one.
2008  if( kpos < kend ) {
2009  (~C)(i,j) += A(i,kpos) * B(kpos,j);
2010  }
2011  }
2012  }
2013  }
2015  //**********************************************************************************************
2016 
2017  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
// Default addition assignment kernel for a row-major target with a general left-hand
// side operand A and a diagonal right-hand side operand B.
//
// Since only the diagonal of B is read, the update reduces to
// C(i,j) += A(i,j) * B(j,j). The column range of each row is restricted according to the
// (strictly) lower/upper structure of A, and the j-loop is unrolled by a factor of two.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side operand
//   B  right-hand side (diagonal) operand; only B(j,j) is accessed
2031  template< typename MT3 // Type of the left-hand side target matrix
2032  , typename MT4 // Type of the left-hand side matrix operand
2033  , typename MT5 > // Type of the right-hand side matrix operand
2034  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2035  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2036  {
2037  const size_t M( A.rows() );
2038  const size_t N( B.columns() );
2039 
2040  for( size_t i=0UL; i<M; ++i )
2041  {
// Columns of row i that are visited, derived from the triangular structure of A.
2042  const size_t jbegin( ( IsUpper<MT4>::value )
2043  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2044  :( 0UL ) );
2045  const size_t jend( ( IsLower<MT4>::value )
2046  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2047  :( N ) );
2048  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2049 
// jpos: end of the unrolled part, i.e. jbegin plus the largest even count <= jnum.
2050  const size_t jnum( jend - jbegin );
2051  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2052 
2053  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2054  (~C)(i,j ) += A(i,j ) * B(j ,j );
2055  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
2056  }
// Odd number of columns: handle the single remaining one.
2057  if( jpos < jend ) {
2058  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos);
2059  }
2060  }
2061  }
2063  //**********************************************************************************************
2064 
2065  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
// Default addition assignment kernel for a column-major target with a general left-hand
// side operand A and a diagonal right-hand side operand B.
//
// The update is C(i,j) += A(i,j) * B(j,j). The index space is traversed in tiles of
// BLOCK_SIZE x BLOCK_SIZE elements; within each tile the row range of a column is
// clipped according to the (strictly) lower/upper structure of A.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side operand
//   B  right-hand side (diagonal) operand; only B(j,j) is accessed
2079  template< typename MT3 // Type of the left-hand side target matrix
2080  , typename MT4 // Type of the left-hand side matrix operand
2081  , typename MT5 > // Type of the right-hand side matrix operand
2082  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2083  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2084  {
2085  constexpr size_t block( BLOCK_SIZE );
2086 
2087  const size_t M( A.rows() );
2088  const size_t N( B.columns() );
2089 
// Outer loops step over the tiles; jend/iend clamp the last tile to the matrix bounds.
2090  for( size_t jj=0UL; jj<N; jj+=block ) {
2091  const size_t jend( min( N, jj+block ) );
2092  for( size_t ii=0UL; ii<M; ii+=block ) {
2093  const size_t iend( min( M, ii+block ) );
2094  for( size_t j=jj; j<jend; ++j )
2095  {
// Intersect the rows of the current tile with the rows allowed by the structure of A.
2096  const size_t ibegin( ( IsLower<MT4>::value )
2097  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
2098  :( ii ) );
2099  const size_t ipos( ( IsUpper<MT4>::value )
2100  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
2101  :( iend ) );
2102 
2103  for( size_t i=ibegin; i<ipos; ++i ) {
2104  (~C)(i,j) += A(i,j) * B(j,j);
2105  }
2106  }
2107  }
2108  }
2109  }
2111  //**********************************************************************************************
2112 
2113  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
// Default addition assignment kernel for a row-major target with a diagonal left-hand
// side operand A and a general right-hand side operand B.
//
// The update is C(i,j) += A(i,i) * B(i,j). The index space is traversed in tiles of
// BLOCK_SIZE x BLOCK_SIZE elements; within each tile the column range of a row is
// clipped according to the (strictly) lower/upper structure of B.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side (diagonal) operand; only A(i,i) is accessed
//   B  right-hand side operand
2127  template< typename MT3 // Type of the left-hand side target matrix
2128  , typename MT4 // Type of the left-hand side matrix operand
2129  , typename MT5 > // Type of the right-hand side matrix operand
2130  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2131  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2132  {
2133  constexpr size_t block( BLOCK_SIZE );
2134 
2135  const size_t M( A.rows() );
2136  const size_t N( B.columns() );
2137 
// Outer loops step over the tiles; iend/jend clamp the last tile to the matrix bounds.
2138  for( size_t ii=0UL; ii<M; ii+=block ) {
2139  const size_t iend( min( M, ii+block ) );
2140  for( size_t jj=0UL; jj<N; jj+=block ) {
2141  const size_t jend( min( N, jj+block ) );
2142  for( size_t i=ii; i<iend; ++i )
2143  {
// Intersect the columns of the current tile with the columns allowed by the structure of B.
2144  const size_t jbegin( ( IsUpper<MT5>::value )
2145  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
2146  :( jj ) );
2147  const size_t jpos( ( IsLower<MT5>::value )
2148  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
2149  :( jend ) );
2150 
2151  for( size_t j=jbegin; j<jpos; ++j ) {
2152  (~C)(i,j) += A(i,i) * B(i,j);
2153  }
2154  }
2155  }
2156  }
2157  }
2159  //**********************************************************************************************
2160 
2161  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
// Default addition assignment kernel for a column-major target with a diagonal left-hand
// side operand A and a general right-hand side operand B.
//
// The update is C(i,j) += A(i,i) * B(i,j). The row range of each column is restricted
// according to the (strictly) lower/upper structure of B, and the i-loop is unrolled by
// a factor of two.
//
// Parameters:
//   C  target dense matrix (accessed through ~C)
//   A  left-hand side (diagonal) operand; only A(i,i) is accessed
//   B  right-hand side operand
2175  template< typename MT3 // Type of the left-hand side target matrix
2176  , typename MT4 // Type of the left-hand side matrix operand
2177  , typename MT5 > // Type of the right-hand side matrix operand
2178  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2179  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2180  {
2181  const size_t M( A.rows() );
2182  const size_t N( B.columns() );
2183 
2184  for( size_t j=0UL; j<N; ++j )
2185  {
// Rows of column j that are visited, derived from the triangular structure of B.
2186  const size_t ibegin( ( IsLower<MT5>::value )
2187  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2188  :( 0UL ) );
2189  const size_t iend( ( IsUpper<MT5>::value )
2190  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2191  :( M ) );
2192  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2193 
// ipos: end of the unrolled part, i.e. ibegin plus the largest even count <= inum.
2194  const size_t inum( iend - ibegin );
2195  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2196 
2197  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2198  (~C)(i ,j) += A(i ,i ) * B(i ,j);
2199  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
2200  }
// Odd number of rows: handle the single remaining one.
2201  if( ipos < iend ) {
2202  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j);
2203  }
2204  }
2205  }
2207  //**********************************************************************************************
2208 
2209  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
// Default addition assignment kernel for two diagonal operands.
//
// Only the diagonal of the target is updated: C(i,i) += A(i,i) * B(i,i) for every row
// index i of A. In contrast to the other overloads the target is taken as plain MT3&
// (independent of the storage order) and is accessed directly, without operator~.
//
// Parameters:
//   C  target matrix
//   A  left-hand side (diagonal) operand
//   B  right-hand side (diagonal) operand
2223  template< typename MT3 // Type of the left-hand side target matrix
2224  , typename MT4 // Type of the left-hand side matrix operand
2225  , typename MT5 > // Type of the right-hand side matrix operand
2226  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
2227  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2228  {
2229  for( size_t i=0UL; i<A.rows(); ++i ) {
2230  C(i,i) += A(i,i) * B(i,i);
2231  }
2232  }
2234  //**********************************************************************************************
2235 
2236  //**Default addition assignment to dense matrices (small matrices)******************************
// Default small-matrix kernel for the addition assignment: forwards all three arguments
// unchanged to selectDefaultAddAssignKernel().
// NOTE(review): the embedded listing numbers jump from 2252 to 2254, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2250  template< typename MT3 // Type of the left-hand side target matrix
2251  , typename MT4 // Type of the left-hand side matrix operand
2252  , typename MT5 > // Type of the right-hand side matrix operand
2254  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2255  {
2256  selectDefaultAddAssignKernel( C, A, B );
2257  }
2259  //**********************************************************************************************
2260 
2261  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized small-matrix kernel for C += A * B with a row-major target C.
//
// C is processed in small tiles: first two rows at a time (tiles of 2x4, 2x2 and 2x1
// elements), then a possible single leftover row (1x4, 1x2 and 1x1). For every tile the
// k-range [kbegin,kend) is narrowed from the IsUpper/IsLower traits of the operand types,
// the products are accumulated with A.load(i,k) and B.load(k,j) in steps of SIMDSIZE, each
// accumulator is folded into C through sum(), and a scalar loop finishes the k-values in
// [kpos,kend) when 'remainder' is set.
// NOTE(review): the embedded listing numbers jump from 2278 to 2280, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2276  template< typename MT3 // Type of the left-hand side target matrix
2277  , typename MT4 // Type of the left-hand side matrix operand
2278  , typename MT5 > // Type of the right-hand side matrix operand
2280  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2281  {
// True unless both operand types report IsPadded; enables the scalar clean-up loops below.
2282  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2283 
2284  const size_t M( A.rows() );
2285  const size_t N( B.columns() );
2286  const size_t K( A.columns() );
2287 
2288  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2289 
2290  size_t i( 0UL );
2291 
// ---- Row pairs (i, i+1) ----
2292  for( ; (i+2UL) <= M; i+=2UL )
2293  {
// With LOW the column range ends at i+2; with UPP it starts at column i.
2294  const size_t jend( LOW ? i+2UL : N );
2295  size_t j( UPP ? i : 0UL );
2296 
// 2x4 tile; this loop is skipped entirely when both LOW and UPP are set.
2297  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
2298  {
// kbegin is masked with size_t(-SIMDSIZE), i.e. rounded down to a multiple of SIMDSIZE
// (assuming SIMDSIZE is a power of two — TODO confirm).
2299  const size_t kbegin( ( IsUpper<MT4>::value )
2300  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2301  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2302  const size_t kend( ( IsLower<MT4>::value )
2303  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
2304  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
2305 
// kpos: end of the SIMD loop; with 'remainder' it is kend rounded down to a multiple of SIMDSIZE.
2306  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2307  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2308 
// One accumulator per tile element; presumably default-constructed to zero — TODO confirm.
2309  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2310  size_t k( kbegin );
2311 
2312  for( ; k<kpos; k+=SIMDSIZE ) {
2313  const SIMDType a1( A.load(i ,k) );
2314  const SIMDType a2( A.load(i+1UL,k) );
2315  const SIMDType b1( B.load(k,j ) );
2316  const SIMDType b2( B.load(k,j+1UL) );
2317  const SIMDType b3( B.load(k,j+2UL) );
2318  const SIMDType b4( B.load(k,j+3UL) );
2319  xmm1 += a1 * b1;
2320  xmm2 += a1 * b2;
2321  xmm3 += a1 * b3;
2322  xmm4 += a1 * b4;
2323  xmm5 += a2 * b1;
2324  xmm6 += a2 * b2;
2325  xmm7 += a2 * b3;
2326  xmm8 += a2 * b4;
2327  }
2328 
// Fold every accumulator into its element of C.
2329  (~C)(i ,j ) += sum( xmm1 );
2330  (~C)(i ,j+1UL) += sum( xmm2 );
2331  (~C)(i ,j+2UL) += sum( xmm3 );
2332  (~C)(i ,j+3UL) += sum( xmm4 );
2333  (~C)(i+1UL,j ) += sum( xmm5 );
2334  (~C)(i+1UL,j+1UL) += sum( xmm6 );
2335  (~C)(i+1UL,j+2UL) += sum( xmm7 );
2336  (~C)(i+1UL,j+3UL) += sum( xmm8 );
2337 
// Scalar clean-up for the k-values in [kpos,kend).
2338  for( ; remainder && k<kend; ++k ) {
2339  (~C)(i ,j ) += A(i ,k) * B(k,j );
2340  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2341  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
2342  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
2343  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2344  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2345  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
2346  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
2347  }
2348  }
2349 
// 2x2 tile; same scheme as above with two columns.
2350  for( ; (j+2UL) <= jend; j+=2UL )
2351  {
2352  const size_t kbegin( ( IsUpper<MT4>::value )
2353  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2354  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2355  const size_t kend( ( IsLower<MT4>::value )
2356  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2357  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2358 
2359  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2360  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2361 
2362  SIMDType xmm1, xmm2, xmm3, xmm4;
2363  size_t k( kbegin );
2364 
2365  for( ; k<kpos; k+=SIMDSIZE ) {
2366  const SIMDType a1( A.load(i ,k) );
2367  const SIMDType a2( A.load(i+1UL,k) );
2368  const SIMDType b1( B.load(k,j ) );
2369  const SIMDType b2( B.load(k,j+1UL) );
2370  xmm1 += a1 * b1;
2371  xmm2 += a1 * b2;
2372  xmm3 += a2 * b1;
2373  xmm4 += a2 * b2;
2374  }
2375 
2376  (~C)(i ,j ) += sum( xmm1 );
2377  (~C)(i ,j+1UL) += sum( xmm2 );
2378  (~C)(i+1UL,j ) += sum( xmm3 );
2379  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2380 
2381  for( ; remainder && k<kend; ++k ) {
2382  (~C)(i ,j ) += A(i ,k) * B(k,j );
2383  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2384  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2385  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2386  }
2387  }
2388 
// 2x1 tile for a single leftover column; kend only applies the IsLower<MT4> bound here.
2389  if( j < jend )
2390  {
2391  const size_t kbegin( ( IsUpper<MT4>::value )
2392  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2393  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2394  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2395 
2396  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2397  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2398 
2399  SIMDType xmm1, xmm2;
2400  size_t k( kbegin );
2401 
2402  for( ; k<kpos; k+=SIMDSIZE ) {
2403  const SIMDType b1( B.load(k,j) );
2404  xmm1 += A.load(i ,k) * b1;
2405  xmm2 += A.load(i+1UL,k) * b1;
2406  }
2407 
2408  (~C)(i ,j) += sum( xmm1 );
2409  (~C)(i+1UL,j) += sum( xmm2 );
2410 
2411  for( ; remainder && k<kend; ++k ) {
2412  (~C)(i ,j) += A(i ,k) * B(k,j);
2413  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2414  }
2415  }
2416  }
2417 
// ---- Single leftover row (odd M) ----
2418  if( i < M )
2419  {
2420  const size_t jend( LOW ? i+1UL : N );
2421  size_t j( UPP ? i : 0UL );
2422 
// 1x4 tile; kend only applies the IsUpper<MT5> bound here.
2423  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
2424  {
2425  const size_t kbegin( ( IsUpper<MT4>::value )
2426  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2427  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2428  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
2429 
2430  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2431  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2432 
2433  SIMDType xmm1, xmm2, xmm3, xmm4;
2434  size_t k( kbegin );
2435 
2436  for( ; k<kpos; k+=SIMDSIZE ) {
2437  const SIMDType a1( A.load(i,k) );
2438  xmm1 += a1 * B.load(k,j );
2439  xmm2 += a1 * B.load(k,j+1UL);
2440  xmm3 += a1 * B.load(k,j+2UL);
2441  xmm4 += a1 * B.load(k,j+3UL);
2442  }
2443 
2444  (~C)(i,j ) += sum( xmm1 );
2445  (~C)(i,j+1UL) += sum( xmm2 );
2446  (~C)(i,j+2UL) += sum( xmm3 );
2447  (~C)(i,j+3UL) += sum( xmm4 );
2448 
2449  for( ; remainder && k<kend; ++k ) {
2450  (~C)(i,j ) += A(i,k) * B(k,j );
2451  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2452  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
2453  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
2454  }
2455  }
2456 
// 1x2 tile.
2457  for( ; (j+2UL) <= jend; j+=2UL )
2458  {
2459  const size_t kbegin( ( IsUpper<MT4>::value )
2460  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2461  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2462  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2463 
2464  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2465  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2466 
2467  SIMDType xmm1, xmm2;
2468  size_t k( kbegin );
2469 
2470  for( ; k<kpos; k+=SIMDSIZE ) {
2471  const SIMDType a1( A.load(i,k) );
2472  xmm1 += a1 * B.load(k,j );
2473  xmm2 += a1 * B.load(k,j+1UL);
2474  }
2475 
2476  (~C)(i,j ) += sum( xmm1 );
2477  (~C)(i,j+1UL) += sum( xmm2 );
2478 
2479  for( ; remainder && k<kend; ++k ) {
2480  (~C)(i,j ) += A(i,k) * B(k,j );
2481  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2482  }
2483  }
2484 
// 1x1 tile; no upper bound is derived here, the k-range runs up to K.
2485  if( j < jend )
2486  {
2487  const size_t kbegin( ( IsUpper<MT4>::value )
2488  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2489  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2490 
2491  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2492  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2493 
2494  SIMDType xmm1;
2495  size_t k( kbegin );
2496 
2497  for( ; k<kpos; k+=SIMDSIZE ) {
2498  xmm1 += A.load(i,k) * B.load(k,j);
2499  }
2500 
2501  (~C)(i,j) += sum( xmm1 );
2502 
2503  for( ; remainder && k<K; ++k ) {
2504  (~C)(i,j) += A(i,k) * B(k,j);
2505  }
2506  }
2507  }
2508  }
2510  //**********************************************************************************************
2511 
2512  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized small-matrix kernel for C += A * B with a column-major target C.
//
// C is processed in small tiles: four rows at a time (4x2 and 4x1 elements) when neither
// LOW nor UPP is set, then two rows at a time (2x2, 2x1), then a possible single leftover
// row (1x2, 1x1). For every tile the k-range [kbegin,kend) is narrowed from the
// IsUpper/IsLower traits of the operand types, the products are accumulated with
// A.load(i,k) and B.load(k,j) in steps of SIMDSIZE, each accumulator is folded into C
// through sum(), and a scalar loop finishes the k-values in [kpos,kend) when 'remainder'
// is set.
// NOTE(review): the embedded listing numbers jump from 2529 to 2531, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2527  template< typename MT3 // Type of the left-hand side target matrix
2528  , typename MT4 // Type of the left-hand side matrix operand
2529  , typename MT5 > // Type of the right-hand side matrix operand
2531  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2532  {
// True unless both operand types report IsPadded; enables the scalar clean-up loops below.
2533  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2534 
2535  const size_t M( A.rows() );
2536  const size_t N( B.columns() );
2537  const size_t K( A.columns() );
2538 
2539  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2540 
2541  size_t i( 0UL );
2542 
// ---- Row quadruples (i .. i+3); only taken when neither LOW nor UPP is set ----
2543  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
2544  {
2545  size_t j( 0UL );
2546 
// 4x2 tile.
2547  for( ; (j+2UL) <= N; j+=2UL )
2548  {
// kbegin is masked with size_t(-SIMDSIZE), i.e. rounded down to a multiple of SIMDSIZE
// (assuming SIMDSIZE is a power of two — TODO confirm).
2549  const size_t kbegin( ( IsUpper<MT4>::value )
2550  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2551  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2552  const size_t kend( ( IsLower<MT4>::value )
2553  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
2554  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2555 
// kpos: end of the SIMD loop; with 'remainder' it is kend rounded down to a multiple of SIMDSIZE.
2556  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2557  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2558 
// One accumulator per tile element; presumably default-constructed to zero — TODO confirm.
2559  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2560  size_t k( kbegin );
2561 
2562  for( ; k<kpos; k+=SIMDSIZE ) {
2563  const SIMDType a1( A.load(i ,k) );
2564  const SIMDType a2( A.load(i+1UL,k) );
2565  const SIMDType a3( A.load(i+2UL,k) );
2566  const SIMDType a4( A.load(i+3UL,k) );
2567  const SIMDType b1( B.load(k,j ) );
2568  const SIMDType b2( B.load(k,j+1UL) );
2569  xmm1 += a1 * b1;
2570  xmm2 += a1 * b2;
2571  xmm3 += a2 * b1;
2572  xmm4 += a2 * b2;
2573  xmm5 += a3 * b1;
2574  xmm6 += a3 * b2;
2575  xmm7 += a4 * b1;
2576  xmm8 += a4 * b2;
2577  }
2578 
// Fold every accumulator into its element of C.
2579  (~C)(i ,j ) += sum( xmm1 );
2580  (~C)(i ,j+1UL) += sum( xmm2 );
2581  (~C)(i+1UL,j ) += sum( xmm3 );
2582  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2583  (~C)(i+2UL,j ) += sum( xmm5 );
2584  (~C)(i+2UL,j+1UL) += sum( xmm6 );
2585  (~C)(i+3UL,j ) += sum( xmm7 );
2586  (~C)(i+3UL,j+1UL) += sum( xmm8 );
2587 
// Scalar clean-up for the k-values in [kpos,kend).
2588  for( ; remainder && k<kend; ++k ) {
2589  (~C)(i ,j ) += A(i ,k) * B(k,j );
2590  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2591  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2592  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2593  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
2594  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
2595  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
2596  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
2597  }
2598  }
2599 
// 4x1 tile for a single leftover column; kend only applies the IsLower<MT4> bound here.
2600  if( j < N )
2601  {
2602  const size_t kbegin( ( IsUpper<MT4>::value )
2603  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2604  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2605  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
2606 
2607  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2608  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2609 
2610  SIMDType xmm1, xmm2, xmm3, xmm4;
2611  size_t k( kbegin );
2612 
2613  for( ; k<kpos; k+=SIMDSIZE ) {
2614  const SIMDType b1( B.load(k,j) );
2615  xmm1 += A.load(i ,k) * b1;
2616  xmm2 += A.load(i+1UL,k) * b1;
2617  xmm3 += A.load(i+2UL,k) * b1;
2618  xmm4 += A.load(i+3UL,k) * b1;
2619  }
2620 
2621  (~C)(i ,j) += sum( xmm1 );
2622  (~C)(i+1UL,j) += sum( xmm2 );
2623  (~C)(i+2UL,j) += sum( xmm3 );
2624  (~C)(i+3UL,j) += sum( xmm4 );
2625 
2626  for( ; remainder && k<kend; ++k ) {
2627  (~C)(i ,j) += A(i ,k) * B(k,j);
2628  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2629  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
2630  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
2631  }
2632  }
2633  }
2634 
// ---- Row pairs (i, i+1) ----
2635  for( ; (i+2UL) <= M; i+=2UL )
2636  {
// With LOW the column range ends at i+2; with UPP it starts at column i.
2637  const size_t jend( LOW ? i+2UL : N );
2638  size_t j( UPP ? i : 0UL );
2639 
// 2x2 tile.
2640  for( ; (j+2UL) <= jend; j+=2UL )
2641  {
2642  const size_t kbegin( ( IsUpper<MT4>::value )
2643  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2644  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2645  const size_t kend( ( IsLower<MT4>::value )
2646  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2647  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2648 
2649  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2650  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2651 
2652  SIMDType xmm1, xmm2, xmm3, xmm4;
2653  size_t k( kbegin );
2654 
2655  for( ; k<kpos; k+=SIMDSIZE ) {
2656  const SIMDType a1( A.load(i ,k) );
2657  const SIMDType a2( A.load(i+1UL,k) );
2658  const SIMDType b1( B.load(k,j ) );
2659  const SIMDType b2( B.load(k,j+1UL) );
2660  xmm1 += a1 * b1;
2661  xmm2 += a1 * b2;
2662  xmm3 += a2 * b1;
2663  xmm4 += a2 * b2;
2664  }
2665 
2666  (~C)(i ,j ) += sum( xmm1 );
2667  (~C)(i ,j+1UL) += sum( xmm2 );
2668  (~C)(i+1UL,j ) += sum( xmm3 );
2669  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2670 
2671  for( ; remainder && k<kend; ++k ) {
2672  (~C)(i ,j ) += A(i ,k) * B(k,j );
2673  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2674  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2675  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2676  }
2677  }
2678 
// 2x1 tile for a single leftover column; kend only applies the IsLower<MT4> bound here.
2679  if( j < jend )
2680  {
2681  const size_t kbegin( ( IsUpper<MT4>::value )
2682  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2683  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2684  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2685 
2686  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2687  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2688 
2689  SIMDType xmm1, xmm2;
2690  size_t k( kbegin );
2691 
2692  for( ; k<kpos; k+=SIMDSIZE ) {
2693  const SIMDType b1( B.load(k,j) );
2694  xmm1 += A.load(i ,k) * b1;
2695  xmm2 += A.load(i+1UL,k) * b1;
2696  }
2697 
2698  (~C)(i ,j) += sum( xmm1 );
2699  (~C)(i+1UL,j) += sum( xmm2 );
2700 
2701  for( ; remainder && k<kend; ++k ) {
2702  (~C)(i ,j) += A(i ,k) * B(k,j);
2703  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2704  }
2705  }
2706  }
2707 
// ---- Single leftover row (odd M) ----
2708  if( i < M )
2709  {
2710  const size_t jend( LOW ? i+1UL : N );
2711  size_t j( UPP ? i : 0UL );
2712 
// 1x2 tile; kend only applies the IsUpper<MT5> bound here.
2713  for( ; (j+2UL) <= jend; j+=2UL )
2714  {
2715  const size_t kbegin( ( IsUpper<MT4>::value )
2716  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2717  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2718  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2719 
2720  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2721  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2722 
2723  SIMDType xmm1, xmm2;
2724  size_t k( kbegin );
2725 
2726  for( ; k<kpos; k+=SIMDSIZE ) {
2727  const SIMDType a1( A.load(i,k) );
2728  xmm1 += a1 * B.load(k,j );
2729  xmm2 += a1 * B.load(k,j+1UL);
2730  }
2731 
2732  (~C)(i,j ) += sum( xmm1 );
2733  (~C)(i,j+1UL) += sum( xmm2 );
2734 
2735  for( ; remainder && k<kend; ++k ) {
2736  (~C)(i,j ) += A(i,k) * B(k,j );
2737  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2738  }
2739  }
2740 
// 1x1 tile; no upper bound is derived here, the k-range runs up to K.
2741  if( j < jend )
2742  {
2743  const size_t kbegin( ( IsUpper<MT4>::value )
2744  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2745  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2746 
2747  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2748  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2749 
2750  SIMDType xmm1;
2751  size_t k( kbegin );
2752 
2753  for( ; k<kpos; k+=SIMDSIZE ) {
2754  xmm1 += A.load(i,k) * B.load(k,j);
2755  }
2756 
2757  (~C)(i,j) += sum( xmm1 );
2758 
2759  for( ; remainder && k<K; ++k ) {
2760  (~C)(i,j) += A(i,k) * B(k,j);
2761  }
2762  }
2763  }
2764  }
2766  //**********************************************************************************************
2767 
2768  //**Default addition assignment to dense matrices (large matrices)******************************
// Default large-matrix kernel for the addition assignment: forwards all three arguments
// unchanged to selectDefaultAddAssignKernel().
// NOTE(review): the embedded listing numbers jump from 2784 to 2786, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2782  template< typename MT3 // Type of the left-hand side target matrix
2783  , typename MT4 // Type of the left-hand side matrix operand
2784  , typename MT5 > // Type of the right-hand side matrix operand
2786  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2787  {
2788  selectDefaultAddAssignKernel( C, A, B );
2789  }
2791  //**********************************************************************************************
2792 
2793  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Vectorized large-matrix kernel for the addition assignment: dispatches to lmmm() when LOW
// is set, to ummm() when UPP is set, and to mmm() otherwise. All three calls receive
// ElementType(1) for both trailing scalar arguments.
// NOTE(review): the two scalars are presumably the scaling factors of the product and of the
// existing content of C — confirm against <blaze/math/dense/MMM.h>.
// NOTE(review): the embedded listing numbers jump from 2810 to 2812, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2808  template< typename MT3 // Type of the left-hand side target matrix
2809  , typename MT4 // Type of the left-hand side matrix operand
2810  , typename MT5 > // Type of the right-hand side matrix operand
2812  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2813  {
2814  if( LOW )
2815  lmmm( C, A, B, ElementType(1), ElementType(1) );
2816  else if( UPP )
2817  ummm( C, A, B, ElementType(1), ElementType(1) );
2818  else
2819  mmm( C, A, B, ElementType(1), ElementType(1) );
2820  }
2822  //**********************************************************************************************
2823 
2824  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Default overload of the BLAS-path kernel for the addition assignment: forwards all three
// arguments unchanged to selectLargeAddAssignKernel().
// NOTE(review): the embedded listing numbers jump from 2840 to 2842, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2838  template< typename MT3 // Type of the left-hand side target matrix
2839  , typename MT4 // Type of the left-hand side matrix operand
2840  , typename MT5 > // Type of the right-hand side matrix operand
2842  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2843  {
2844  selectLargeAddAssignKernel( C, A, B );
2845  }
2847  //**********************************************************************************************
2848 
2849  //**BLAS-based addition assignment to dense matrices********************************************
// This overload is only compiled when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION evaluate to non-zero.
2850 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
2851 
// BLAS-based kernel for the addition assignment C += A * B.
//  - A triangular: B is copied into a temporary, trmm() is applied to it with A from the
//    left, and the temporary is added to C.
//  - otherwise, B triangular: A is copied into a temporary, trmm() is applied to it with B
//    from the right, and the temporary is added to C.
//  - otherwise: gemm() is called directly on C with ET(1) for both scalar arguments.
// NOTE(review): the temporary presumably exists because trmm() overwrites its first argument
// and cannot accumulate into C — confirm against <blaze/math/blas/trmm.h>.
// NOTE(review): the embedded listing numbers jump from 2866 to 2868, so the line that would
// carry the return type of this overload is not visible in this excerpt.
2864  template< typename MT3 // Type of the left-hand side target matrix
2865  , typename MT4 // Type of the left-hand side matrix operand
2866  , typename MT5 > // Type of the right-hand side matrix operand
2868  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2869  {
2870  using ET = ElementType_<MT3>;
2871 
2872  if( IsTriangular<MT4>::value ) {
2873  ResultType_<MT3> tmp( serial( B ) );
2874  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2875  addAssign( C, tmp );
2876  }
2877  else if( IsTriangular<MT5>::value ) {
2878  ResultType_<MT3> tmp( serial( A ) );
2879  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2880  addAssign( C, tmp );
2881  }
2882  else {
2883  gemm( C, A, B, ET(1), ET(1) );
2884  }
2885  }
2887 #endif
2888  //**********************************************************************************************
2889 
2890  //**Addition assignment to sparse matrices******************************************************
2891  // No special implementation for the addition assignment to sparse matrices.
2892  //**********************************************************************************************
2893 
2894  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the multiplication expression 'rhs' to the dense matrix 'lhs'.
// After checking that the dimensions match, the function returns early if the target has
// no rows or no columns or if the left operand has no columns. Otherwise both operands are
// evaluated into A and B and the work is handed to selectSubAssignKernel().
// NOTE(review): listing line 2911 (directly after the opening brace) is missing from this
// excerpt.
2907  template< typename MT // Type of the target dense matrix
2908  , bool SO > // Storage order of the target dense matrix
2909  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2910  {
2912 
2913  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2914  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2915 
// Nothing to subtract if any of the three dimensions involved is zero.
2916  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2917  return;
2918  }
2919 
2920  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2921  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2922 
// The evaluated operands must keep the dimensions of the original operands and fit the target.
2923  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2924  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2925  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2926  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2927  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2928  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2929 
2930  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2931  }
2933  //**********************************************************************************************
2934 
2935  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment: calls selectSmallSubAssignKernel() when
// the condition holds and selectBlasSubAssignKernel() otherwise.
// NOTE(review): listing line 2951, which opens the if-condition, is missing from this
// excerpt. Only its final part is visible: the element count of C (rows * columns) being
// below DMATTDMATMULT_THRESHOLD.
2946  template< typename MT3 // Type of the left-hand side target matrix
2947  , typename MT4 // Type of the left-hand side matrix operand
2948  , typename MT5 > // Type of the right-hand side matrix operand
2949  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2950  {
2952  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
2953  selectSmallSubAssignKernel( C, A, B );
2954  else
2955  selectBlasSubAssignKernel( C, A, B );
2956  }
2958  //**********************************************************************************************
2959 
2960  //**Default subtraction assignment to row-major dense matrices (general/general)****************
// Default (scalar) kernel for C -= A * B with a row-major target, selected when neither
// operand type satisfies IsDiagonal. The target is traversed row by row; for each element
// C(i,j) the products A(i,k) * B(k,j) over [kbegin,kend) are subtracted one by one, two
// k-values per loop iteration. All loop ranges are narrowed from the triangular traits of
// MT4 and MT5 and from the LOW/UPP flags.
// NOTE(review): listing lines 2997, 3000, 3004 and 3007 (parts of the nested conditional
// expressions for jbegin and jend) are missing from this excerpt.
2974  template< typename MT3 // Type of the left-hand side target matrix
2975  , typename MT4 // Type of the left-hand side matrix operand
2976  , typename MT5 > // Type of the right-hand side matrix operand
2977  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2978  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2979  {
2980  const size_t M( A.rows() );
2981  const size_t N( B.columns() );
2982  const size_t K( A.columns() );
2983 
2984  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2985 
// Leading rows are skipped when MT4 is strictly lower (two rows if MT5 is strictly lower as
// well and M > 1); trailing rows are skipped analogously for strictly upper operands.
2986  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
2987  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
2988  :( 0UL ) );
2989  const size_t iend( ( IsStrictlyUpper<MT4>::value )
2990  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
2991  :( M ) );
2992  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2993 
2994  for( size_t i=ibegin; i<iend; ++i )
2995  {
// Column range of row i (expression partially elided in this listing, see note above).
2996  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
2998  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
2999  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
3001  ?( UPP ? max( i, 1UL ) : 1UL )
3002  :( UPP ? i : 0UL ) ) );
3003  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
3005  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
3006  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
3008  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
3009  :( LOW ? i+1UL : N ) ) );
3010 
// With LOW or UPP the computed range may be inverted; such rows are skipped.
3011  if( ( LOW || UPP ) && ( jbegin > jend ) ) continue;
3012  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3013 
3014  for( size_t j=jbegin; j<jend; ++j )
3015  {
// k-range for C(i,j), narrowed by the upper/lower traits of A and B.
3016  const size_t kbegin( ( IsUpper<MT4>::value )
3017  ?( ( IsLower<MT5>::value )
3018  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3019  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3020  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3021  :( ( IsLower<MT5>::value )
3022  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3023  :( 0UL ) ) );
3024  const size_t kend( ( IsLower<MT4>::value )
3025  ?( ( IsUpper<MT5>::value )
3026  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
3027  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3028  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3029  :( ( IsUpper<MT5>::value )
3030  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3031  :( K ) ) );
3032  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
3033 
// kpos ends the largest even-length prefix of [kbegin,kend); a single leftover k follows.
3034  const size_t knum( kend - kbegin );
3035  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
3036 
3037  for( size_t k=kbegin; k<kpos; k+=2UL ) {
3038  (~C)(i,j) -= A(i,k ) * B(k ,j);
3039  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
3040  }
3041  if( kpos < kend ) {
3042  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
3043  }
3044  }
3045  }
3046  }
3048  //**********************************************************************************************
3049 
3050  //**Default subtraction assignment to column-major dense matrices (general/general)*************
// Default (scalar) kernel for C -= A * B with a column-major target, selected when neither
// operand type satisfies IsDiagonal. The target is traversed column by column; for each
// element C(i,j) the products A(i,k) * B(k,j) over [kbegin,kend) are subtracted one by one,
// two k-values per loop iteration. All loop ranges are narrowed from the triangular traits
// of MT4 and MT5 and from the LOW/UPP flags.
// NOTE(review): listing lines 3087, 3090, 3094 and 3097 (parts of the nested conditional
// expressions for ibegin and iend) are missing from this excerpt.
3064  template< typename MT3 // Type of the left-hand side target matrix
3065  , typename MT4 // Type of the left-hand side matrix operand
3066  , typename MT5 > // Type of the right-hand side matrix operand
3067  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
3068  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3069  {
3070  const size_t M( A.rows() );
3071  const size_t N( B.columns() );
3072  const size_t K( A.columns() );
3073 
3074  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3075 
// Leading columns are skipped when MT5 is strictly upper (two columns if MT4 is strictly
// upper as well and N > 1); trailing columns are skipped analogously for strictly lower operands.
3076  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
3077  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
3078  :( 0UL ) );
3079  const size_t jend( ( IsStrictlyLower<MT5>::value )
3080  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
3081  :( N ) );
3082  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3083 
3084  for( size_t j=jbegin; j<jend; ++j )
3085  {
// Row range of column j (expression partially elided in this listing, see note above).
3086  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
3088  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
3089  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3091  ?( LOW ? max( j, 1UL ) : 1UL )
3092  :( LOW ? j : 0UL ) ) );
3093  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
3095  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
3096  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
3098  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
3099  :( UPP ? j+1UL : M ) ) );
3100 
// With LOW or UPP the computed range may be inverted; such columns are skipped.
3101  if( ( LOW || UPP ) && ( ibegin > iend ) ) continue;
3102  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3103 
3104  for( size_t i=ibegin; i<iend; ++i )
3105  {
// k-range for C(i,j), narrowed by the upper/lower traits of A and B.
3106  const size_t kbegin( ( IsUpper<MT4>::value )
3107  ?( ( IsLower<MT5>::value )
3108  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3109  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3110  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3111  :( ( IsLower<MT5>::value )
3112  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3113  :( 0UL ) ) );
3114  const size_t kend( ( IsLower<MT4>::value )
3115  ?( ( IsUpper<MT5>::value )
3116  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
3117  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3118  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3119  :( ( IsUpper<MT5>::value )
3120  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3121  :( K ) ) );
3122  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
3123 
// kpos ends the largest even-length prefix of [kbegin,kend); a single leftover k follows.
3124  const size_t knum( kend - kbegin );
3125  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
3126 
3127  for( size_t k=kbegin; k<kpos; k+=2UL ) {
3128  (~C)(i,j) -= A(i,k ) * B(k ,j);
3129  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
3130  }
3131  if( kpos < kend ) {
3132  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
3133  }
3134  }
3135  }
3136  }
3138  //**********************************************************************************************
3139 
3140  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
3154  template< typename MT3 // Type of the left-hand side target matrix
3155  , typename MT4 // Type of the left-hand side matrix operand
3156  , typename MT5 > // Type of the right-hand side matrix operand
3157  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3158  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3159  {
3160  const size_t M( A.rows() );
3161  const size_t N( B.columns() );
3162 
3163  for( size_t i=0UL; i<M; ++i )
3164  {
3165  const size_t jbegin( ( IsUpper<MT4>::value )
3166  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3167  :( 0UL ) );
3168  const size_t jend( ( IsLower<MT4>::value )
3169  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
3170  :( N ) );
3171  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3172 
3173  const size_t jnum( jend - jbegin );
3174  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3175 
3176  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3177  (~C)(i,j ) -= A(i,j ) * B(j ,j );
3178  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
3179  }
3180  if( jpos < jend ) {
3181  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos);
3182  }
3183  }
3184  }
3186  //**********************************************************************************************
3187 
3188  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
3202  template< typename MT3 // Type of the left-hand side target matrix
3203  , typename MT4 // Type of the left-hand side matrix operand
3204  , typename MT5 > // Type of the right-hand side matrix operand
3205  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3206  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3207  {
3208  constexpr size_t block( BLOCK_SIZE );
3209 
3210  const size_t M( A.rows() );
3211  const size_t N( B.columns() );
3212 
3213  for( size_t jj=0UL; jj<N; jj+=block ) {
3214  const size_t jend( min( N, jj+block ) );
3215  for( size_t ii=0UL; ii<M; ii+=block ) {
3216  const size_t iend( min( M, ii+block ) );
3217  for( size_t j=jj; j<jend; ++j )
3218  {
3219  const size_t ibegin( ( IsLower<MT4>::value )
3220  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
3221  :( ii ) );
3222  const size_t ipos( ( IsUpper<MT4>::value )
3223  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
3224  :( iend ) );
3225 
3226  for( size_t i=ibegin; i<ipos; ++i ) {
3227  (~C)(i,j) -= A(i,j) * B(j,j);
3228  }
3229  }
3230  }
3231  }
3232  }
3234  //**********************************************************************************************
3235 
3236  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
/*!\brief Default subtraction assignment of a diagonal dense matrix-general dense matrix
//        multiplication to a row-major dense matrix (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (selected only if diagonal).
// \param B The right-hand side multiplication operand (selected only if not diagonal).
//
// Only the diagonal of \a A is read, so every target element is updated with a single
// multiplication: C(i,j) -= A(i,i) * B(i,j). The iteration space is tiled into blocks of
// BLOCK_SIZE rows by BLOCK_SIZE columns. Within a block the column range of each row is
// narrowed according to the compile-time structure of \a B (lower, upper, strictly lower,
// strictly upper).
*/
3250  template< typename MT3 // Type of the left-hand side target matrix
3251  , typename MT4 // Type of the left-hand side matrix operand
3252  , typename MT5 > // Type of the right-hand side matrix operand
3253  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3254  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3255  {
3256  constexpr size_t block( BLOCK_SIZE );
3257 
3258  const size_t M( A.rows() );
3259  const size_t N( B.columns() );
3260 
      // Row blocks form the outer loop, column blocks the inner loop.
3261  for( size_t ii=0UL; ii<M; ii+=block ) {
3262  const size_t iend( min( M, ii+block ) );
3263  for( size_t jj=0UL; jj<N; jj+=block ) {
3264  const size_t jend( min( N, jj+block ) );
3265  for( size_t i=ii; i<iend; ++i )
3266  {
      // For an upper B the columns of row i start at the diagonal (i), or at i+1 if B is
      // strictly upper, but never before the start of the current column block.
3267  const size_t jbegin( ( IsUpper<MT5>::value )
3268  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
3269  :( jj ) );
      // For a lower B the columns of row i end after the diagonal (i+1), or at i if B is
      // strictly lower, but never past the end of the current column block.
3270  const size_t jpos( ( IsLower<MT5>::value )
3271  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
3272  :( jend ) );
3273 
3274  for( size_t j=jbegin; j<jpos; ++j ) {
3275  (~C)(i,j) -= A(i,i) * B(i,j);
3276  }
3277  }
3278  }
3279  }
3280  }
3282  //**********************************************************************************************
3283 
3284  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
/*!\brief Default subtraction assignment of a diagonal dense matrix-general dense matrix
//        multiplication to a column-major dense matrix (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (selected only if diagonal).
// \param B The right-hand side multiplication operand (selected only if not diagonal).
//
// Only the diagonal of \a A is read: C(i,j) -= A(i,i) * B(i,j). The target is traversed
// column by column without blocking. The row range of each column is narrowed according to
// the compile-time structure of \a B, and the row loop is unrolled by a factor of two with
// a separate step for an odd trailing row.
*/
3298  template< typename MT3 // Type of the left-hand side target matrix
3299  , typename MT4 // Type of the left-hand side matrix operand
3300  , typename MT5 > // Type of the right-hand side matrix operand
3301  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3302  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3303  {
3304  const size_t M( A.rows() );
3305  const size_t N( B.columns() );
3306 
3307  for( size_t j=0UL; j<N; ++j )
3308  {
      // Lower B: rows start at the diagonal (j), or at j+1 if B is strictly lower.
3309  const size_t ibegin( ( IsLower<MT5>::value )
3310  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3311  :( 0UL ) );
      // Upper B: rows end after the diagonal (j+1), or at j if B is strictly upper.
3312  const size_t iend( ( IsUpper<MT5>::value )
3313  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3314  :( M ) );
3315  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3316 
3317  const size_t inum( iend - ibegin );
3318  const size_t ipos( ibegin + ( inum & size_t(-2) ) ); // End of the even-sized part of the row range
3319 
      // Two rows per iteration.
3320  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3321  (~C)(i ,j) -= A(i ,i ) * B(i ,j);
3322  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
3323  }
      // Odd trailing row, if any.
3324  if( ipos < iend ) {
3325  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j);
3326  }
3327  }
3328  }
3330  //**********************************************************************************************
3331 
3332  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
/*!\brief Default subtraction assignment of a diagonal dense matrix-diagonal dense matrix
//        multiplication to a dense matrix (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (selected only if diagonal).
// \param B The right-hand side multiplication operand (selected only if diagonal).
//
// With both operands diagonal only the diagonal elements of the target are touched:
// C(i,i) -= A(i,i) * B(i,i) for all i in [0, A.rows()). In contrast to the other default
// kernels the target is taken as plain \a MT3 and accessed without the '~' operator.
*/
3346  template< typename MT3 // Type of the left-hand side target matrix
3347  , typename MT4 // Type of the left-hand side matrix operand
3348  , typename MT5 > // Type of the right-hand side matrix operand
3349  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
3350  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3351  {
3352  for( size_t i=0UL; i<A.rows(); ++i ) {
3353  C(i,i) -= A(i,i) * B(i,i);
3354  }
3355  }
3357  //**********************************************************************************************
3358 
3359  //**Default subtraction assignment to dense matrices (small matrices)***************************
/*!\brief Fallback subtraction assignment kernel for small matrices (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload performs no work of its own; it forwards to selectDefaultSubAssignKernel().
//
// NOTE(review): the return-type/selection line of this overload (listing line 3376) is not
// part of this listing, so the condition under which it is chosen over the row-major and
// column-major small-matrix kernels cannot be verified here.
*/
3373  template< typename MT3 // Type of the left-hand side target matrix
3374  , typename MT4 // Type of the left-hand side matrix operand
3375  , typename MT5 > // Type of the right-hand side matrix operand
3377  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3378  {
3379  selectDefaultSubAssignKernel( ~C, A, B );
3380  }
3382  //**********************************************************************************************
3383 
3384  //**Default subtraction assignment to row-major dense matrices (small matrices)*****************
/*!\brief SIMD-based subtraction assignment kernel for small matrices with a row-major target
//        (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Every element of the product is computed as a dot product over k: SIMD vectors obtained
// from A.load(i,k) and B.load(k,j) are multiplied and accumulated, the accumulator is
// reduced with sum() and the result is subtracted from the target element. The target is
// traversed two rows at a time; within a row pair the columns are processed in groups of
// four, then two, then a single leftover column. A final pass handles a single leftover row
// in the same manner.
//
// The compile-time flags LOW and UPP restrict the processed column range of each row, and the
// k-range of each dot product is narrowed according to the lower/upper structure of \a A and
// \a B. If at least one operand is not padded (\a remainder is \a true), the SIMD loop stops at
// the last multiple of SIMDSIZE and a scalar loop processes the remaining k.
//
// NOTE(review): the accumulators (xmm1, ...) are default-constructed and then used with '+=';
// this presumes that a default-constructed SIMDType is zero - confirm against the SIMD types.
// NOTE(review): the return-type/selection line of this overload (listing line 3402) is not
// part of this listing.
*/
3399  template< typename MT3 // Type of the left-hand side target matrix
3400  , typename MT4 // Type of the left-hand side matrix operand
3401  , typename MT5 > // Type of the right-hand side matrix operand
3403  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3404  {
3405  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value ); // Scalar tail loops are needed unless both operands are padded
3406 
3407  const size_t M( A.rows() );
3408  const size_t N( B.columns() );
3409  const size_t K( A.columns() );
3410 
3411  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3412 
3413  size_t i( 0UL );
3414 
      // Main pass: two rows of the target per iteration.
3415  for( ; (i+2UL) <= M; i+=2UL )
3416  {
3417  const size_t jend( LOW ? i+2UL : N ); // LOW: no columns beyond the diagonal of the second row
3418  size_t j( UPP ? i : 0UL ); // UPP: no columns before the diagonal of the first row
3419 
      // 2x4 block of the target; this path is skipped entirely if both LOW and UPP are set.
3420  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
3421  {
      // Start of the k-range, rounded down to a multiple of SIMDSIZE by the bit mask.
3422  const size_t kbegin( ( IsUpper<MT4>::value )
3423  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3424  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // End of the k-range, limited by the last row (lower A) and/or last column (upper B) of the block.
3425  const size_t kend( ( IsLower<MT4>::value )
3426  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
3427  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
3428 
3429  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend ); // End of the SIMD part of the k-range
3430  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3431 
3432  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3433  size_t k( kbegin );
3434 
3435  for( ; k<kpos; k+=SIMDSIZE ) {
3436  const SIMDType a1( A.load(i ,k) );
3437  const SIMDType a2( A.load(i+1UL,k) );
3438  const SIMDType b1( B.load(k,j ) );
3439  const SIMDType b2( B.load(k,j+1UL) );
3440  const SIMDType b3( B.load(k,j+2UL) );
3441  const SIMDType b4( B.load(k,j+3UL) );
3442  xmm1 += a1 * b1;
3443  xmm2 += a1 * b2;
3444  xmm3 += a1 * b3;
3445  xmm4 += a1 * b4;
3446  xmm5 += a2 * b1;
3447  xmm6 += a2 * b2;
3448  xmm7 += a2 * b3;
3449  xmm8 += a2 * b4;
3450  }
3451 
      // Reduce each accumulator and subtract it from its target element.
3452  (~C)(i ,j ) -= sum( xmm1 );
3453  (~C)(i ,j+1UL) -= sum( xmm2 );
3454  (~C)(i ,j+2UL) -= sum( xmm3 );
3455  (~C)(i ,j+3UL) -= sum( xmm4 );
3456  (~C)(i+1UL,j ) -= sum( xmm5 );
3457  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
3458  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
3459  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
3460 
      // Scalar tail for the k not covered by the SIMD loop.
3461  for( ; remainder && k<kend; ++k ) {
3462  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3463  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3464  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL);
3465  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL);
3466  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3467  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3468  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL);
3469  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL);
3470  }
3471  }
3472 
      // 2x2 block of the target.
3473  for( ; (j+2UL) <= jend; j+=2UL )
3474  {
3475  const size_t kbegin( ( IsUpper<MT4>::value )
3476  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3477  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3478  const size_t kend( ( IsLower<MT4>::value )
3479  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3480  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3481 
3482  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3483  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3484 
3485  SIMDType xmm1, xmm2, xmm3, xmm4;
3486  size_t k( kbegin );
3487 
3488  for( ; k<kpos; k+=SIMDSIZE ) {
3489  const SIMDType a1( A.load(i ,k) );
3490  const SIMDType a2( A.load(i+1UL,k) );
3491  const SIMDType b1( B.load(k,j ) );
3492  const SIMDType b2( B.load(k,j+1UL) );
3493  xmm1 += a1 * b1;
3494  xmm2 += a1 * b2;
3495  xmm3 += a2 * b1;
3496  xmm4 += a2 * b2;
3497  }
3498 
3499  (~C)(i ,j ) -= sum( xmm1 );
3500  (~C)(i ,j+1UL) -= sum( xmm2 );
3501  (~C)(i+1UL,j ) -= sum( xmm3 );
3502  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3503 
3504  for( ; remainder && k<kend; ++k ) {
3505  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3506  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3507  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3508  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3509  }
3510  }
3511 
      // 2x1 block: single leftover column of the row pair.
3512  if( j < jend )
3513  {
3514  const size_t kbegin( ( IsUpper<MT4>::value )
3515  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3516  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3517  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3518 
3519  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3520  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3521 
3522  SIMDType xmm1, xmm2;
3523  size_t k( kbegin );
3524 
3525  for( ; k<kpos; k+=SIMDSIZE ) {
3526  const SIMDType b1( B.load(k,j) );
3527  xmm1 += A.load(i ,k) * b1;
3528  xmm2 += A.load(i+1UL,k) * b1;
3529  }
3530 
3531  (~C)(i ,j) -= sum( xmm1 );
3532  (~C)(i+1UL,j) -= sum( xmm2 );
3533 
3534  for( ; remainder && k<kend; ++k ) {
3535  (~C)(i ,j) -= A(i ,k) * B(k,j);
3536  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3537  }
3538  }
3539  }
3540 
      // Final pass: single leftover row of the target (odd number of rows).
3541  if( i < M )
3542  {
3543  const size_t jend( LOW ? i+1UL : N );
3544  size_t j( UPP ? i : 0UL );
3545 
      // 1x4 block of the target; skipped if both LOW and UPP are set.
3546  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
3547  {
3548  const size_t kbegin( ( IsUpper<MT4>::value )
3549  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3550  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3551  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
3552 
3553  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3554  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3555 
3556  SIMDType xmm1, xmm2, xmm3, xmm4;
3557  size_t k( kbegin );
3558 
3559  for( ; k<kpos; k+=SIMDSIZE ) {
3560  const SIMDType a1( A.load(i,k) );
3561  xmm1 += a1 * B.load(k,j );
3562  xmm2 += a1 * B.load(k,j+1UL);
3563  xmm3 += a1 * B.load(k,j+2UL);
3564  xmm4 += a1 * B.load(k,j+3UL);
3565  }
3566 
3567  (~C)(i,j ) -= sum( xmm1 );
3568  (~C)(i,j+1UL) -= sum( xmm2 );
3569  (~C)(i,j+2UL) -= sum( xmm3 );
3570  (~C)(i,j+3UL) -= sum( xmm4 );
3571 
3572  for( ; remainder && k<kend; ++k ) {
3573  (~C)(i,j ) -= A(i,k) * B(k,j );
3574  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3575  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL);
3576  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL);
3577  }
3578  }
3579 
      // 1x2 block of the target.
3580  for( ; (j+2UL) <= jend; j+=2UL )
3581  {
3582  const size_t kbegin( ( IsUpper<MT4>::value )
3583  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3584  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3585  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3586 
3587  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3588  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3589 
3590  SIMDType xmm1, xmm2;
3591  size_t k( kbegin );
3592 
3593  for( ; k<kpos; k+=SIMDSIZE ) {
3594  const SIMDType a1( A.load(i,k) );
3595  xmm1 += a1 * B.load(k,j );
3596  xmm2 += a1 * B.load(k,j+1UL);
3597  }
3598 
3599  (~C)(i,j ) -= sum( xmm1 );
3600  (~C)(i,j+1UL) -= sum( xmm2 );
3601 
3602  for( ; remainder && k<kend; ++k ) {
3603  (~C)(i,j ) -= A(i,k) * B(k,j );
3604  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3605  }
3606  }
3607 
      // Single leftover element; the k-range always extends to K here.
3608  if( j < jend )
3609  {
3610  const size_t kbegin( ( IsUpper<MT4>::value )
3611  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3612  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3613 
3614  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3615  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3616 
3617  SIMDType xmm1;
3618  size_t k( kbegin );
3619 
3620  for( ; k<kpos; k+=SIMDSIZE ) {
3621  xmm1 += A.load(i,k) * B.load(k,j);
3622  }
3623 
3624  (~C)(i,j) -= sum( xmm1 );
3625 
3626  for( ; remainder && k<K; ++k ) {
3627  (~C)(i,j) -= A(i,k) * B(k,j);
3628  }
3629  }
3630  }
3631  }
3633  //**********************************************************************************************
3634 
3635  //**Default subtraction assignment to column-major dense matrices (small matrices)**************
/*!\brief SIMD-based subtraction assignment kernel for small matrices with a column-major
//        target (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Every element of the product is computed as a dot product over k: SIMD vectors obtained
// from A.load(i,k) and B.load(k,j) are multiplied and accumulated, the accumulator is
// reduced with sum() and the result is subtracted from the target element. If neither LOW
// nor UPP is set, the target is first traversed four rows at a time with columns in groups of
// two plus a single leftover column. The remaining rows are processed in pairs and finally as
// a single leftover row; in these passes LOW and UPP restrict the processed column range.
//
// The k-range of each dot product is narrowed according to the lower/upper structure of \a A
// and \a B. If at least one operand is not padded (\a remainder is \a true), the SIMD loop
// stops at the last multiple of SIMDSIZE and a scalar loop processes the remaining k.
//
// NOTE(review): the accumulators (xmm1, ...) are default-constructed and then used with '+=';
// this presumes that a default-constructed SIMDType is zero - confirm against the SIMD types.
// NOTE(review): the return-type/selection line of this overload (listing line 3653) is not
// part of this listing.
*/
3650  template< typename MT3 // Type of the left-hand side target matrix
3651  , typename MT4 // Type of the left-hand side matrix operand
3652  , typename MT5 > // Type of the right-hand side matrix operand
3654  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3655  {
3656  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value ); // Scalar tail loops are needed unless both operands are padded
3657 
3658  const size_t M( A.rows() );
3659  const size_t N( B.columns() );
3660  const size_t K( A.columns() );
3661 
3662  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3663 
3664  size_t i( 0UL );
3665 
      // First pass (only if neither LOW nor UPP is set): four rows of the target per iteration.
3666  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
3667  {
3668  size_t j( 0UL );
3669 
      // 4x2 block of the target.
3670  for( ; (j+2UL) <= N; j+=2UL )
3671  {
      // Start of the k-range, rounded down to a multiple of SIMDSIZE by the bit mask.
3672  const size_t kbegin( ( IsUpper<MT4>::value )
3673  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3674  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // End of the k-range, limited by the last row (lower A) and/or last column (upper B) of the block.
3675  const size_t kend( ( IsLower<MT4>::value )
3676  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
3677  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3678 
3679  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend ); // End of the SIMD part of the k-range
3680  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3681 
3682  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3683  size_t k( kbegin );
3684 
3685  for( ; k<kpos; k+=SIMDSIZE ) {
3686  const SIMDType a1( A.load(i ,k) );
3687  const SIMDType a2( A.load(i+1UL,k) );
3688  const SIMDType a3( A.load(i+2UL,k) );
3689  const SIMDType a4( A.load(i+3UL,k) );
3690  const SIMDType b1( B.load(k,j ) );
3691  const SIMDType b2( B.load(k,j+1UL) );
3692  xmm1 += a1 * b1;
3693  xmm2 += a1 * b2;
3694  xmm3 += a2 * b1;
3695  xmm4 += a2 * b2;
3696  xmm5 += a3 * b1;
3697  xmm6 += a3 * b2;
3698  xmm7 += a4 * b1;
3699  xmm8 += a4 * b2;
3700  }
3701 
      // Reduce each accumulator and subtract it from its target element.
3702  (~C)(i ,j ) -= sum( xmm1 );
3703  (~C)(i ,j+1UL) -= sum( xmm2 );
3704  (~C)(i+1UL,j ) -= sum( xmm3 );
3705  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3706  (~C)(i+2UL,j ) -= sum( xmm5 );
3707  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
3708  (~C)(i+3UL,j ) -= sum( xmm7 );
3709  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
3710 
      // Scalar tail for the k not covered by the SIMD loop.
3711  for( ; remainder && k<kend; ++k ) {
3712  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3713  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3714  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3715  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3716  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3717  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL);
3718  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3719  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL);
3720  }
3721  }
3722 
      // 4x1 block: single leftover column of the four rows.
3723  if( j < N )
3724  {
3725  const size_t kbegin( ( IsUpper<MT4>::value )
3726  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3727  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3728  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
3729 
3730  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3731  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3732 
3733  SIMDType xmm1, xmm2, xmm3, xmm4;
3734  size_t k( kbegin );
3735 
3736  for( ; k<kpos; k+=SIMDSIZE ) {
3737  const SIMDType b1( B.load(k,j) );
3738  xmm1 += A.load(i ,k) * b1;
3739  xmm2 += A.load(i+1UL,k) * b1;
3740  xmm3 += A.load(i+2UL,k) * b1;
3741  xmm4 += A.load(i+3UL,k) * b1;
3742  }
3743 
3744  (~C)(i ,j) -= sum( xmm1 );
3745  (~C)(i+1UL,j) -= sum( xmm2 );
3746  (~C)(i+2UL,j) -= sum( xmm3 );
3747  (~C)(i+3UL,j) -= sum( xmm4 );
3748 
3749  for( ; remainder && k<kend; ++k ) {
3750  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3751  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3752  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3753  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3754  }
3755  }
3756  }
3757 
      // Second pass: two rows of the target per iteration.
3758  for( ; (i+2UL) <= M; i+=2UL )
3759  {
3760  const size_t jend( LOW ? i+2UL : N ); // LOW: no columns beyond the diagonal of the second row
3761  size_t j( UPP ? i : 0UL ); // UPP: no columns before the diagonal of the first row
3762 
      // 2x2 block of the target.
3763  for( ; (j+2UL) <= jend; j+=2UL )
3764  {
3765  const size_t kbegin( ( IsUpper<MT4>::value )
3766  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3767  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3768  const size_t kend( ( IsLower<MT4>::value )
3769  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3770  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3771 
3772  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3773  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3774 
3775  SIMDType xmm1, xmm2, xmm3, xmm4;
3776  size_t k( kbegin );
3777 
3778  for( ; k<kpos; k+=SIMDSIZE ) {
3779  const SIMDType a1( A.load(i ,k) );
3780  const SIMDType a2( A.load(i+1UL,k) );
3781  const SIMDType b1( B.load(k,j ) );
3782  const SIMDType b2( B.load(k,j+1UL) );
3783  xmm1 += a1 * b1;
3784  xmm2 += a1 * b2;
3785  xmm3 += a2 * b1;
3786  xmm4 += a2 * b2;
3787  }
3788 
3789  (~C)(i ,j ) -= sum( xmm1 );
3790  (~C)(i ,j+1UL) -= sum( xmm2 );
3791  (~C)(i+1UL,j ) -= sum( xmm3 );
3792  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3793 
3794  for( ; remainder && k<kend; ++k ) {
3795  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3796  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3797  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3798  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3799  }
3800  }
3801 
      // 2x1 block: single leftover column of the row pair.
3802  if( j < jend )
3803  {
3804  const size_t kbegin( ( IsUpper<MT4>::value )
3805  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3806  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3807  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3808 
3809  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3810  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3811 
3812  SIMDType xmm1, xmm2;
3813  size_t k( kbegin );
3814 
3815  for( ; k<kpos; k+=SIMDSIZE ) {
3816  const SIMDType b1( B.load(k,j) );
3817  xmm1 += A.load(i ,k) * b1;
3818  xmm2 += A.load(i+1UL,k) * b1;
3819  }
3820 
3821  (~C)(i ,j) -= sum( xmm1 );
3822  (~C)(i+1UL,j) -= sum( xmm2 );
3823 
3824  for( ; remainder && k<kend; ++k ) {
3825  (~C)(i ,j) -= A(i ,k) * B(k,j);
3826  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3827  }
3828  }
3829  }
3830 
      // Final pass: single leftover row of the target.
3831  if( i < M )
3832  {
3833  const size_t jend( LOW ? i+1UL : N );
3834  size_t j( UPP ? i : 0UL );
3835 
      // 1x2 block of the target.
3836  for( ; (j+2UL) <= jend; j+=2UL )
3837  {
3838  const size_t kbegin( ( IsUpper<MT4>::value )
3839  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3840  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3841  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3842 
3843  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3844  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3845 
3846  SIMDType xmm1, xmm2;
3847  size_t k( kbegin );
3848 
3849  for( ; k<kpos; k+=SIMDSIZE ) {
3850  const SIMDType a1( A.load(i,k) );
3851  xmm1 += a1 * B.load(k,j );
3852  xmm2 += a1 * B.load(k,j+1UL);
3853  }
3854 
3855  (~C)(i,j ) -= sum( xmm1 );
3856  (~C)(i,j+1UL) -= sum( xmm2 );
3857 
3858  for( ; remainder && k<kend; ++k ) {
3859  (~C)(i,j ) -= A(i,k) * B(k,j );
3860  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3861  }
3862  }
3863 
      // Single leftover element; the k-range always extends to K here.
3864  if( j < jend )
3865  {
3866  const size_t kbegin( ( IsUpper<MT4>::value )
3867  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3868  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3869 
3870  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3871  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3872 
3873  SIMDType xmm1;
3874  size_t k( kbegin );
3875 
3876  for( ; k<kpos; k+=SIMDSIZE ) {
3877  xmm1 += A.load(i,k) * B.load(k,j);
3878  }
3879 
3880  (~C)(i,j) -= sum( xmm1 );
3881 
3882  for( ; remainder && k<K; ++k ) {
3883  (~C)(i,j) -= A(i,k) * B(k,j);
3884  }
3885  }
3886  }
3887  }
3889  //**********************************************************************************************
3890 
3891  //**Default subtraction assignment to dense matrices (large matrices)***************************
/*!\brief Fallback subtraction assignment kernel for large matrices (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload performs no work of its own; it forwards to selectDefaultSubAssignKernel().
//
// NOTE(review): the return-type/selection line of this overload (listing line 3908) is not
// part of this listing, so the condition that separates it from the second
// selectLargeSubAssignKernel() overload cannot be verified here.
*/
3905  template< typename MT3 // Type of the left-hand side target matrix
3906  , typename MT4 // Type of the left-hand side matrix operand
3907  , typename MT5 > // Type of the right-hand side matrix operand
3909  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3910  {
3911  selectDefaultSubAssignKernel( ~C, A, B );
3912  }
3914  //**********************************************************************************************
3915 
3916  //**Default subtraction assignment to dense matrices (large matrices)***************************
/*!\brief Subtraction assignment kernel for large matrices based on the lmmm()/ummm()/mmm()
//        functions (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Depending on the compile-time flags LOW and UPP the call is dispatched to lmmm(), ummm()
// or mmm(). LOW takes precedence if both flags are set. All three are called with the scalar
// arguments ElementType(-1) and ElementType(1).
//
// NOTE(review): presumably the two scalars are the factors for the product and for the
// existing content of \a C (i.e. C = -1*A*B + 1*C) - confirm against <blaze/math/dense/MMM.h>.
// NOTE(review): the return-type/selection line of this overload (listing line 3934) is not
// part of this listing.
*/
3931  template< typename MT3 // Type of the left-hand side target matrix
3932  , typename MT4 // Type of the left-hand side matrix operand
3933  , typename MT5 > // Type of the right-hand side matrix operand
3935  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3936  {
3937  if( LOW )
3938  lmmm( C, A, B, ElementType(-1), ElementType(1) );
3939  else if( UPP )
3940  ummm( C, A, B, ElementType(-1), ElementType(1) );
3941  else
3942  mmm( C, A, B, ElementType(-1), ElementType(1) );
3943  }
3945  //**********************************************************************************************
3946 
3947  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
/*!\brief Fallback for the BLAS-based subtraction assignment (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload does not call any BLAS function; it forwards to selectLargeSubAssignKernel().
//
// NOTE(review): the return-type/selection line of this overload (listing line 3964) is not
// part of this listing, so the condition that separates it from the BLAS-calling overload
// cannot be verified here.
*/
3961  template< typename MT3 // Type of the left-hand side target matrix
3962  , typename MT4 // Type of the left-hand side matrix operand
3963  , typename MT5 > // Type of the right-hand side matrix operand
3965  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3966  {
3967  selectLargeSubAssignKernel( C, A, B );
3968  }
3970  //**********************************************************************************************
3971 
3972  //**BLAS-based subtraction assignment to dense matrices*****************************************
3973 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
3974 
/*!\brief BLAS-based subtraction assignment of a dense matrix multiplication (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled. Three cases are distinguished:
//  - \a A is triangular: \a B is evaluated serially into a temporary, trmm() is applied to the
//    temporary with \a A as left-side operand, and the temporary is subtracted from \a C.
//  - \a B is triangular: \a A is evaluated serially into a temporary, trmm() is applied to the
//    temporary with \a B as right-side operand, and the temporary is subtracted from \a C.
//  - otherwise: gemm() is called directly on \a C with the scalar arguments ET(-1) and ET(1).
//
// NOTE(review): presumably trmm() overwrites the temporary with the triangular product and
// gemm() computes C = -1*A*B + 1*C - confirm against the BLAS wrapper headers.
// NOTE(review): the return-type/selection line of this overload (listing line 3990) is not
// part of this listing.
*/
3987  template< typename MT3 // Type of the left-hand side target matrix
3988  , typename MT4 // Type of the left-hand side matrix operand
3989  , typename MT5 > // Type of the right-hand side matrix operand
3991  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3992  {
3993  using ET = ElementType_<MT3>;
3994 
3995  if( IsTriangular<MT4>::value ) {
3996  ResultType_<MT3> tmp( serial( B ) ); // Temporary in the result type of the target, initialized with B
3997  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3998  subAssign( C, tmp );
3999  }
4000  else if( IsTriangular<MT5>::value ) {
4001  ResultType_<MT3> tmp( serial( A ) ); // Temporary in the result type of the target, initialized with A
4002  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
4003  subAssign( C, tmp );
4004  }
4005  else {
4006  gemm( C, A, B, ET(-1), ET(1) );
4007  }
4008  }
4010 #endif
4011  //**********************************************************************************************
4012 
4013  //**Subtraction assignment to sparse matrices***************************************************
4014  // No special implementation for the subtraction assignment to sparse matrices.
4015  //**********************************************************************************************
4016 
4017  //**Schur product assignment to dense matrices**************************************************
/*!\brief Schur product assignment of a dense matrix-transpose dense matrix multiplication to
//        a dense matrix (\f$ C\circ=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression for the Schur product.
//
// The multiplication expression is first evaluated serially into a temporary of type
// ResultType; the temporary is then Schur-assigned to the target. The dimensions of target
// and expression are only checked via internal assertions.
//
// NOTE(review): listing lines 4034 and 4036-4038 are elided in this listing; any statements
// on those lines are not visible here.
*/
4030  template< typename MT // Type of the target dense matrix
4031  , bool SO > // Storage order of the target dense matrix
4032  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4033  {
4035 
4039 
4040  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4041  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4042 
4043  const ResultType tmp( serial( rhs ) ); // Serial evaluation of the product into a temporary
4044  schurAssign( ~lhs, tmp );
4045  }
4047  //**********************************************************************************************
4048 
4049  //**Schur product assignment to sparse matrices*************************************************
4050  // No special implementation for the Schur product assignment to sparse matrices.
4051  //**********************************************************************************************
4052 
4053  //**Multiplication assignment to dense matrices*************************************************
4054  // No special implementation for the multiplication assignment to dense matrices.
4055  //**********************************************************************************************
4056 
4057  //**Multiplication assignment to sparse matrices************************************************
4058  // No special implementation for the multiplication assignment to sparse matrices.
4059  //**********************************************************************************************
4060 
4061  //**SMP assignment to dense matrices************************************************************
/*!\brief SMP assignment of a dense matrix-transpose dense matrix multiplication to a dense
//        matrix (\f$ C=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// An empty target (zero rows or columns) is left untouched. If the inner dimension of the
// product is zero, the target is reset. Otherwise both operands are evaluated into objects of
// type LT and RT, and the product of the evaluated operands is forwarded to smpAssign().
//
// NOTE(review): the return-type/selection line of this overload (listing line 4078) is not
// part of this listing.
*/
4076  template< typename MT // Type of the target dense matrix
4077  , bool SO > // Storage order of the target dense matrix
4079  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4080  {
4082 
4083  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4084  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4085 
4086  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4087  return;
4088  }
4089  else if( rhs.lhs_.columns() == 0UL ) {
4090  reset( ~lhs ); // Inner dimension is zero: the target is reset instead of computed
4091  return;
4092  }
4093 
4094  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4095  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4096 
4097  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4098  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4099  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4100  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4101  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4102  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4103 
4104  smpAssign( ~lhs, A * B );
4105  }
4107  //**********************************************************************************************
4108 
4109  //**SMP assignment to sparse matrices***********************************************************
/*!\brief SMP assignment of a dense matrix-transpose dense matrix multiplication to a sparse
//        matrix (\f$ C=A*B \f$).
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// The multiplication expression is evaluated into a temporary of type TmpType. The temporary
// is passed through a ForwardFunctor object and the result is forwarded to smpAssign().
//
// NOTE(review): the definitions of TmpType and ForwardFunctor (listing lines 4129-4139) and
// the return-type/selection line (listing line 4126) are elided in this listing; what the
// functor does to the temporary cannot be verified here.
*/
4124  template< typename MT // Type of the target sparse matrix
4125  , bool SO > // Storage order of the target sparse matrix
4127  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4128  {
4130 
4132 
4139 
4140  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4141  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4142 
4143  const ForwardFunctor fwd;
4144 
4145  const TmpType tmp( rhs ); // Evaluation of the product into a temporary
4146  smpAssign( ~lhs, fwd( tmp ) );
4147  }
4149  //**********************************************************************************************
4150 
4151  //**SMP addition assignment to dense matrices***************************************************
/*!\brief SMP addition assignment of a dense matrix-transpose dense matrix multiplication to a
//        dense matrix (\f$ C+=A*B \f$).
//
// The body reads a target \a lhs and a multiplication expression \a rhs. If the target is
// empty or the inner dimension of the product is zero, nothing is done. Otherwise both
// operands are evaluated into objects of type LT and RT, and the product of the evaluated
// operands is forwarded to smpAddAssign().
//
// NOTE(review): the signature lines of this function (listing lines 4169-4170) are elided in
// this listing; the function name and parameter list are presumably those of smpAddAssign()
// for a DenseMatrix<MT,SO> target - confirm against the original header.
*/
4167  template< typename MT // Type of the target dense matrix
4168  , bool SO > // Storage order of the target dense matrix
4171  {
4173 
4174  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4175  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4176 
      // Nothing to add for an empty target or an empty inner dimension.
4177  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4178  return;
4179  }
4180 
4181  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4182  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4183 
4184  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4185  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4186  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4187  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4188  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4189  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4190 
4191  smpAddAssign( ~lhs, A * B );
4192  }
4194  //**********************************************************************************************
4195 
4196  //**SMP addition assignment to sparse matrices**************************************************
4197  // No special implementation for the SMP addition assignment to sparse matrices.
4198  //**********************************************************************************************
4199 
4200  //**SMP subtraction assignment to dense matrices************************************************
/*!\brief SMP subtraction assignment of a dense matrix-transpose dense matrix multiplication to
//        a dense matrix (\f$ C-=A*B \f$).
//
// The body reads a target \a lhs and a multiplication expression \a rhs. If the target is
// empty or the inner dimension of the product is zero, nothing is done. Otherwise both
// operands are evaluated into objects of type LT and RT, and the product of the evaluated
// operands is forwarded to smpSubAssign().
//
// NOTE(review): the signature lines of this function (listing lines 4218-4219) are elided in
// this listing; the function name and parameter list are presumably those of smpSubAssign()
// for a DenseMatrix<MT,SO> target - confirm against the original header.
*/
4216  template< typename MT // Type of the target dense matrix
4217  , bool SO > // Storage order of the target dense matrix
4220  {
4222 
4223  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4224  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4225 
      // Nothing to subtract for an empty target or an empty inner dimension.
4226  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4227  return;
4228  }
4229 
4230  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4231  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4232 
4233  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4234  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4235  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4236  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4237  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4238  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4239 
4240  smpSubAssign( ~lhs, A * B );
4241  }
4243  //**********************************************************************************************
4244 
4245  //**SMP subtraction assignment to sparse matrices***********************************************
4246  // No special implementation for the SMP subtraction assignment to sparse matrices.
4247  //**********************************************************************************************
4248 
4249  //**SMP Schur product assignment to dense matrices**********************************************
/*!\brief SMP Schur product assignment of a dense matrix-transpose dense matrix multiplication
//        to a dense matrix (\f$ C\circ=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression for the Schur product.
//
// The multiplication expression is first evaluated into a temporary of type ResultType; the
// temporary is then forwarded to smpSchurAssign(). Unlike schurAssign(), the temporary is
// constructed from \a rhs directly rather than from serial( rhs ). The dimensions of target
// and expression are only checked via internal assertions.
//
// NOTE(review): listing lines 4266 and 4268-4270 are elided in this listing; any statements
// on those lines are not visible here.
*/
4262  template< typename MT // Type of the target dense matrix
4263  , bool SO > // Storage order of the target dense matrix
4264  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4265  {
4267 
4271 
4272  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4273  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4274 
4275  const ResultType tmp( rhs ); // Evaluation of the product into a temporary
4276  smpSchurAssign( ~lhs, tmp );
4277  }
4279  //**********************************************************************************************
4280 
4281  //**SMP Schur product assignment to sparse matrices*********************************************
4282  // No special implementation for the SMP Schur product assignment to sparse matrices.
4283  //**********************************************************************************************
4284 
4285  //**SMP multiplication assignment to dense matrices*********************************************
4286  // No special implementation for the SMP multiplication assignment to dense matrices.
4287  //**********************************************************************************************
4288 
4289  //**SMP multiplication assignment to sparse matrices********************************************
4290  // No special implementation for the SMP multiplication assignment to sparse matrices.
4291  //**********************************************************************************************
4292 
4293  //**Compile time checks*************************************************************************
4301  //**********************************************************************************************
4302 };
4303 //*************************************************************************************************
4304 
4305 
4306 
4307 
4308 //=================================================================================================
4309 //
4310 // DMATSCALARMULTEXPR SPECIALIZATION
4311 //
4312 //=================================================================================================
4313 
4314 //*************************************************************************************************
4322 template< typename MT1 // Type of the left-hand side dense matrix
4323  , typename MT2 // Type of the right-hand side dense matrix
4324  , bool SF // Symmetry flag
4325  , bool HF // Hermitian flag
4326  , bool LF // Lower flag
4327  , bool UF // Upper flag
4328  , typename ST > // Type of the right-hand side scalar value
4329 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >
4330  : public MatScalarMultExpr< DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >, false > >
4331  , private Computation
4332 {
4333  private:
4334  //**Type definitions****************************************************************************
4337 
// Private shorthand aliases for the result, element and composite types of the operands.
// NOTE(review): MMM is declared on a line that is not part of this listing; it is the type
// of the matrix expression accepted by the constructor of this class.
      // Result type of the expression type MMM.
4338  using RES = ResultType_<MMM>;
      // Result types of the left-hand side (MT1) and right-hand side (MT2) matrix types.
4339  using RT1 = ResultType_<MT1>;
4340  using RT2 = ResultType_<MT2>;
      // Element types of the two operand result types.
4341  using ET1 = ElementType_<RT1>;
4342  using ET2 = ElementType_<RT2>;
      // Composite types of the two operand matrix types.
4343  using CT1 = CompositeType_<MT1>;
4344  using CT2 = CompositeType_<MT2>;
4345  //**********************************************************************************************
4346 
4347  //**********************************************************************************************
// Compile-time switch: true when the left-hand side operand type MT1 is a computation
// or requires an intermediate evaluation.
4349  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4350  //**********************************************************************************************
4351 
4352  //**********************************************************************************************
// Compile-time switch: true when the right-hand side operand type MT2 is a computation
// or requires an intermediate evaluation.
4354  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4355  //**********************************************************************************************
4356 
4357  //**********************************************************************************************
// Effective structure flags derived from the template flags SF, HF, LF and UF:
//  - SYM:  the symmetry flag is set and none of the other three flags is set
//  - HERM: the Hermitian flag is set and neither the lower nor the upper flag is set
//  - LOW:  the lower flag is set, or the upper flag is combined with SF or HF
//  - UPP:  the upper flag is set, or the lower flag is combined with SF or HF
// LOW and UPP can therefore both be true at the same time.
4359  enum : bool {
4360  SYM = ( SF && !( HF || LF || UF ) ),
4361  HERM = ( HF && !( LF || UF ) ),
4362  LOW = ( LF || ( ( SF || HF ) && UF ) ),
4363  UPP = ( UF || ( ( SF || HF ) && LF ) )
4364  };
4365  //**********************************************************************************************
4366 
4367  //**********************************************************************************************
4369 
// Helper structure reporting whether an intermediate evaluation of an operand is required.
// The value depends only on evaluateLeft and evaluateRight; the template parameters T1, T2
// and T3 do not take part in the visible expression.
4372  template< typename T1, typename T2, typename T3 >
4373  struct IsEvaluationRequired {
4374  enum : bool { value = ( evaluateLeft || evaluateRight ) };
4375  };
4376  //**********************************************************************************************
4377 
4378  //**********************************************************************************************
4380 
// Helper structure deciding whether the BLAS kernel may be used for the given types.
// The visible conditions require BLAS mode and BLAS matrix/matrix multiplication to be
// enabled, none of SYM/HERM/LOW/UPP to be set, the simdEnabled flag of T1, T2 and T3 to be
// set, and T1 and T3 to have the same element type.
// NOTE(review): the embedded numbering shows that several lines of this definition are
// missing from the listing (the expression is incomplete as shown), so the original
// condition contains further terms that cannot be documented from here.
4382  template< typename T1, typename T2, typename T3, typename T4 >
4383  struct UseBlasKernel {
4384  enum : bool { value = BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION &&
4385  !SYM && !HERM && !LOW && !UPP &&
4390  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4395  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
4397  };
4398  //**********************************************************************************************
4399 
4400  //**********************************************************************************************
4402 
// Helper structure deciding whether the vectorized default kernel may be used.
// The visible conditions require useOptimizedKernels, the simdEnabled flag of T1, T2 and
// T3, and SIMD addition and multiplication support for the element types of T2 and T3.
// NOTE(review): some lines of this definition are missing from the listing (the fragment
// ", T4 >::value" belongs to a condition whose beginning is not visible).
4404  template< typename T1, typename T2, typename T3, typename T4 >
4405  struct UseVectorizedDefaultKernel {
4406  enum : bool { value = useOptimizedKernels &&
4408  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4412  , T4 >::value &&
4413  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
4414  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
4415  };
4416  //**********************************************************************************************
4417 
4418  //**********************************************************************************************
4420 
// Functor type selected from the effective structure flags, checked in this order:
//  - HERM            -> DeclHerm
//  - SYM             -> DeclSym
//  - LOW and UPP     -> DeclDiag
//  - LOW only        -> DeclLow
//  - UPP only        -> DeclUpp
//  - none of them    -> Noop
4422  using ForwardFunctor = IfTrue_< HERM
4423  , DeclHerm
4424  , IfTrue_< SYM
4425  , DeclSym
4426  , IfTrue_< LOW
4427  , IfTrue_< UPP
4428  , DeclDiag
4429  , DeclLow >
4430  , IfTrue_< UPP
4431  , DeclUpp
4432  , Noop > > > >;
4433  //**********************************************************************************************
4434 
4435  public:
4436  //**Type definitions****************************************************************************
4439 
// Public type definitions of the scaled multiplication expression.
// NOTE(review): the embedded numbering skips several lines in this section, so further
// aliases (e.g. the ElementType used below) are declared on lines not visible here.
      // Result type of multiplying the product's result type RES with the scalar type ST.
4440  using ResultType = MultTrait_<RES,ST>;
      // Element access returns a constant value of the element type.
4445  using ReturnType = const ElementType;
      // The expression is used as a constant ResultType within other expressions.
4446  using CompositeType = const ResultType;
4447
4450
      // The right-hand side operand is the scalar, held by value.
4452  using RightOperand = ST;
4453 
4456 
4459  //**********************************************************************************************
4460 
4461  //**Compilation flags***************************************************************************
// Compilation flags of the expression.
// simdEnabled: the visible conditions require that neither MT1 nor MT2 is diagonal and
// that both operand types have their simdEnabled flag set.
// NOTE(review): the remainder of the simdEnabled expression is missing from the listing.
4463  enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
4464  MT1::simdEnabled && MT2::simdEnabled &&
4468
      // smpAssignable: true only if neither operand needs an intermediate evaluation and
      // both operand types are themselves SMP assignable.
4470  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4471  !evaluateRight && MT2::smpAssignable };
4472  //**********************************************************************************************
4473 
4474  //**SIMD properties*****************************************************************************
// SIMD size taken from SIMDTrait for the element type of the expression.
4476  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
4477  //**********************************************************************************************
4478 
4479  //**Constructor*********************************************************************************
// Constructor: stores the matrix multiplication expression and the scalar it is scaled by.
//   matrix  The left-hand side matrix multiplication expression.
//   scalar  The right-hand side scalar of the multiplication expression.
4485  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4486  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4487  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4488  {}
4489  //**********************************************************************************************
4490 
4491  //**Access operator*****************************************************************************
// 2D element access: returns element (i,j) of the matrix expression multiplied by the scalar.
// The indices are only checked via internal assertions; at() is the variant that throws.
//   i  Row index, expected to be smaller than the number of rows.
//   j  Column index, expected to be smaller than the number of columns.
4498  inline ReturnType operator()( size_t i, size_t j ) const {
4499  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4500  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4501  return matrix_(i,j) * scalar_;
4502  }
4503  //**********************************************************************************************
4504 
4505  //**At function*********************************************************************************
// Checked element access: validates both indices and then forwards to operator().
//   i  Row index; BLAZE_THROW_OUT_OF_RANGE is invoked if it is not smaller than rows().
//   j  Column index; BLAZE_THROW_OUT_OF_RANGE is invoked if it is not smaller than columns().
4513  inline ReturnType at( size_t i, size_t j ) const {
4514  if( i >= matrix_.rows() ) {
4515  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
4516  }
4517  if( j >= matrix_.columns() ) {
4518  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
4519  }
4520  return (*this)(i,j);
4521  }
4522  //**********************************************************************************************
4523 
4524  //**Rows function*******************************************************************************
// Returns the number of rows, as reported by the wrapped matrix expression.
4529  inline size_t rows() const {
4530  return matrix_.rows();
4531  }
4532  //**********************************************************************************************
4533 
4534  //**Columns function****************************************************************************
// Returns the number of columns, as reported by the wrapped matrix expression.
4539  inline size_t columns() const {
4540  return matrix_.columns();
4541  }
4542  //**********************************************************************************************
4543 
4544  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix multiplication expression.
4549  inline LeftOperand leftOperand() const {
4550  return matrix_;
4551  }
4552  //**********************************************************************************************
4553 
4554  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar value.
4559  inline RightOperand rightOperand() const {
4560  return scalar_;
4561  }
4562  //**********************************************************************************************
4563 
4564  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
// The query is delegated unchanged to the wrapped matrix expression.
//   alias  The address to be checked.
4570  template< typename T >
4571  inline bool canAlias( const T* alias ) const {
4572  return matrix_.canAlias( alias );
4573  }
4574  //**********************************************************************************************
4575 
4576  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
// The query is delegated unchanged to the wrapped matrix expression.
//   alias  The address to be checked.
4582  template< typename T >
4583  inline bool isAliased( const T* alias ) const {
4584  return matrix_.isAliased( alias );
4585  }
4586  //**********************************************************************************************
4587 
4588  //**********************************************************************************************
// Returns the alignment state reported by the wrapped matrix expression.
4593  inline bool isAligned() const {
4594  return matrix_.isAligned();
4595  }
4596  //**********************************************************************************************
4597 
4598  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// As visible here, this requires rows()*columns() to reach SMP_DMATTDMATMULT_THRESHOLD and,
// in addition, at least one of: BLAS mode disabled, BLAS matrix/matrix multiplication
// disabled, or rows()*columns() below DMATTDMATMULT_THRESHOLD.
// NOTE(review): one line of the condition is missing from the listing (the parentheses
// are unbalanced as shown), so the first group contains a further alternative.
4603  inline bool canSMPAssign() const noexcept {
4604  return ( !BLAZE_BLAS_MODE ||
4605  !BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION ||
4607  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
4608  ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD );
4609  }
4610  //**********************************************************************************************
4611 
4612  private:
4613  //**Member variables****************************************************************************
      // Left-hand side operand: the matrix multiplication expression (set by the constructor).
4614  LeftOperand matrix_;
      // Right-hand side operand: the scalar the product is multiplied with.
4615  RightOperand scalar_;
4616  //**********************************************************************************************
4617 
4618  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled matrix multiplication expression to a dense matrix.
// Extracts both operands of the wrapped product, handles the empty cases, evaluates the
// operands serially and hands them to selectAssignKernel() together with the scalar.
// NOTE(review): the types LT and RT used below are declared on lines that are not part of
// this listing.
4630  template< typename MT // Type of the target dense matrix
4631  , bool SO > // Storage order of the target dense matrix
4632  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4633  {
4635
4636  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4637  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4638
      // Operands of the wrapped matrix multiplication expression.
4639  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
4640  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
4641
      // An empty target needs no work; an empty inner dimension yields a reset target.
4642  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4643  return;
4644  }
4645  else if( left.columns() == 0UL ) {
4646  reset( ~lhs );
4647  return;
4648  }
4649
4650  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4651  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4652
      // The evaluated operands must match the original operands and the target in size.
4653  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4654  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4655  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4656  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4657  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4658  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4659
4660  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4661  }
4662  //**********************************************************************************************
4663 
4664  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = scalar * ( A * B ).
// Dispatches to selectSmallAssignKernel() when the visible condition holds (among others:
// C.rows()*C.columns() below DMATTDMATMULT_THRESHOLD) and to selectBlasAssignKernel()
// otherwise.
// NOTE(review): the first line of the if-condition is missing from the listing, so the
// complete selection criterion cannot be documented from here.
4675  template< typename MT3 // Type of the left-hand side target matrix
4676  , typename MT4 // Type of the left-hand side matrix operand
4677  , typename MT5 // Type of the right-hand side matrix operand
4678  , typename ST2 > // Type of the scalar value
4679  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4680  {
4682  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
4683  selectSmallAssignKernel( C, A, B, scalar );
4684  else
4685  selectBlasAssignKernel( C, A, B, scalar );
4686  }
4687  //**********************************************************************************************
4688 
4689  //**Default assignment to row-major dense matrices (general/general)****************************
// Default assignment kernel for row-major targets (general/general operands).
// Computes C = scalar * ( A * B ) element by element, traversing C row by row. The loop
// bounds are narrowed by the triangular structure traits of MT4 and MT5 and by the flags
// SYM/HERM/LOW/UPP; elements outside the computed ranges are reset.
// NOTE(review): the line carrying the return type of this function and parts of the
// jbegin/jend expressions are missing from the listing.
4703  template< typename MT3 // Type of the left-hand side target matrix
4704  , typename MT4 // Type of the left-hand side matrix operand
4705  , typename MT5 // Type of the right-hand side matrix operand
4706  , typename ST2 > // Type of the scalar value
4708  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4709  {
4710  const size_t M( A.rows() );
4711  const size_t N( B.columns() );
4712  const size_t K( A.columns() );
4713
      // Any structured result requires a square target.
4714  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4715
      // Row range [ibegin,iend) that is actually computed; it shrinks when A (and B) are
      // strictly lower or strictly upper.
4716  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
4717  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
4718  :( 0UL ) );
4719  const size_t iend( ( IsStrictlyUpper<MT4>::value )
4720  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
4721  :( M ) );
4722  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4723
      // Rows in front of the computed range are reset completely.
4724  for( size_t i=0UL; i<ibegin; ++i ) {
4725  for( size_t j=0UL; j<N; ++j ) {
4726  reset( (~C)(i,j) );
4727  }
4728  }
4729  for( size_t i=ibegin; i<iend; ++i )
4730  {
      // Column range [jbegin,jend) computed for row i.
4731  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4733  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
4734  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
4736  ?( SYM || HERM || UPP ? max( i, 1UL ) : 1UL )
4737  :( SYM || HERM || UPP ? i : 0UL ) ) );
4738  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4740  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
4741  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
4743  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
4744  :( LOW ? i+1UL : N ) ) );
4745
      // With a structure flag set the column range may be empty: reset the row and skip it.
4746  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) {
4747  for( size_t j=0UL; j<N; ++j ) {
4748  reset( (~C)(i,j) );
4749  }
4750  continue;
4751  }
4752
4753  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4754
      // Reset the elements in front of jbegin; for SYM/HERM this starts at column i because
      // the elements left of it are filled in by the mirroring step at the end.
4755  for( size_t j=( SYM || HERM ? i : 0UL ); j<jbegin; ++j ) {
4756  reset( (~C)(i,j) );
4757  }
4758  for( size_t j=jbegin; j<jend; ++j )
4759  {
      // Summation range [kbegin,kend), narrowed by the lower/upper traits of both operands.
4760  const size_t kbegin( ( IsUpper<MT4>::value )
4761  ?( ( IsLower<MT5>::value )
4762  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4763  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4764  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4765  :( ( IsLower<MT5>::value )
4766  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4767  :( 0UL ) ) );
4768  const size_t kend( ( IsLower<MT4>::value )
4769  ?( ( IsUpper<MT5>::value )
4770  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4771  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4772  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4773  :( ( IsUpper<MT5>::value )
4774  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4775  :( K ) ) );
4776  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4777
      // The first product initializes the element, the remaining products are accumulated,
      // and the finished sum is scaled once.
4778  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4779  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4780  (~C)(i,j) += A(i,k) * B(k,j);
4781  }
4782  (~C)(i,j) *= scalar;
4783  }
      // Reset the elements behind the computed column range.
4784  for( size_t j=jend; j<N; ++j ) {
4785  reset( (~C)(i,j) );
4786  }
4787  }
      // Rows behind the computed range are reset completely.
4788  for( size_t i=iend; i<M; ++i ) {
4789  for( size_t j=0UL; j<N; ++j ) {
4790  reset( (~C)(i,j) );
4791  }
4792  }
4793
      // SYM/HERM: fill the elements below the diagonal from their mirrored counterparts
      // (conjugated in the Hermitian case).
4794  if( SYM || HERM ) {
4795  for( size_t i=1UL; i<M; ++i ) {
4796  for( size_t j=0UL; j<i; ++j ) {
4797  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
4798  }
4799  }
4800  }
4801  }
4802  //**********************************************************************************************
4803 
4804  //**Default assignment to column-major dense matrices (general/general)*************************
// Default assignment kernel for column-major targets (general/general operands).
// Computes C = scalar * ( A * B ) element by element, traversing C column by column. It is
// enabled only if neither MT4 nor MT5 is diagonal. The loop bounds are narrowed by the
// triangular structure traits of MT4 and MT5 and by the flags SYM/HERM/LOW/UPP; elements
// outside the computed ranges are reset.
// NOTE(review): parts of the ibegin/iend expressions inside the column loop are missing
// from the listing.
4818  template< typename MT3 // Type of the left-hand side target matrix
4819  , typename MT4 // Type of the left-hand side matrix operand
4820  , typename MT5 // Type of the right-hand side matrix operand
4821  , typename ST2 > // Type of the scalar value
4822  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
4823  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4824  {
4825  const size_t M( A.rows() );
4826  const size_t N( B.columns() );
4827  const size_t K( A.columns() );
4828
      // Any structured result requires a square target.
4829  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4830
      // Column range [jbegin,jend) that is actually computed; it shrinks when B (and A) are
      // strictly upper or strictly lower.
4831  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
4832  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
4833  :( 0UL ) );
4834  const size_t jend( ( IsStrictlyLower<MT5>::value )
4835  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
4836  :( N ) );
4837  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4838
      // Columns in front of the computed range are reset completely.
4839  for( size_t j=0UL; j<jbegin; ++j ) {
4840  for( size_t i=0UL; i<M; ++i ) {
4841  reset( (~C)(i,j) );
4842  }
4843  }
4844  for( size_t j=jbegin; j<jend; ++j )
4845  {
      // Row range [ibegin,iend) computed for column j.
4846  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4848  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
4849  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4851  ?( SYM || HERM || LOW ? max( j, 1UL ) : 1UL )
4852  :( SYM || HERM || LOW ? j : 0UL ) ) );
4853  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4855  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
4856  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
4858  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
4859  :( UPP ? j+1UL : M ) ) );
4860
      // With a structure flag set the row range may be empty: reset the column and skip it.
4861  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) {
4862  for( size_t i=0UL; i<M; ++i ) {
4863  reset( (~C)(i,j) );
4864  }
4865  continue;
4866  }
4867
4868  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4869
      // Reset the elements in front of ibegin; for SYM/HERM this starts at row j because
      // the elements above it are filled in by the mirroring step at the end.
4870  for( size_t i=( SYM || HERM ? j : 0UL ); i<ibegin; ++i ) {
4871  reset( (~C)(i,j) );
4872  }
4873  for( size_t i=ibegin; i<iend; ++i )
4874  {
      // Summation range [kbegin,kend), narrowed by the lower/upper traits of both operands.
4875  const size_t kbegin( ( IsUpper<MT4>::value )
4876  ?( ( IsLower<MT5>::value )
4877  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4878  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4879  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4880  :( ( IsLower<MT5>::value )
4881  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4882  :( 0UL ) ) );
4883  const size_t kend( ( IsLower<MT4>::value )
4884  ?( ( IsUpper<MT5>::value )
4885  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4886  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4887  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4888  :( ( IsUpper<MT5>::value )
4889  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4890  :( K ) ) );
4891  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4892
      // The first product initializes the element, the remaining products are accumulated,
      // and the finished sum is scaled once.
4893  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4894  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4895  (~C)(i,j) += A(i,k) * B(k,j);
4896  }
4897  (~C)(i,j) *= scalar;
4898  }
      // Reset the elements behind the computed row range.
4899  for( size_t i=iend; i<M; ++i ) {
4900  reset( (~C)(i,j) );
4901  }
4902  }
      // Columns behind the computed range are reset completely.
4903  for( size_t j=jend; j<N; ++j ) {
4904  for( size_t i=0UL; i<M; ++i ) {
4905  reset( (~C)(i,j) );
4906  }
4907  }
4908
      // SYM/HERM: fill the elements above the diagonal from their mirrored counterparts
      // (conjugated in the Hermitian case).
4909  if( SYM || HERM ) {
4910  for( size_t j=1UL; j<N; ++j ) {
4911  for( size_t i=0UL; i<j; ++i ) {
4912  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
4913  }
4914  }
4915  }
4916  }
4917  //**********************************************************************************************
4918 
4919  //**Default assignment to row-major dense matrices (general/diagonal)***************************
// Default assignment kernel for row-major targets (general/diagonal operands).
// Enabled if MT4 is not diagonal and MT5 is diagonal. Because only the diagonal of B is
// read, every element is computed as C(i,j) = A(i,j) * B(j,j) * scalar without a summation.
4933  template< typename MT3 // Type of the left-hand side target matrix
4934  , typename MT4 // Type of the left-hand side matrix operand
4935  , typename MT5 // Type of the right-hand side matrix operand
4936  , typename ST2 > // Type of the scalar value
4937  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4938  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4939  {
4940  const size_t M( A.rows() );
4941  const size_t N( B.columns() );
4942
4943  for( size_t i=0UL; i<M; ++i )
4944  {
      // Column range [jbegin,jend) of row i, narrowed by the upper/lower traits of MT4.
4945  const size_t jbegin( ( IsUpper<MT4>::value )
4946  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4947  :( 0UL ) );
4948  const size_t jend( ( IsLower<MT4>::value )
4949  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4950  :( N ) );
4951  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4952
      // Upper A: reset the elements in front of the computed range.
4953  if( IsUpper<MT4>::value ) {
4954  for( size_t j=0UL; j<jbegin; ++j ) {
4955  reset( (~C)(i,j) );
4956  }
4957  }
4958  for( size_t j=jbegin; j<jend; ++j ) {
4959  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4960  }
      // Lower A: reset the elements behind the computed range.
4961  if( IsLower<MT4>::value ) {
4962  for( size_t j=jend; j<N; ++j ) {
4963  reset( (~C)(i,j) );
4964  }
4965  }
4966  }
4967  }
4968  //**********************************************************************************************
4969 
4970  //**Default assignment to column-major dense matrices (general/diagonal)************************
// Default assignment kernel for column-major targets (general/diagonal operands).
// Enabled if MT4 is not diagonal and MT5 is diagonal. Every element is computed as
// C(i,j) = A(i,j) * B(j,j) * scalar. The target is traversed in tiles of
// BLOCK_SIZE x BLOCK_SIZE elements.
4984  template< typename MT3 // Type of the left-hand side target matrix
4985  , typename MT4 // Type of the left-hand side matrix operand
4986  , typename MT5 // Type of the right-hand side matrix operand
4987  , typename ST2 > // Type of the scalar value
4988  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4989  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4990  {
4991  constexpr size_t block( BLOCK_SIZE );
4992
4993  const size_t M( A.rows() );
4994  const size_t N( B.columns() );
4995
      // Outer loops walk over the tiles; jend/iend clamp the last tile to the matrix size.
4996  for( size_t jj=0UL; jj<N; jj+=block ) {
4997  const size_t jend( min( N, jj+block ) );
4998  for( size_t ii=0UL; ii<M; ii+=block ) {
4999  const size_t iend( min( M, ii+block ) );
5000  for( size_t j=jj; j<jend; ++j )
5001  {
      // Row range [ibegin,ipos) of column j within the current tile, narrowed by the
      // lower/upper traits of MT4.
5002  const size_t ibegin( ( IsLower<MT4>::value )
5003  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
5004  :( ii ) );
5005  const size_t ipos( ( IsUpper<MT4>::value )
5006  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
5007  :( iend ) );
5008
      // Lower A: reset the tile elements in front of the computed range.
5009  if( IsLower<MT4>::value ) {
5010  for( size_t i=ii; i<ibegin; ++i ) {
5011  reset( (~C)(i,j) );
5012  }
5013  }
5014  for( size_t i=ibegin; i<ipos; ++i ) {
5015  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
5016  }
      // Upper A: reset the tile elements behind the computed range.
5017  if( IsUpper<MT4>::value ) {
5018  for( size_t i=ipos; i<iend; ++i ) {
5019  reset( (~C)(i,j) );
5020  }
5021  }
5022  }
5023  }
5024  }
5025  }
5026  //**********************************************************************************************
5027 
5028  //**Default assignment to row-major dense matrices (diagonal/general)***************************
// Default assignment kernel for row-major targets (diagonal/general operands).
// Because only the diagonal of A is read, every element is computed as
// C(i,j) = A(i,i) * B(i,j) * scalar. The target is traversed in tiles of
// BLOCK_SIZE x BLOCK_SIZE elements.
// NOTE(review): the line carrying the return type of this function is missing from the
// listing.
5042  template< typename MT3 // Type of the left-hand side target matrix
5043  , typename MT4 // Type of the left-hand side matrix operand
5044  , typename MT5 // Type of the right-hand side matrix operand
5045  , typename ST2 > // Type of the scalar value
5047  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5048  {
5049  constexpr size_t block( BLOCK_SIZE );
5050
5051  const size_t M( A.rows() );
5052  const size_t N( B.columns() );
5053
      // Outer loops walk over the tiles; iend/jend clamp the last tile to the matrix size.
5054  for( size_t ii=0UL; ii<M; ii+=block ) {
5055  const size_t iend( min( M, ii+block ) );
5056  for( size_t jj=0UL; jj<N; jj+=block ) {
5057  const size_t jend( min( N, jj+block ) );
5058  for( size_t i=ii; i<iend; ++i )
5059  {
      // Column range [jbegin,jpos) of row i within the current tile, narrowed by the
      // upper/lower traits of MT5.
5060  const size_t jbegin( ( IsUpper<MT5>::value )
5061  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
5062  :( jj ) );
5063  const size_t jpos( ( IsLower<MT5>::value )
5064  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
5065  :( jend ) );
5066
      // Upper B: reset the tile elements in front of the computed range.
5067  if( IsUpper<MT5>::value ) {
5068  for( size_t j=jj; j<jbegin; ++j ) {
5069  reset( (~C)(i,j) );
5070  }
5071  }
5072  for( size_t j=jbegin; j<jpos; ++j ) {
5073  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
5074  }
      // Lower B: reset the tile elements behind the computed range.
5075  if( IsLower<MT5>::value ) {
5076  for( size_t j=jpos; j<jend; ++j ) {
5077  reset( (~C)(i,j) );
5078  }
5079  }
5080  }
5081  }
5082  }
5083  }
5084  //**********************************************************************************************
5085 
5086  //**Default assignment to column-major dense matrices (diagonal/general)************************
// Default assignment kernel for column-major targets (diagonal/general operands).
// Enabled if MT4 is diagonal and MT5 is not diagonal. Every element is computed as
// C(i,j) = A(i,i) * B(i,j) * scalar, traversing C column by column.
5100  template< typename MT3 // Type of the left-hand side target matrix
5101  , typename MT4 // Type of the left-hand side matrix operand
5102  , typename MT5 // Type of the right-hand side matrix operand
5103  , typename ST2 > // Type of the scalar value
5104  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5105  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5106  {
5107  const size_t M( A.rows() );
5108  const size_t N( B.columns() );
5109
5110  for( size_t j=0UL; j<N; ++j )
5111  {
      // Row range [ibegin,iend) of column j, narrowed by the lower/upper traits of MT5.
5112  const size_t ibegin( ( IsLower<MT5>::value )
5113  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5114  :( 0UL ) );
5115  const size_t iend( ( IsUpper<MT5>::value )
5116  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5117  :( M ) );
5118  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5119
      // Lower B: reset the elements in front of the computed range.
5120  if( IsLower<MT5>::value ) {
5121  for( size_t i=0UL; i<ibegin; ++i ) {
5122  reset( (~C)(i,j) );
5123  }
5124  }
5125  for( size_t i=ibegin; i<iend; ++i ) {
5126  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
5127  }
      // Upper B: reset the elements behind the computed range.
5128  if( IsUpper<MT5>::value ) {
5129  for( size_t i=iend; i<M; ++i ) {
5130  reset( (~C)(i,j) );
5131  }
5132  }
5133  }
5134  }
5135  //**********************************************************************************************
5136 
5137  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default assignment kernel for diagonal/diagonal operands (any storage order of C).
// Enabled if both MT4 and MT5 are diagonal. The target is reset first; afterwards only the
// diagonal elements are written as C(i,i) = A(i,i) * B(i,i) * scalar.
5151  template< typename MT3 // Type of the left-hand side target matrix
5152  , typename MT4 // Type of the left-hand side matrix operand
5153  , typename MT5 // Type of the right-hand side matrix operand
5154  , typename ST2 > // Type of the scalar value
5155  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
5156  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5157  {
      // Clear all elements; the loop below overwrites the diagonal only.
5158  reset( C );
5159
5160  for( size_t i=0UL; i<A.rows(); ++i ) {
5161  C(i,i) = A(i,i) * B(i,i) * scalar;
5162  }
5163  }
5164  //**********************************************************************************************
5165 
5166  //**Default assignment to dense matrices (small matrices)***************************************
// Default assignment kernel for small matrices.
// This overload performs no work of its own: it forwards all arguments unchanged to
// selectDefaultAssignKernel().
// NOTE(review): the line carrying the return type of this function is missing from the
// listing, so the condition under which this overload is selected is not visible here.
5180  template< typename MT3 // Type of the left-hand side target matrix
5181  , typename MT4 // Type of the left-hand side matrix operand
5182  , typename MT5 // Type of the right-hand side matrix operand
5183  , typename ST2 > // Type of the scalar value
5185  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5186  {
5187  selectDefaultAssignKernel( C, A, B, scalar );
5188  }
5189  //**********************************************************************************************
5190 
5191  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
5206  template< typename MT3 // Type of the left-hand side target matrix
5207  , typename MT4 // Type of the left-hand side matrix operand
5208  , typename MT5 // Type of the right-hand side matrix operand
5209  , typename ST2 > // Type of the scalar value
5211  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5212  {
5213  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5214 
5215  const size_t M( A.rows() );
5216  const size_t N( B.columns() );
5217  const size_t K( A.columns() );
5218 
5219  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5220 
5221  if( LOW && UPP ) {
5222  reset( ~C );
5223  }
5224 
5225  {
5226  size_t i( 0UL );
5227 
5228  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
5229  {
5230  const size_t jend( LOW ? i+2UL : N );
5231  size_t j( SYM || HERM || UPP ? i : 0UL );
5232 
5233  for( ; (j+4UL) <= jend; j+=4UL )
5234  {
5235  const size_t kbegin( ( IsUpper<MT4>::value )
5236  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5237  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5238  const size_t kend( ( IsLower<MT4>::value )
5239  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
5240  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
5241 
5242  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5243  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5244 
5245  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5246  size_t k( kbegin );
5247 
5248  for( ; k<kpos; k+=SIMDSIZE ) {
5249  const SIMDType a1( A.load(i ,k) );
5250  const SIMDType a2( A.load(i+1UL,k) );
5251  const SIMDType b1( B.load(k,j ) );
5252  const SIMDType b2( B.load(k,j+1UL) );
5253  const SIMDType b3( B.load(k,j+2UL) );
5254  const SIMDType b4( B.load(k,j+3UL) );
5255  xmm1 += a1 * b1;
5256  xmm2 += a1 * b2;
5257  xmm3 += a1 * b3;
5258  xmm4 += a1 * b4;
5259  xmm5 += a2 * b1;
5260  xmm6 += a2 * b2;
5261  xmm7 += a2 * b3;
5262  xmm8 += a2 * b4;
5263  }
5264 
5265  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5266  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5267  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
5268  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
5269  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
5270  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
5271  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
5272  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
5273 
5274  for( ; remainder && k<kend; ++k ) {
5275  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5276  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5277  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
5278  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
5279  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5280  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5281  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
5282  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
5283  }
5284  }
5285 
5286  for( ; (j+2UL) <= jend; j+=2UL )
5287  {
5288  const size_t kbegin( ( IsUpper<MT4>::value )
5289  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5290  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5291  const size_t kend( ( IsLower<MT4>::value )
5292  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5293  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5294 
5295  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5296  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5297 
5298  SIMDType xmm1, xmm2, xmm3, xmm4;
5299  size_t k( kbegin );
5300 
5301  for( ; k<kpos; k+=SIMDSIZE ) {
5302  const SIMDType a1( A.load(i ,k) );
5303  const SIMDType a2( A.load(i+1UL,k) );
5304  const SIMDType b1( B.load(k,j ) );
5305  const SIMDType b2( B.load(k,j+1UL) );
5306  xmm1 += a1 * b1;
5307  xmm2 += a1 * b2;
5308  xmm3 += a2 * b1;
5309  xmm4 += a2 * b2;
5310  }
5311 
5312  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5313  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5314  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5315  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5316 
5317  for( ; remainder && k<kend; ++k ) {
5318  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5319  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5320  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5321  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5322  }
5323  }
5324 
5325  if( j < jend )
5326  {
5327  const size_t kbegin( ( IsUpper<MT4>::value )
5328  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5329  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5330  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5331 
5332  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5333  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5334 
5335  SIMDType xmm1, xmm2;
5336  size_t k( kbegin );
5337 
5338  for( ; k<kpos; k+=SIMDSIZE ) {
5339  const SIMDType b1( B.load(k,j) );
5340  xmm1 += A.load(i ,k) * b1;
5341  xmm2 += A.load(i+1UL,k) * b1;
5342  }
5343 
5344  (~C)(i ,j) = sum( xmm1 ) * scalar;
5345  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5346 
5347  for( ; remainder && k<kend; ++k ) {
5348  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5349  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5350  }
5351  }
5352  }
5353 
5354  for( ; i<M; ++i )
5355  {
5356  const size_t jend( LOW ? i+1UL : N );
5357  size_t j( SYM || HERM || UPP ? i : 0UL );
5358 
5359  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
5360  {
5361  const size_t kbegin( ( IsUpper<MT4>::value )
5362  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5363  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5364  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
5365 
5366  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5367  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5368 
5369  SIMDType xmm1, xmm2, xmm3, xmm4;
5370  size_t k( kbegin );
5371 
5372  for( ; k<kpos; k+=SIMDSIZE ) {
5373  const SIMDType a1( A.load(i,k) );
5374  xmm1 += a1 * B.load(k,j );
5375  xmm2 += a1 * B.load(k,j+1UL);
5376  xmm3 += a1 * B.load(k,j+2UL);
5377  xmm4 += a1 * B.load(k,j+3UL);
5378  }
5379 
5380  (~C)(i,j ) = sum( xmm1 ) * scalar;
5381  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5382  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
5383  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
5384 
5385  for( ; remainder && k<kend; ++k ) {
5386  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5387  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5388  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
5389  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
5390  }
5391  }
5392 
5393  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
5394  {
5395  const size_t kbegin( ( IsUpper<MT4>::value )
5396  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5397  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5398  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5399 
5400  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5401  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5402 
5403  SIMDType xmm1, xmm2;
5404  size_t k( kbegin );
5405 
5406  for( ; k<kpos; k+=SIMDSIZE ) {
5407  const SIMDType a1( A.load(i,k) );
5408  xmm1 += a1 * B.load(k,j );
5409  xmm2 += a1 * B.load(k,j+1UL);
5410  }
5411 
5412  (~C)(i,j ) = sum( xmm1 ) * scalar;
5413  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5414 
5415  for( ; remainder && k<kend; ++k ) {
5416  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5417  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5418  }
5419  }
5420 
5421  for( ; j<jend; ++j )
5422  {
5423  const size_t kbegin( ( IsUpper<MT4>::value )
5424  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5425  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5426 
5427  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5428  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5429 
5430  SIMDType xmm1;
5431  size_t k( kbegin );
5432 
5433  for( ; k<kpos; k+=SIMDSIZE ) {
5434  xmm1 += A.load(i,k) * B.load(k,j);
5435  }
5436 
5437  (~C)(i,j) = sum( xmm1 ) * scalar;
5438 
5439  for( ; remainder && k<K; ++k ) {
5440  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5441  }
5442  }
5443  }
5444  }
5445 
5446  if( SYM || HERM ) {
5447  for( size_t i=2UL; i<M; ++i ) {
5448  const size_t jend( 2UL * ( i/2UL ) );
5449  for( size_t j=0UL; j<jend; ++j ) {
5450  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5451  }
5452  }
5453  }
5454  else if( LOW && !UPP ) {
5455  for( size_t j=2UL; j<N; ++j ) {
5456  const size_t iend( 2UL * ( j/2UL ) );
5457  for( size_t i=0UL; i<iend; ++i ) {
5458  reset( (~C)(i,j) );
5459  }
5460  }
5461  }
5462  else if( !LOW && UPP ) {
5463  for( size_t i=2UL; i<M; ++i ) {
5464  const size_t jend( 2UL * ( i/2UL ) );
5465  for( size_t j=0UL; j<jend; ++j ) {
5466  reset( (~C)(i,j) );
5467  }
5468  }
5469  }
5470  }
5471  //**********************************************************************************************
5472 
5473  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for a column-major target:
// computes C = ( A * B ) * scalar tile by tile, accumulating every dot product
// along k with SIMD loads from A and B.
//
// \param C      The target matrix; every element written here is overwritten.
// \param A      The left-hand side operand (read via A.load(i,k) and A(i,k)).
// \param B      The right-hand side operand (read via B.load(k,j) and B(k,j)).
// \param scalar The factor applied to every computed dot product.
//
// NOTE(review): listing line 5492, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that selects this overload -- confirm against the original header.
5488  template< typename MT3 // Type of the left-hand side target matrix
5489  , typename MT4 // Type of the left-hand side matrix operand
5490  , typename MT5 // Type of the right-hand side matrix operand
5491  , typename ST2 > // Type of the scalar value
5493  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5494  {
// A scalar tail loop over k is only needed if at least one operand is not padded.
5495  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5496 
5497  const size_t M( A.rows() );
5498  const size_t N( B.columns() );
5499  const size_t K( A.columns() );
5500 
5501  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5502 
// For LOW/UPP the loops below restrict the visited column range per row, so the
// target is cleared up front (presumably so that unvisited elements end up zero).
5503  if( LOW || UPP ) {
5504  reset( ~C );
5505  }
5506 
5507  {
5508  size_t i( 0UL );
5509 
// Stage 1: 4-row tiles. Only used when none of SYM/HERM/LOW/UPP is set.
5510  for( ; !SYM && !HERM && !LOW && !UPP && (i+4UL) <= M; i+=4UL )
5511  {
5512  size_t j( 0UL );
5513 
// 4x2 tiles of C.
5514  for( ; (j+2UL) <= N; j+=2UL )
5515  {
// k range narrowed by the triangular structure of A and B. The lower bound is
// masked with size_t(-SIMDSIZE), i.e. its low bits are cleared before SIMD loads.
5516  const size_t kbegin( ( IsUpper<MT4>::value )
5517  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5518  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5519  const size_t kend( ( IsLower<MT4>::value )
5520  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
5521  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5522 
// End of the SIMD part: kend itself without a remainder, otherwise kend masked
// down; the assertion checks that this equals the largest multiple of SIMDSIZE <= kend.
5523  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5524  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5525 
// One accumulator per element of the tile.
// NOTE(review): declared without initializer; presumably SIMDType default-constructs to zero -- confirm.
5526  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5527  size_t k( kbegin );
5528 
5529  for( ; k<kpos; k+=SIMDSIZE ) {
5530  const SIMDType a1( A.load(i ,k) );
5531  const SIMDType a2( A.load(i+1UL,k) );
5532  const SIMDType a3( A.load(i+2UL,k) );
5533  const SIMDType a4( A.load(i+3UL,k) );
5534  const SIMDType b1( B.load(k,j ) );
5535  const SIMDType b2( B.load(k,j+1UL) );
5536  xmm1 += a1 * b1;
5537  xmm2 += a1 * b2;
5538  xmm3 += a2 * b1;
5539  xmm4 += a2 * b2;
5540  xmm5 += a3 * b1;
5541  xmm6 += a3 * b2;
5542  xmm7 += a4 * b1;
5543  xmm8 += a4 * b2;
5544  }
5545 
// Reduce each accumulator, scale it and overwrite the target element.
5546  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5547  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5548  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5549  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5550  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
5551  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
5552  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
5553  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
5554 
// Scalar tail for the k values in [kpos,kend) that the SIMD loop did not cover.
5555  for( ; remainder && k<kend; ++k ) {
5556  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5557  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5558  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5559  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5560  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
5561  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
5562  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
5563  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
5564  }
5565  }
5566 
// Leftover single column (N odd): 4x1 tile.
5567  if( j < N )
5568  {
5569  const size_t kbegin( ( IsUpper<MT4>::value )
5570  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5571  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5572  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
5573 
5574  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5575  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5576 
5577  SIMDType xmm1, xmm2, xmm3, xmm4;
5578  size_t k( kbegin );
5579 
5580  for( ; k<kpos; k+=SIMDSIZE ) {
5581  const SIMDType b1( B.load(k,j) );
5582  xmm1 += A.load(i ,k) * b1;
5583  xmm2 += A.load(i+1UL,k) * b1;
5584  xmm3 += A.load(i+2UL,k) * b1;
5585  xmm4 += A.load(i+3UL,k) * b1;
5586  }
5587 
5588  (~C)(i ,j) = sum( xmm1 ) * scalar;
5589  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5590  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
5591  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
5592 
5593  for( ; remainder && k<kend; ++k ) {
5594  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5595  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5596  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
5597  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
5598  }
5599  }
5600  }
5601 
// Stage 2: 2-row tiles. For LOW the columns stop at i+2; for SYM/HERM/UPP they start at i.
5602  for( ; (i+2UL) <= M; i+=2UL )
5603  {
5604  const size_t jend( LOW ? i+2UL : N );
5605  size_t j( SYM || HERM || UPP ? i : 0UL );
5606 
// 2x2 tiles of C.
5607  for( ; (j+2UL) <= jend; j+=2UL )
5608  {
5609  const size_t kbegin( ( IsUpper<MT4>::value )
5610  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5611  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5612  const size_t kend( ( IsLower<MT4>::value )
5613  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5614  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5615 
5616  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5617  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5618 
5619  SIMDType xmm1, xmm2, xmm3, xmm4;
5620  size_t k( kbegin );
5621 
5622  for( ; k<kpos; k+=SIMDSIZE ) {
5623  const SIMDType a1( A.load(i ,k) );
5624  const SIMDType a2( A.load(i+1UL,k) );
5625  const SIMDType b1( B.load(k,j ) );
5626  const SIMDType b2( B.load(k,j+1UL) );
5627  xmm1 += a1 * b1;
5628  xmm2 += a1 * b2;
5629  xmm3 += a2 * b1;
5630  xmm4 += a2 * b2;
5631  }
5632 
5633  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5634  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5635  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5636  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5637 
5638  for( ; remainder && k<kend; ++k ) {
5639  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5640  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5641  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5642  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5643  }
5644  }
5645 
// Leftover single column of this row pair: 2x1 tile.
5646  if( j < jend )
5647  {
5648  const size_t kbegin( ( IsUpper<MT4>::value )
5649  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5650  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5651  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5652 
5653  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5654  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5655 
5656  SIMDType xmm1, xmm2;
5657  size_t k( kbegin );
5658 
5659  for( ; k<kpos; k+=SIMDSIZE ) {
5660  const SIMDType b1( B.load(k,j) );
5661  xmm1 += A.load(i ,k) * b1;
5662  xmm2 += A.load(i+1UL,k) * b1;
5663  }
5664 
5665  (~C)(i ,j) = sum( xmm1 ) * scalar;
5666  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5667 
5668  for( ; remainder && k<kend; ++k ) {
5669  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5670  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5671  }
5672  }
5673  }
5674 
// Stage 3: the last row, if M is odd.
5675  if( i < M )
5676  {
5677  const size_t jend( LOW ? i+1UL : N );
5678  size_t j( SYM || HERM || UPP ? i : 0UL );
5679 
// 1x2 tiles of C.
5680  for( ; (j+2UL) <= jend; j+=2UL )
5681  {
5682  const size_t kbegin( ( IsUpper<MT4>::value )
5683  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5684  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5685  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5686 
5687  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5688  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5689 
5690  SIMDType xmm1, xmm2;
5691  size_t k( kbegin );
5692 
5693  for( ; k<kpos; k+=SIMDSIZE ) {
5694  const SIMDType a1( A.load(i,k) );
5695  xmm1 += a1 * B.load(k,j );
5696  xmm2 += a1 * B.load(k,j+1UL);
5697  }
5698 
5699  (~C)(i,j ) = sum( xmm1 ) * scalar;
5700  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5701 
5702  for( ; remainder && k<kend; ++k ) {
5703  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5704  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5705  }
5706  }
5707 
// Final single element; here the k range always runs up to K.
5708  if( j < jend )
5709  {
5710  const size_t kbegin( ( IsUpper<MT4>::value )
5711  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5712  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5713 
5714  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5715  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5716 
5717  SIMDType xmm1;
5718  size_t k( kbegin );
5719 
5720  for( ; k<kpos; k+=SIMDSIZE ) {
5721  xmm1 += A.load(i,k) * B.load(k,j);
5722  }
5723 
5724  (~C)(i,j) = sum( xmm1 ) * scalar;
5725 
5726  for( ; remainder && k<K; ++k ) {
5727  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5728  }
5729  }
5730  }
5731  }
5732 
// For SYM/HERM the loops above start each row at column i, so the elements below
// the diagonal are filled here from their mirrored counterparts (conjugated for HERM).
5733  if( SYM || HERM ) {
5734  for( size_t j=0UL; j<N; ++j ) {
5735  for( size_t i=j+1UL; i<M; ++i ) {
5736  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5737  }
5738  }
5739  }
5740  }
5741  //**********************************************************************************************
5742 
5743  //**Default assignment to dense matrices (large matrices)***************************************
// Fallback large-matrix assignment kernel: forwards all arguments unchanged to
// selectDefaultAssignKernel().
//
// NOTE(review): listing line 5761, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that separates this overload from the identically-parameterized
// vectorized overload -- confirm against the original header.
5757  template< typename MT3 // Type of the left-hand side target matrix
5758  , typename MT4 // Type of the left-hand side matrix operand
5759  , typename MT5 // Type of the right-hand side matrix operand
5760  , typename ST2 > // Type of the scalar value
5762  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5763  {
5764  selectDefaultAssignKernel( C, A, B, scalar );
5765  }
5766  //**********************************************************************************************
5767 
5768  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Vectorized large-matrix assignment kernel: dispatches to one of the
// smmm/hmmm/lmmm/ummm/mmm routines depending on the SYM, HERM, LOW and UPP flags,
// tested in that order.
//
// NOTE(review): listing line 5787, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that selects this overload -- confirm against the original header.
5783  template< typename MT3 // Type of the left-hand side target matrix
5784  , typename MT4 // Type of the left-hand side matrix operand
5785  , typename MT5 // Type of the right-hand side matrix operand
5786  , typename ST2 > // Type of the scalar value
5788  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5789  {
5790  if( SYM )
5791  smmm( C, A, B, scalar );
5792  else if( HERM )
5793  hmmm( C, A, B, scalar );
// The remaining routines take an additional ST2(0) argument.
// NOTE(review): presumably the factor applied to the previous content of C -- confirm.
5794  else if( LOW )
5795  lmmm( C, A, B, scalar, ST2(0) );
5796  else if( UPP )
5797  ummm( C, A, B, scalar, ST2(0) );
5798  else
5799  mmm( C, A, B, scalar, ST2(0) );
5800  }
5801  //**********************************************************************************************
5802 
5803  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback for the BLAS-based assignment: forwards all arguments unchanged to
// selectLargeAssignKernel().
//
// NOTE(review): listing line 5821, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that separates this overload from the BLAS overload -- confirm
// against the original header.
5817  template< typename MT3 // Type of the left-hand side target matrix
5818  , typename MT4 // Type of the left-hand side matrix operand
5819  , typename MT5 // Type of the right-hand side matrix operand
5820  , typename ST2 > // Type of the scalar value
5822  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5823  {
5824  selectLargeAssignKernel( C, A, B, scalar );
5825  }
5826  //**********************************************************************************************
5827 
5828  //**BLAS-based assignment to dense matrices*****************************************************
5829 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
5830 
// BLAS-based assignment kernel (compiled only inside the surrounding
// BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION guard).
// Uses trmm() when one operand is triangular and gemm() otherwise; the scalar is
// converted to the element type of the target before being passed on.
//
// NOTE(review): listing line 5847, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that selects this overload -- confirm against the original header.
5843  template< typename MT3 // Type of the left-hand side target matrix
5844  , typename MT4 // Type of the left-hand side matrix operand
5845  , typename MT5 // Type of the right-hand side matrix operand
5846  , typename ST2 > // Type of the scalar value
5848  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5849  {
5850  using ET = ElementType_<MT3>;
5851 
// Triangular A: C is first filled with B and then passed to trmm() together
// with A as left-side operand (presumably an in-place multiplication as in BLAS xTRMM).
5852  if( IsTriangular<MT4>::value ) {
5853  assign( C, B );
5854  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5855  }
// Triangular B: same scheme with A copied into C and B as right-side operand.
5856  else if( IsTriangular<MT5>::value ) {
5857  assign( C, A );
5858  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5859  }
// No triangular operand: general multiplication with ET(0) as last argument.
5860  else {
5861  gemm( C, A, B, ET(scalar), ET(0) );
5862  }
5863  }
5864 #endif
5865  //**********************************************************************************************
5866 
5867  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled product expression to a sparse matrix: the expression
// is first evaluated serially into a temporary of type TmpType, and the
// (forwarded) temporary is then assigned to the target.
//
// \param lhs The target sparse matrix.
// \param rhs The scaled multiplication expression to be assigned.
//
// NOTE(review): listing lines 5883, 5885 and 5887-5892 are absent from this
// listing; the declaration of TmpType is not visible here and presumably sits on
// one of them -- confirm against the original header.
5879  template< typename MT // Type of the target sparse matrix
5880  , bool SO > // Storage order of the target sparse matrix
5881  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5882  {
5884 
5886 
5893 
5894  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5895  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5896 
// Functor applied to the temporary before it is handed to assign().
5897  const ForwardFunctor fwd;
5898 
5899  const TmpType tmp( serial( rhs ) );
5900  assign( ~lhs, fwd( tmp ) );
5901  }
5902  //**********************************************************************************************
5903 
5904  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled product expression to a dense matrix:
// extracts both operands of the wrapped multiplication, evaluates them serially
// and hands them, together with the scalar, to the kernel selection.
//
// \param lhs The target dense matrix.
// \param rhs The scaled multiplication expression to be added.
//
// NOTE(review): listing line 5920 is absent from this listing -- confirm its
// content against the original header.
5916  template< typename MT // Type of the target dense matrix
5917  , bool SO > // Storage order of the target dense matrix
5918  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5919  {
5921 
5922  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5923  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5924 
5925  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5926  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5927 
// Nothing to add if the target has no elements or the inner dimension is zero.
5928  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5929  return;
5930  }
5931 
5932  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5933  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5934 
5935  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5936  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5937  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5938  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5939  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5940  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5941 
5942  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5943  }
5944  //**********************************************************************************************
5945 
5946  //**Addition assignment to dense matrices (kernel selection)************************************
5957  template< typename MT3 // Type of the left-hand side target matrix
5958  , typename MT4 // Type of the left-hand side matrix operand
5959  , typename MT5 // Type of the right-hand side matrix operand
5960  , typename ST2 > // Type of the scalar value
5961  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5962  {
5964  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
5965  selectSmallAddAssignKernel( C, A, B, scalar );
5966  else
5967  selectBlasAddAssignKernel( C, A, B, scalar );
5968  }
5969  //**********************************************************************************************
5970 
5971  //**Default addition assignment to dense matrices (general/general)*****************************
5985  template< typename MT3 // Type of the left-hand side target matrix
5986  , typename MT4 // Type of the left-hand side matrix operand
5987  , typename MT5 // Type of the right-hand side matrix operand
5988  , typename ST2 > // Type of the scalar value
5989  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
5990  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5991  {
5992  const ResultType tmp( serial( A * B * scalar ) );
5993  addAssign( C, tmp );
5994  }
5995  //**********************************************************************************************
5996 
5997  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
6011  template< typename MT3 // Type of the left-hand side target matrix
6012  , typename MT4 // Type of the left-hand side matrix operand
6013  , typename MT5 // Type of the right-hand side matrix operand
6014  , typename ST2 > // Type of the scalar value
6015  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6016  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6017  {
6018  const size_t M( A.rows() );
6019  const size_t N( B.columns() );
6020 
6021  for( size_t i=0UL; i<M; ++i )
6022  {
6023  const size_t jbegin( ( IsUpper<MT4>::value )
6024  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6025  :( 0UL ) );
6026  const size_t jend( ( IsLower<MT4>::value )
6027  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6028  :( N ) );
6029  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6030 
6031  const size_t jnum( jend - jbegin );
6032  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6033 
6034  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6035  (~C)(i,j ) += A(i,j ) * B(j ,j ) * scalar;
6036  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6037  }
6038  if( jpos < jend ) {
6039  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
6040  }
6041  }
6042  }
6043  //**********************************************************************************************
6044 
6045  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
6059  template< typename MT3 // Type of the left-hand side target matrix
6060  , typename MT4 // Type of the left-hand side matrix operand
6061  , typename MT5 // Type of the right-hand side matrix operand
6062  , typename ST2 > // Type of the scalar value
6063  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6064  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6065  {
6066  constexpr size_t block( BLOCK_SIZE );
6067 
6068  const size_t M( A.rows() );
6069  const size_t N( B.columns() );
6070 
6071  for( size_t jj=0UL; jj<N; jj+=block ) {
6072  const size_t jend( min( N, jj+block ) );
6073  for( size_t ii=0UL; ii<M; ii+=block ) {
6074  const size_t iend( min( M, ii+block ) );
6075  for( size_t j=jj; j<jend; ++j )
6076  {
6077  const size_t ibegin( ( IsLower<MT4>::value )
6078  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
6079  :( ii ) );
6080  const size_t ipos( ( IsUpper<MT4>::value )
6081  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
6082  :( iend ) );
6083 
6084  for( size_t i=ibegin; i<ipos; ++i ) {
6085  (~C)(i,j) += A(i,j) * B(j,j) * scalar;
6086  }
6087  }
6088  }
6089  }
6090  }
6091  //**********************************************************************************************
6092 
6093  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
6107  template< typename MT3 // Type of the left-hand side target matrix
6108  , typename MT4 // Type of the left-hand side matrix operand
6109  , typename MT5 // Type of the right-hand side matrix operand
6110  , typename ST2 > // Type of the scalar value
6111  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6112  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6113  {
6114  constexpr size_t block( BLOCK_SIZE );
6115 
6116  const size_t M( A.rows() );
6117  const size_t N( B.columns() );
6118 
6119  for( size_t ii=0UL; ii<M; ii+=block ) {
6120  const size_t iend( min( M, ii+block ) );
6121  for( size_t jj=0UL; jj<N; jj+=block ) {
6122  const size_t jend( min( N, jj+block ) );
6123  for( size_t i=ii; i<iend; ++i )
6124  {
6125  const size_t jbegin( ( IsUpper<MT5>::value )
6126  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
6127  :( jj ) );
6128  const size_t jpos( ( IsLower<MT5>::value )
6129  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
6130  :( jend ) );
6131 
6132  for( size_t j=jbegin; j<jpos; ++j ) {
6133  (~C)(i,j) += A(i,i) * B(i,j) * scalar;
6134  }
6135  }
6136  }
6137  }
6138  }
6139  //**********************************************************************************************
6140 
6141  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
6155  template< typename MT3 // Type of the left-hand side target matrix
6156  , typename MT4 // Type of the left-hand side matrix operand
6157  , typename MT5 // Type of the right-hand side matrix operand
6158  , typename ST2 > // Type of the scalar value
6159  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6160  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6161  {
6162  const size_t M( A.rows() );
6163  const size_t N( B.columns() );
6164 
6165  for( size_t j=0UL; j<N; ++j )
6166  {
6167  const size_t ibegin( ( IsLower<MT5>::value )
6168  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
6169  :( 0UL ) );
6170  const size_t iend( ( IsUpper<MT5>::value )
6171  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
6172  :( M ) );
6173  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6174 
6175  const size_t inum( iend - ibegin );
6176  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6177 
6178  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6179  (~C)(i ,j) += A(i ,i ) * B(i ,j) * scalar;
6180  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
6181  }
6182  if( ipos < iend ) {
6183  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
6184  }
6185  }
6186  }
6187  //**********************************************************************************************
6188 
6189  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
6203  template< typename MT3 // Type of the left-hand side target matrix
6204  , typename MT4 // Type of the left-hand side matrix operand
6205  , typename MT5 // Type of the right-hand side matrix operand
6206  , typename ST2 > // Type of the scalar value
6207  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
6208  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6209  {
6210  for( size_t i=0UL; i<A.rows(); ++i ) {
6211  C(i,i) += A(i,i) * B(i,i) * scalar;
6212  }
6213  }
6214  //**********************************************************************************************
6215 
6216  //**Default addition assignment to dense matrices (small matrices)******************************
// Fallback small-matrix addition assignment kernel: forwards all arguments
// unchanged to selectDefaultAddAssignKernel().
//
// NOTE(review): listing line 6234, between the template header and the function
// name, is absent from this listing; it presumably holds the return type and the
// condition that separates this overload from the vectorized overloads --
// confirm against the original header.
6230  template< typename MT3 // Type of the left-hand side target matrix
6231  , typename MT4 // Type of the left-hand side matrix operand
6232  , typename MT5 // Type of the right-hand side matrix operand
6233  , typename ST2 > // Type of the scalar value
6235  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6236  {
6237  selectDefaultAddAssignKernel( C, A, B, scalar );
6238  }
6239  //**********************************************************************************************
6240 
6241  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
6256  template< typename MT3 // Type of the left-hand side target matrix
6257  , typename MT4 // Type of the left-hand side matrix operand
6258  , typename MT5 // Type of the right-hand side matrix operand
6259  , typename ST2 > // Type of the scalar value
6261  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6262  {
6263  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6264 
6265  const size_t M( A.rows() );
6266  const size_t N( B.columns() );
6267  const size_t K( A.columns() );
6268 
6269  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6270 
6271  size_t i( 0UL );
6272 
6273  for( ; (i+2UL) <= M; i+=2UL )
6274  {
6275  const size_t jend( LOW ? i+2UL : N );
6276  size_t j( UPP ? i : 0UL );
6277 
6278  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
6279  {
6280  const size_t kbegin( ( IsUpper<MT4>::value )
6281  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6282  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6283  const size_t kend( ( IsLower<MT4>::value )
6284  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
6285  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
6286 
6287  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6288  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6289 
6290  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6291  size_t k( kbegin );
6292 
6293  for( ; k<kpos; k+=SIMDSIZE ) {
6294  const SIMDType a1( A.load(i ,k) );
6295  const SIMDType a2( A.load(i+1UL,k) );
6296  const SIMDType b1( B.load(k,j ) );
6297  const SIMDType b2( B.load(k,j+1UL) );
6298  const SIMDType b3( B.load(k,j+2UL) );
6299  const SIMDType b4( B.load(k,j+3UL) );
6300  xmm1 += a1 * b1;
6301  xmm2 += a1 * b2;
6302  xmm3 += a1 * b3;
6303  xmm4 += a1 * b4;
6304  xmm5 += a2 * b1;
6305  xmm6 += a2 * b2;
6306  xmm7 += a2 * b3;
6307  xmm8 += a2 * b4;
6308  }
6309 
6310  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6311  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6312  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
6313  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
6314  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
6315  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
6316  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
6317  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
6318 
6319  for( ; remainder && k<kend; ++k ) {
6320  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6321  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6322  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
6323  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
6324  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6325  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6326  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
6327  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
6328  }
6329  }
6330 
6331  for( ; (j+2UL) <= jend; j+=2UL )
6332  {
6333  const size_t kbegin( ( IsUpper<MT4>::value )
6334  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6335  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6336  const size_t kend( ( IsLower<MT4>::value )
6337  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6338  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6339 
6340  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6341  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6342 
6343  SIMDType xmm1, xmm2, xmm3, xmm4;
6344  size_t k( kbegin );
6345 
6346  for( ; k<kpos; k+=SIMDSIZE ) {
6347  const SIMDType a1( A.load(i ,k) );
6348  const SIMDType a2( A.load(i+1UL,k) );
6349  const SIMDType b1( B.load(k,j ) );
6350  const SIMDType b2( B.load(k,j+1UL) );
6351  xmm1 += a1 * b1;
6352  xmm2 += a1 * b2;
6353  xmm3 += a2 * b1;
6354  xmm4 += a2 * b2;
6355  }
6356 
6357  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6358  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6359  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6360  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6361 
6362  for( ; remainder && k<kend; ++k ) {
6363  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6364  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6365  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6366  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6367  }
6368  }
6369 
6370  if( j < jend )
6371  {
6372  const size_t kbegin( ( IsUpper<MT4>::value )
6373  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6374  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6375  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6376 
6377  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6378  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6379 
6380  SIMDType xmm1, xmm2;
6381  size_t k( kbegin );
6382 
6383  for( ; k<kpos; k+=SIMDSIZE ) {
6384  const SIMDType b1( B.load(k,j) );
6385  xmm1 += A.load(i ,k) * b1;
6386  xmm2 += A.load(i+1UL,k) * b1;
6387  }
6388 
6389  (~C)(i ,j) += sum( xmm1 ) * scalar;
6390  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6391 
6392  for( ; remainder && k<kend; ++k ) {
6393  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6394  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6395  }
6396  }
6397  }
6398 
6399  if( i < M )
6400  {
6401  const size_t jend( LOW ? i+1UL : N );
6402  size_t j( UPP ? i : 0UL );
6403 
6404  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
6405  {
6406  const size_t kbegin( ( IsUpper<MT4>::value )
6407  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6408  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6409  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
6410 
6411  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6412  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6413 
6414  SIMDType xmm1, xmm2, xmm3, xmm4;
6415  size_t k( kbegin );
6416 
6417  for( ; k<kpos; k+=SIMDSIZE ) {
6418  const SIMDType a1( A.load(i,k) );
6419  xmm1 += a1 * B.load(k,j );
6420  xmm2 += a1 * B.load(k,j+1UL);
6421  xmm3 += a1 * B.load(k,j+2UL);
6422  xmm4 += a1 * B.load(k,j+3UL);
6423  }
6424 
6425  (~C)(i,j ) += sum( xmm1 ) * scalar;
6426  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6427  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
6428  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
6429 
6430  for( ; remainder && k<kend; ++k ) {
6431  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6432  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6433  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
6434  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
6435  }
6436  }
6437 
6438  for( ; (j+2UL) <= jend; j+=2UL )
6439  {
6440  const size_t kbegin( ( IsUpper<MT4>::value )
6441  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6442  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6443  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6444 
6445  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6446  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6447 
6448  SIMDType xmm1, xmm2;
6449  size_t k( kbegin );
6450 
6451  for( ; k<kpos; k+=SIMDSIZE ) {
6452  const SIMDType a1( A.load(i,k) );
6453  xmm1 += a1 * B.load(k,j );
6454  xmm2 += a1 * B.load(k,j+1UL);
6455  }
6456 
6457  (~C)(i,j ) += sum( xmm1 ) * scalar;
6458  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6459 
6460  for( ; remainder && k<kend; ++k ) {
6461  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6462  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6463  }
6464  }
6465 
6466  if( j < jend )
6467  {
6468  const size_t kbegin( ( IsUpper<MT4>::value )
6469  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6470  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6471 
6472  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6473  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6474 
6475  SIMDType xmm1;
6476  size_t k( kbegin );
6477 
6478  for( ; k<kpos; k+=SIMDSIZE ) {
6479  xmm1 += A.load(i,k) * B.load(k,j);
6480  }
6481 
6482  (~C)(i,j) += sum( xmm1 ) * scalar;
6483 
6484  for( ; remainder && k<K; ++k ) {
6485  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6486  }
6487  }
6488  }
6489  }
6490  //**********************************************************************************************
6491 
6492  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized small-matrix kernel performing C += ( A * B ) * scalar for a column-major
// target C. The target is processed in tiles of 4x2, 4x1, 2x2, 2x1, 1x2 and 1x1 elements.
// For every tile the inner product over k is accumulated in SIMD registers, reduced with
// sum(), scaled by \a scalar and added to the corresponding elements of C.
//
// C      : the target matrix (accessed via ~C)
// A, B   : left-hand / right-hand side operands; A is read row-wise (A.load(i,k)) and
//          B column-wise (B.load(k,j))
// scalar : factor applied to every accumulated product
//
// LOW, UPP, SIMDSIZE and SIMDType are defined outside of this block.
6507  template< typename MT3 // Type of the left-hand side target matrix
6508  , typename MT4 // Type of the left-hand side matrix operand
6509  , typename MT5 // Type of the right-hand side matrix operand
6510  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 6510 to 6512, i.e. the line carrying the return
      // type (and presumably the enabling condition) of this overload is not visible here.
6512  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6513  {
      // A scalar tail loop is only needed if at least one of the operands is not padded.
6514  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6515 
6516  const size_t M( A.rows() );
6517  const size_t N( B.columns() );
6518  const size_t K( A.columns() );
6519 
6520  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6521 
6522  size_t i( 0UL );
6523 
      // Blocks of four rows: only used when neither LOW nor UPP is set.
6524  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
6525  {
6526  size_t j( 0UL );
6527 
      // 4x2 tiles
6528  for( ; (j+2UL) <= N; j+=2UL )
6529  {
      // The k-range is narrowed depending on the upper/lower traits of the operands
      // (presumably to skip elements known to be zero -- TODO confirm). The lower bound
      // is masked with size_t(-SIMDSIZE) so that the SIMD loop starts at a SIMD boundary.
6530  const size_t kbegin( ( IsUpper<MT4>::value )
6531  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6532  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6533  const size_t kend( ( IsLower<MT4>::value )
6534  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
6535  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6536 
      // End of the SIMD loop: with a remainder, kend is rounded down to a multiple of
      // SIMDSIZE (checked by the assertion); without a remainder the SIMD loop runs to kend.
6537  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6538  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6539 
      // One accumulator per element of the tile.
      // NOTE(review): relies on SIMDType being zero-initialized by default -- confirm.
6540  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6541  size_t k( kbegin );
6542 
6543  for( ; k<kpos; k+=SIMDSIZE ) {
6544  const SIMDType a1( A.load(i ,k) );
6545  const SIMDType a2( A.load(i+1UL,k) );
6546  const SIMDType a3( A.load(i+2UL,k) );
6547  const SIMDType a4( A.load(i+3UL,k) );
6548  const SIMDType b1( B.load(k,j ) );
6549  const SIMDType b2( B.load(k,j+1UL) );
6550  xmm1 += a1 * b1;
6551  xmm2 += a1 * b2;
6552  xmm3 += a2 * b1;
6553  xmm4 += a2 * b2;
6554  xmm5 += a3 * b1;
6555  xmm6 += a3 * b2;
6556  xmm7 += a4 * b1;
6557  xmm8 += a4 * b2;
6558  }
6559 
      // Reduce the accumulators, scale and add to the target.
6560  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6561  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6562  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6563  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6564  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
6565  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
6566  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
6567  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
6568 
      // Scalar tail for the k-values not covered by the SIMD loop.
6569  for( ; remainder && k<kend; ++k ) {
6570  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6571  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6572  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6573  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6574  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
6575  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
6576  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
6577  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
6578  }
6579  }
6580 
      // 4x1 tile: the last column in case N is odd.
6581  if( j < N )
6582  {
6583  const size_t kbegin( ( IsUpper<MT4>::value )
6584  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6585  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6586  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
6587 
6588  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6589  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6590 
6591  SIMDType xmm1, xmm2, xmm3, xmm4;
6592  size_t k( kbegin );
6593 
6594  for( ; k<kpos; k+=SIMDSIZE ) {
6595  const SIMDType b1( B.load(k,j) );
6596  xmm1 += A.load(i ,k) * b1;
6597  xmm2 += A.load(i+1UL,k) * b1;
6598  xmm3 += A.load(i+2UL,k) * b1;
6599  xmm4 += A.load(i+3UL,k) * b1;
6600  }
6601 
6602  (~C)(i ,j) += sum( xmm1 ) * scalar;
6603  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6604  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
6605  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
6606 
6607  for( ; remainder && k<kend; ++k ) {
6608  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6609  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6610  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
6611  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
6612  }
6613  }
6614  }
6615 
      // Blocks of two rows. With LOW the columns end at i+2, with UPP they start at i,
      // otherwise the complete range [0,N) is traversed.
6616  for( ; (i+2UL) <= M; i+=2UL )
6617  {
6618  const size_t jend( LOW ? i+2UL : N );
6619  size_t j( UPP ? i : 0UL );
6620 
      // 2x2 tiles
6621  for( ; (j+2UL) <= jend; j+=2UL )
6622  {
6623  const size_t kbegin( ( IsUpper<MT4>::value )
6624  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6625  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6626  const size_t kend( ( IsLower<MT4>::value )
6627  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6628  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6629 
6630  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6631  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6632 
6633  SIMDType xmm1, xmm2, xmm3, xmm4;
6634  size_t k( kbegin );
6635 
6636  for( ; k<kpos; k+=SIMDSIZE ) {
6637  const SIMDType a1( A.load(i ,k) );
6638  const SIMDType a2( A.load(i+1UL,k) );
6639  const SIMDType b1( B.load(k,j ) );
6640  const SIMDType b2( B.load(k,j+1UL) );
6641  xmm1 += a1 * b1;
6642  xmm2 += a1 * b2;
6643  xmm3 += a2 * b1;
6644  xmm4 += a2 * b2;
6645  }
6646 
6647  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6648  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6649  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6650  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6651 
6652  for( ; remainder && k<kend; ++k ) {
6653  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6654  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6655  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6656  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6657  }
6658  }
6659 
      // 2x1 tile: the last column of the current column range.
6660  if( j < jend )
6661  {
6662  const size_t kbegin( ( IsUpper<MT4>::value )
6663  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6664  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6665  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6666 
6667  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6668  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6669 
6670  SIMDType xmm1, xmm2;
6671  size_t k( kbegin );
6672 
6673  for( ; k<kpos; k+=SIMDSIZE ) {
6674  const SIMDType b1( B.load(k,j) );
6675  xmm1 += A.load(i ,k) * b1;
6676  xmm2 += A.load(i+1UL,k) * b1;
6677  }
6678 
6679  (~C)(i ,j) += sum( xmm1 ) * scalar;
6680  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6681 
6682  for( ; remainder && k<kend; ++k ) {
6683  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6684  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6685  }
6686  }
6687  }
6688 
      // The last row in case M is odd.
6689  if( i < M )
6690  {
6691  const size_t jend( LOW ? i+1UL : N );
6692  size_t j( UPP ? i : 0UL );
6693 
      // 1x2 tiles
6694  for( ; (j+2UL) <= jend; j+=2UL )
6695  {
6696  const size_t kbegin( ( IsUpper<MT4>::value )
6697  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6698  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6699  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6700 
6701  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6702  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6703 
6704  SIMDType xmm1, xmm2;
6705  size_t k( kbegin );
6706 
6707  for( ; k<kpos; k+=SIMDSIZE ) {
6708  const SIMDType a1( A.load(i,k) );
6709  xmm1 += a1 * B.load(k,j );
6710  xmm2 += a1 * B.load(k,j+1UL);
6711  }
6712 
6713  (~C)(i,j ) += sum( xmm1 ) * scalar;
6714  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6715 
6716  for( ; remainder && k<kend; ++k ) {
6717  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6718  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6719  }
6720  }
6721 
      // 1x1 tile: the single remaining element; here the k-range always ends at K.
6722  if( j < jend )
6723  {
6724  const size_t kbegin( ( IsUpper<MT4>::value )
6725  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6726  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6727 
6728  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6729  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6730 
6731  SIMDType xmm1;
6732  size_t k( kbegin );
6733 
6734  for( ; k<kpos; k+=SIMDSIZE ) {
6735  xmm1 += A.load(i,k) * B.load(k,j);
6736  }
6737 
6738  (~C)(i,j) += sum( xmm1 ) * scalar;
6739 
6740  for( ; remainder && k<K; ++k ) {
6741  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6742  }
6743  }
6744  }
6745  }
6746  //**********************************************************************************************
6747 
6748  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment kernel (default variant): performs no work of its own
// and forwards all arguments unchanged to selectDefaultAddAssignKernel().
6762  template< typename MT3 // Type of the left-hand side target matrix
6763  , typename MT4 // Type of the left-hand side matrix operand
6764  , typename MT5 // Type of the right-hand side matrix operand
6765  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 6765 to 6767; the line with the return type (and
      // presumably the condition selecting this overload) is not visible here.
6767  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6768  {
6769  selectDefaultAddAssignKernel( C, A, B, scalar );
6770  }
6771  //**********************************************************************************************
6772 
6773  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Large-matrix addition assignment kernel (vectorized variant): dispatches to one of the
// lmmm() / ummm() / mmm() functions depending on the LOW and UPP flags (defined outside of
// this block). LOW takes precedence over UPP.
6788  template< typename MT3 // Type of the left-hand side target matrix
6789  , typename MT4 // Type of the left-hand side matrix operand
6790  , typename MT5 // Type of the right-hand side matrix operand
6791  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 6791 to 6793; the line with the return type (and
      // presumably the condition selecting this overload) is not visible here.
6793  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6794  {
      // The trailing ST2(1) argument is presumably the factor applied to the existing
      // content of C, which would make the call an accumulation -- TODO confirm against
      // the declarations in <blaze/math/dense/MMM.h>.
6795  if( LOW )
6796  lmmm( C, A, B, scalar, ST2(1) );
6797  else if( UPP )
6798  ummm( C, A, B, scalar, ST2(1) );
6799  else
6800  mmm( C, A, B, scalar, ST2(1) );
6801  }
6802  //**********************************************************************************************
6803 
6804  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS-based addition assignment kernel (default variant): does not call any BLAS function
// but forwards all arguments unchanged to selectLargeAddAssignKernel().
6818  template< typename MT3 // Type of the left-hand side target matrix
6819  , typename MT4 // Type of the left-hand side matrix operand
6820  , typename MT5 // Type of the right-hand side matrix operand
6821  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 6821 to 6823; the line with the return type (and
      // presumably the condition selecting this overload) is not visible here.
6823  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6824  {
6825  selectLargeAddAssignKernel( C, A, B, scalar );
6826  }
6827  //**********************************************************************************************
6828 
6829  //**BLAS-based addition assignment to dense matrices********************************************
6830 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
6831 
// BLAS-based addition assignment kernel; only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled (see the enclosing #if).
//
// - A triangular: B is copied into a temporary, the temporary is multiplied in place via
//   trmm() with A on the left, and the result is added to C.
// - B triangular: A is copied into a temporary, the temporary is multiplied in place via
//   trmm() with B on the right, and the result is added to C.
// - otherwise   : gemm() is called directly on C.
//
// The scalar is converted to the element type of the target before being passed on.
6844  template< typename MT3 // Type of the left-hand side target matrix
6845  , typename MT4 // Type of the left-hand side matrix operand
6846  , typename MT5 // Type of the right-hand side matrix operand
6847  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 6847 to 6849; the line with the return type (and
      // presumably the condition selecting this overload) is not visible here.
6849  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6850  {
6851  using ET = ElementType_<MT3>;
6852 
6853  if( IsTriangular<MT4>::value ) {
      // The temporary is required since the product must be added to C instead of
      // replacing the operand.
6854  ResultType_<MT3> tmp( serial( B ) );
6855  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6856  addAssign( C, tmp );
6857  }
6858  else if( IsTriangular<MT5>::value ) {
6859  ResultType_<MT3> tmp( serial( A ) );
6860  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6861  addAssign( C, tmp );
6862  }
6863  else {
      // ET(1) is presumably the factor for the existing content of C (accumulation)
      // -- TODO confirm against the gemm() wrapper.
6864  gemm( C, A, B, ET(scalar), ET(1) );
6865  }
6866  }
6867 #endif
6868  //**********************************************************************************************
6869 
6870  //**Addition assignment to sparse matrices******************************************************
6871  // No special implementation for the addition assignment to sparse matrices.
6872  //**********************************************************************************************
6873 
6874  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled matrix product to a dense matrix.
//
// lhs : the target dense matrix
// rhs : the scaled multiplication expression; its operands are taken from rhs.matrix_ and
//       its scalar from rhs.scalar_
//
// Returns early without touching \a lhs if the target has no rows or no columns, or if the
// left operand has no columns. Otherwise both operands are evaluated and the work is
// handed to selectSubAssignKernel().
6886  template< typename MT // Type of the target dense matrix
6887  , bool SO > // Storage order of the target dense matrix
6888  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6889  {
      // NOTE(review): the listing jumps from 6889 to 6891; one line is not visible here.
6891 
6892  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6893  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6894 
6895  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6896  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6897 
      // Nothing to subtract for an empty target or an empty inner dimension.
6898  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6899  return;
6900  }
6901 
      // LT and RT are defined outside of this block.
6902  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6903  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6904 
6905  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6906  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6907  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6908  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6909  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6910  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6911 
6912  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6913  }
6914  //**********************************************************************************************
6915 
6916  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment C -= ( A * B ) * scalar: chooses between
// selectSmallSubAssignKernel() and selectBlasSubAssignKernel().
6927  template< typename MT3 // Type of the left-hand side target matrix
6928  , typename MT4 // Type of the left-hand side matrix operand
6929  , typename MT5 // Type of the right-hand side matrix operand
6930  , typename ST2 > // Type of the scalar value
6931  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6932  {
      // NOTE(review): the listing jumps from 6932 to 6934, so the opening of the condition
      // is not visible here. The visible part compares the number of elements of C against
      // DMATTDMATMULT_THRESHOLD; how it is combined with the missing part cannot be told
      // from this excerpt.
6934  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
6935  selectSmallSubAssignKernel( C, A, B, scalar );
6936  else
6937  selectBlasSubAssignKernel( C, A, B, scalar );
6938  }
6939  //**********************************************************************************************
6940 
6941  //**Default subtraction assignment to dense matrices (general/general)**************************
// Default subtraction assignment kernel for the case that neither A nor B is diagonal.
// The scaled product is first evaluated into a temporary of type ResultType (defined
// outside of this block), which is then subtracted from C via subAssign().
6955  template< typename MT3 // Type of the left-hand side target matrix
6956  , typename MT4 // Type of the left-hand side matrix operand
6957  , typename MT5 // Type of the right-hand side matrix operand
6958  , typename ST2 > // Type of the scalar value
6959  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
6960  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6961  {
6962  const ResultType tmp( serial( A * B * scalar ) );
6963  subAssign( C, tmp );
6964  }
6965  //**********************************************************************************************
6966 
6967  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
// Default subtraction assignment kernel for a row-major target, a non-diagonal A and a
// diagonal B. Only the diagonal elements of B are read, i.e. every updated element is
// computed as C(i,j) -= A(i,j) * B(j,j) * scalar. The target is traversed row by row.
6981  template< typename MT3 // Type of the left-hand side target matrix
6982  , typename MT4 // Type of the left-hand side matrix operand
6983  , typename MT5 // Type of the right-hand side matrix operand
6984  , typename ST2 > // Type of the scalar value
6985  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6986  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6987  {
6988  const size_t M( A.rows() );
6989  const size_t N( B.columns() );
6990 
6991  for( size_t i=0UL; i<M; ++i )
6992  {
      // Column range of row i, narrowed according to the (strictly) upper/lower traits of A.
6993  const size_t jbegin( ( IsUpper<MT4>::value )
6994  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6995  :( 0UL ) );
6996  const size_t jend( ( IsLower<MT4>::value )
6997  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6998  :( N ) );
6999  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
7000 
      // The loop is unrolled by two: jpos marks the end of the largest even-sized part
      // of the range (the mask size_t(-2) clears the lowest bit of jnum).
7001  const size_t jnum( jend - jbegin );
7002  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
7003 
7004  for( size_t j=jbegin; j<jpos; j+=2UL ) {
7005  (~C)(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
7006  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
7007  }
      // Leftover element of an odd-sized range.
7008  if( jpos < jend ) {
7009  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
7010  }
7011  }
7012  }
7013  //**********************************************************************************************
7014 
7015  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
// Default subtraction assignment kernel for a column-major target, a non-diagonal A and a
// diagonal B. Every updated element is computed as C(i,j) -= A(i,j) * B(j,j) * scalar.
// The index space is traversed in tiles of BLOCK_SIZE x BLOCK_SIZE elements (BLOCK_SIZE is
// defined outside of this block).
7029  template< typename MT3 // Type of the left-hand side target matrix
7030  , typename MT4 // Type of the left-hand side matrix operand
7031  , typename MT5 // Type of the right-hand side matrix operand
7032  , typename ST2 > // Type of the scalar value
7033  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
7034  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7035  {
7036  constexpr size_t block( BLOCK_SIZE );
7037 
7038  const size_t M( A.rows() );
7039  const size_t N( B.columns() );
7040 
7041  for( size_t jj=0UL; jj<N; jj+=block ) {
7042  const size_t jend( min( N, jj+block ) );
7043  for( size_t ii=0UL; ii<M; ii+=block ) {
7044  const size_t iend( min( M, ii+block ) );
7045  for( size_t j=jj; j<jend; ++j )
7046  {
      // Row range of column j within the current tile, additionally narrowed according
      // to the (strictly) lower/upper traits of A.
7047  const size_t ibegin( ( IsLower<MT4>::value )
7048  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
7049  :( ii ) );
7050  const size_t ipos( ( IsUpper<MT4>::value )
7051  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
7052  :( iend ) );
7053 
      // The loop is simply skipped if the narrowed range is empty (ibegin >= ipos).
7054  for( size_t i=ibegin; i<ipos; ++i ) {
7055  (~C)(i,j) -= A(i,j) * B(j,j) * scalar;
7056  }
7057  }
7058  }
7059  }
7060  }
7061  //**********************************************************************************************
7062 
7063  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
// Default subtraction assignment kernel for a row-major target, a diagonal A and a
// non-diagonal B. Only the diagonal elements of A are read, i.e. every updated element is
// computed as C(i,j) -= A(i,i) * B(i,j) * scalar. The index space is traversed in tiles of
// BLOCK_SIZE x BLOCK_SIZE elements (BLOCK_SIZE is defined outside of this block).
7078  template< typename MT3 // Type of the left-hand side target matrix
7079  , typename MT4 // Type of the left-hand side matrix operand
7080  , typename MT5 // Type of the right-hand side matrix operand
7081  , typename ST2 > // Type of the scalar value
7082  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
7083  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7084  {
7085  constexpr size_t block( BLOCK_SIZE );
7086 
7087  const size_t M( A.rows() );
7088  const size_t N( B.columns() );
7089 
7090  for( size_t ii=0UL; ii<M; ii+=block ) {
7091  const size_t iend( min( M, ii+block ) );
7092  for( size_t jj=0UL; jj<N; jj+=block ) {
7093  const size_t jend( min( N, jj+block ) );
7094  for( size_t i=ii; i<iend; ++i )
7095  {
      // Column range of row i within the current tile, additionally narrowed according
      // to the (strictly) upper/lower traits of B.
7096  const size_t jbegin( ( IsUpper<MT5>::value )
7097  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
7098  :( jj ) );
7099  const size_t jpos( ( IsLower<MT5>::value )
7100  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
7101  :( jend ) );
7102 
      // The loop is simply skipped if the narrowed range is empty (jbegin >= jpos).
7103  for( size_t j=jbegin; j<jpos; ++j ) {
7104  (~C)(i,j) -= A(i,i) * B(i,j) * scalar;
7105  }
7106  }
7107  }
7108  }
7109  }
7110  //**********************************************************************************************
7111 
7112  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
// Default subtraction assignment kernel for a column-major target, a diagonal A and a
// non-diagonal B. Every updated element is computed as C(i,j) -= A(i,i) * B(i,j) * scalar.
// The target is traversed column by column.
7127  template< typename MT3 // Type of the left-hand side target matrix
7128  , typename MT4 // Type of the left-hand side matrix operand
7129  , typename MT5 // Type of the right-hand side matrix operand
7130  , typename ST2 > // Type of the scalar value
7131  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
7132  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7133  {
7134  const size_t M( A.rows() );
7135  const size_t N( B.columns() );
7136 
7137  for( size_t j=0UL; j<N; ++j )
7138  {
      // Row range of column j, narrowed according to the (strictly) lower/upper traits of B.
7139  const size_t ibegin( ( IsLower<MT5>::value )
7140  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
7141  :( 0UL ) );
7142  const size_t iend( ( IsUpper<MT5>::value )
7143  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
7144  :( M ) );
7145  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
7146 
      // The loop is unrolled by two: ipos marks the end of the largest even-sized part
      // of the range (the mask size_t(-2) clears the lowest bit of inum).
7147  const size_t inum( iend - ibegin );
7148  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
7149 
7150  for( size_t i=ibegin; i<ipos; i+=2UL ) {
7151  (~C)(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
7152  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
7153  }
      // Leftover element of an odd-sized range.
7154  if( ipos < iend ) {
7155  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
7156  }
7157  }
7158  }
7159  //**********************************************************************************************
7160 
7161  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Default subtraction assignment kernel for the case that both A and B are diagonal.
// Only the diagonal of the target is modified: C(i,i) -= A(i,i) * B(i,i) * scalar for all
// i in [0,A.rows()).
7175  template< typename MT3 // Type of the left-hand side target matrix
7176  , typename MT4 // Type of the left-hand side matrix operand
7177  , typename MT5 // Type of the right-hand side matrix operand
7178  , typename ST2 > // Type of the scalar value
7179  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
7180  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7181  {
7182  for( size_t i=0UL; i<A.rows(); ++i ) {
7183  C(i,i) -= A(i,i) * B(i,i) * scalar;
7184  }
7185  }
7186  //**********************************************************************************************
7187 
7188  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction assignment kernel (default variant): performs no work of its own
// and forwards all arguments unchanged to selectDefaultSubAssignKernel().
7202  template< typename MT3 // Type of the left-hand side target matrix
7203  , typename MT4 // Type of the left-hand side matrix operand
7204  , typename MT5 // Type of the right-hand side matrix operand
7205  , typename ST2 > // Type of the scalar value
      // NOTE(review): the listing jumps from 7205 to 7207; the line with the return type (and
      // presumably the condition selecting this overload) is not visible here.
7207  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7208  {
7209  selectDefaultSubAssignKernel( C, A, B, scalar );
7210  }
7211  //**********************************************************************************************
7212 
7213  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
7228  template< typename MT3 // Type of the left-hand side target matrix
7229  , typename MT4 // Type of the left-hand side matrix operand
7230  , typename MT5 // Type of the right-hand side matrix operand
7231  , typename ST2 > // Type of the scalar value
7233  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7234  {
7235  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
7236 
7237  const size_t M( A.rows() );
7238  const size_t N( B.columns() );
7239  const size_t K( A.columns() );
7240 
7241  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
7242 
7243  size_t i( 0UL );
7244 
7245  for( ; (i+2UL) <= M; i+=2UL )
7246  {
7247  const size_t jend( LOW ? i+2UL : N );
7248  size_t j( UPP ? i : 0UL );
7249 
7250  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
7251  {
7252  const size_t kbegin( ( IsUpper<MT4>::value )
7253  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7254  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7255  const size_t kend( ( IsLower<MT4>::value )
7256  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
7257  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
7258 
7259  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7260  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7261 
7262  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7263  size_t k( kbegin );
7264 
7265  for( ; k<kpos; k+=SIMDSIZE ) {
7266  const SIMDType a1( A.load(i ,k) );
7267  const SIMDType a2( A.load(i+1UL,k) );
7268  const SIMDType b1( B.load(k,j ) );
7269  const SIMDType b2( B.load(k,j+1UL) );
7270  const SIMDType b3( B.load(k,j+2UL) );
7271  const SIMDType b4( B.load(k,j+3UL) );
7272  xmm1 += a1 * b1;
7273  xmm2 += a1 * b2;
7274  xmm3 += a1 * b3;
7275  xmm4 += a1 * b4;
7276  xmm5 += a2 * b1;
7277  xmm6 += a2 * b2;
7278  xmm7 += a2 * b3;
7279  xmm8 += a2 * b4;
7280  }
7281 
7282  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7283  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7284  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
7285  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
7286  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
7287  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
7288  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
7289  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
7290 
7291  for( ; remainder && k<kend; ++k ) {
7292  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7293  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7294  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL) * scalar;
7295  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL) * scalar;
7296  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7297  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7298  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL) * scalar;
7299  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL) * scalar;
7300  }
7301  }
7302 
7303  for( ; (j+2UL) <= jend; j+=2UL )
7304  {
7305  const size_t kbegin( ( IsUpper<MT4>::value )
7306  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7307  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7308  const size_t kend( ( IsLower<MT4>::value )
7309  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7310  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7311 
7312  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7313  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7314 
7315  SIMDType xmm1, xmm2, xmm3, xmm4;
7316  size_t k( kbegin );
7317 
7318  for( ; k<kpos; k+=SIMDSIZE ) {
7319  const SIMDType a1( A.load(i ,k) );
7320  const SIMDType a2( A.load(i+1UL,k) );
7321  const SIMDType b1( B.load(k,j ) );
7322  const SIMDType b2( B.load(k,j+1UL) );
7323  xmm1 += a1 * b1;
7324  xmm2 += a1 * b2;
7325  xmm3 += a2 * b1;
7326  xmm4 += a2 * b2;
7327  }
7328 
7329  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7330  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7331  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7332  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7333 
7334  for( ; remainder && k<kend; ++k ) {
7335  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7336  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7337  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7338  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7339  }
7340  }
7341 
7342  if( j < jend )
7343  {
7344  const size_t kbegin( ( IsUpper<MT4>::value )
7345  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7346  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7347  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7348 
7349  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7350  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7351 
7352  SIMDType xmm1, xmm2;
7353  size_t k( kbegin );
7354 
7355  for( ; k<kpos; k+=SIMDSIZE ) {
7356  const SIMDType b1( B.load(k,j) );
7357  xmm1 += A.load(i ,k) * b1;
7358  xmm2 += A.load(i+1UL,k) * b1;
7359  }
7360 
7361  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7362  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7363 
7364  for( ; remainder && k<kend; ++k ) {
7365  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7366  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7367  }
7368  }
7369  }
7370 
7371  if( i < M )
7372  {
7373  const size_t jend( LOW ? i+1UL : N );
7374  size_t j( UPP ? i : 0UL );
7375 
7376  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
7377  {
7378  const size_t kbegin( ( IsUpper<MT4>::value )
7379  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7380  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7381  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
7382 
7383  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7384  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7385 
7386  SIMDType xmm1, xmm2, xmm3, xmm4;
7387  size_t k( kbegin );
7388 
7389  for( ; k<kpos; k+=SIMDSIZE ) {
7390  const SIMDType a1( A.load(i,k) );
7391  xmm1 += a1 * B.load(k,j );
7392  xmm2 += a1 * B.load(k,j+1UL);
7393  xmm3 += a1 * B.load(k,j+2UL);
7394  xmm4 += a1 * B.load(k,j+3UL);
7395  }
7396 
7397  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7398  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7399  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
7400  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
7401 
7402  for( ; remainder && k<kend; ++k ) {
7403  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7404  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7405  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL) * scalar;
7406  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL) * scalar;
7407  }
7408  }
7409 
7410  for( ; (j+2UL) <= jend; j+=2UL )
7411  {
7412  const size_t kbegin( ( IsUpper<MT4>::value )
7413  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7414  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7415  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7416 
7417  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7418  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7419 
7420  SIMDType xmm1, xmm2;
7421  size_t k( kbegin );
7422 
7423  for( ; k<kpos; k+=SIMDSIZE ) {
7424  const SIMDType a1( A.load(i,k) );
7425  xmm1 += a1 * B.load(k,j );
7426  xmm2 += a1 * B.load(k,j+1UL);
7427  }
7428 
7429  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7430  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7431 
7432  for( ; remainder && k<kend; ++k ) {
7433  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7434  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7435  }
7436  }
7437 
7438  if( j < jend )
7439  {
7440  const size_t kbegin( ( IsUpper<MT4>::value )
7441  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7442  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7443 
7444  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7445  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7446 
7447  SIMDType xmm1;
7448  size_t k( kbegin );
7449 
7450  for( ; k<kpos; k+=SIMDSIZE ) {
7451  xmm1 += A.load(i,k) * B.load(k,j);
7452  }
7453 
7454  (~C)(i,j) -= sum( xmm1 ) * scalar;
7455 
7456  for( ; remainder && k<K; ++k ) {
7457  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7458  }
7459  }
7460  }
7461  }
7462  //**********************************************************************************************
7463 
7464  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Vectorized small-matrix kernel that performs C -= ( A * B ) * scalar for a column-major
// target matrix (DenseMatrix<MT3,true>).
//
// Each entry C(i,j) is updated from a SIMD-accumulated sum over k of A(i,k) * B(k,j); the
// accumulated vector is reduced with sum(), multiplied by scalar and subtracted from C(i,j).
// Rows are processed in groups of 4 (only when neither LOW nor UPP is set), then in groups
// of 2, then a single trailing row; columns are processed in pairs with a single trailing
// column.
//
// C      : target matrix, updated in place.
// A      : left-hand side operand; A.rows() and A.columns() provide M and K.
// B      : right-hand side operand; B.columns() provides N.
// scalar : factor applied to every accumulated product before it is subtracted.
//
// NOTE(review): the return-type line of this function as well as LOW, UPP, SIMDType and
// SIMDSIZE are declared outside the lines visible here. LOW/UPP presumably mark a result that
// is restricted to its lower/upper part -- confirm against the class definition.
7479  template< typename MT3 // Type of the left-hand side target matrix
7480  , typename MT4 // Type of the left-hand side matrix operand
7481  , typename MT5 // Type of the right-hand side matrix operand
7482  , typename ST2 > // Type of the scalar value
7484  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7485  {
      // A scalar clean-up loop over k is only needed if at least one operand is not padded.
7486  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
7487 
7488  const size_t M( A.rows() );
7489  const size_t N( B.columns() );
7490  const size_t K( A.columns() );
7491 
      // With LOW or UPP set, the result is expected to be square.
7492  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
7493 
7494  size_t i( 0UL );
7495 
      // Unrestricted case only (!LOW && !UPP): blocks of four rows of C.
7496  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
7497  {
7498  size_t j( 0UL );
7499 
      // 4x2 tile of C.
7500  for( ; (j+2UL) <= N; j+=2UL )
7501  {
      // k-range of the tile: the start depends on A being upper and/or B being lower (and is
      // aligned down via '& size_t(-SIMDSIZE)'), the end on A being lower and/or B being upper.
7502  const size_t kbegin( ( IsUpper<MT4>::value )
7503  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7504  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7505  const size_t kend( ( IsLower<MT4>::value )
7506  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
7507  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7508 
      // With remainder handling the SIMD loop stops at kend aligned down to SIMDSIZE; the
      // assertion checks that the mask result equals kend - kend % SIMDSIZE.
7509  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7510  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7511 
      // NOTE(review): the accumulators rely on SIMDType's default construction -- presumably zero.
7512  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7513  size_t k( kbegin );
7514 
7515  for( ; k<kpos; k+=SIMDSIZE )
7516  {
7517  const SIMDType a1( A.load(i ,k) );
7518  const SIMDType a2( A.load(i+1UL,k) );
7519  const SIMDType a3( A.load(i+2UL,k) );
7520  const SIMDType a4( A.load(i+3UL,k) );
7521  const SIMDType b1( B.load(k,j ) );
7522  const SIMDType b2( B.load(k,j+1UL) );
7523  xmm1 += a1 * b1;
7524  xmm2 += a1 * b2;
7525  xmm3 += a2 * b1;
7526  xmm4 += a2 * b2;
7527  xmm5 += a3 * b1;
7528  xmm6 += a3 * b2;
7529  xmm7 += a4 * b1;
7530  xmm8 += a4 * b2;
7531  }
7532 
      // Reduce each accumulator, scale it and subtract it from the matching entry of C.
7533  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7534  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7535  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7536  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7537  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
7538  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
7539  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
7540  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
7541 
      // Scalar clean-up for the k values not covered by the SIMD loop.
7542  for( ; remainder && k<kend; ++k ) {
7543  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7544  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7545  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7546  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7547  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j ) * scalar;
7548  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL) * scalar;
7549  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j ) * scalar;
7550  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL) * scalar;
7551  }
7552  }
7553 
      // Single trailing column: 4x1 tile of C.
7554  if( j < N )
7555  {
7556  const size_t kbegin( ( IsUpper<MT4>::value )
7557  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7558  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7559  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
7560 
7561  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7562  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7563 
7564  SIMDType xmm1, xmm2, xmm3, xmm4;
7565  size_t k( kbegin );
7566 
7567  for( ; k<kpos; k+=SIMDSIZE ) {
7568  const SIMDType b1( B.load(k,j) );
7569  xmm1 += A.load(i ,k) * b1;
7570  xmm2 += A.load(i+1UL,k) * b1;
7571  xmm3 += A.load(i+2UL,k) * b1;
7572  xmm4 += A.load(i+3UL,k) * b1;
7573  }
7574 
7575  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7576  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7577  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
7578  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
7579 
7580  for( ; remainder && k<kend; ++k ) {
7581  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7582  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7583  (~C)(i+2UL,j) -= A(i+2UL,k) * B(k,j) * scalar;
7584  (~C)(i+3UL,j) -= A(i+3UL,k) * B(k,j) * scalar;
7585  }
7586  }
7587  }
7588 
      // Blocks of two rows; the column range [j,jend) is narrowed by LOW (end) and UPP (start).
7589  for( ; (i+2UL) <= M; i+=2UL )
7590  {
7591  const size_t jend( LOW ? i+2UL : N );
7592  size_t j( UPP ? i : 0UL );
7593 
      // 2x2 tile of C.
7594  for( ; (j+2UL) <= jend; j+=2UL )
7595  {
7596  const size_t kbegin( ( IsUpper<MT4>::value )
7597  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7598  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7599  const size_t kend( ( IsLower<MT4>::value )
7600  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7601  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7602 
7603  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7604  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7605 
7606  SIMDType xmm1, xmm2, xmm3, xmm4;
7607  size_t k( kbegin );
7608 
7609  for( ; k<kpos; k+=SIMDSIZE ) {
7610  const SIMDType a1( A.load(i ,k) );
7611  const SIMDType a2( A.load(i+1UL,k) );
7612  const SIMDType b1( B.load(k,j ) );
7613  const SIMDType b2( B.load(k,j+1UL) );
7614  xmm1 += a1 * b1;
7615  xmm2 += a1 * b2;
7616  xmm3 += a2 * b1;
7617  xmm4 += a2 * b2;
7618  }
7619 
7620  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7621  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7622  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7623  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7624 
7625  for( ; remainder && k<kend; ++k ) {
7626  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7627  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7628  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7629  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7630  }
7631  }
7632 
      // Single trailing column: 2x1 tile of C.
7633  if( j < jend )
7634  {
7635  const size_t kbegin( ( IsUpper<MT4>::value )
7636  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7637  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7638  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7639 
7640  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7641  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7642 
7643  SIMDType xmm1, xmm2;
7644  size_t k( kbegin );
7645 
7646  for( ; k<kpos; k+=SIMDSIZE ) {
7647  const SIMDType b1( B.load(k,j) );
7648  xmm1 += A.load(i ,k) * b1;
7649  xmm2 += A.load(i+1UL,k) * b1;
7650  }
7651 
7652  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7653  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7654 
7655  for( ; remainder && k<kend; ++k ) {
7656  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7657  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7658  }
7659  }
7660  }
7661 
      // Single trailing row of C.
7662  if( i < M )
7663  {
7664  const size_t jend( LOW ? i+1UL : N );
7665  size_t j( UPP ? i : 0UL );
7666 
      // 1x2 tile of C.
7667  for( ; (j+2UL) <= jend; j+=2UL )
7668  {
7669  const size_t kbegin( ( IsUpper<MT4>::value )
7670  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7671  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7672  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7673 
7674  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7675  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7676 
7677  SIMDType xmm1, xmm2;
7678  size_t k( kbegin );
7679 
7680  for( ; k<kpos; k+=SIMDSIZE ) {
7681  const SIMDType a1( A.load(i,k) );
7682  xmm1 += a1 * B.load(k,j );
7683  xmm2 += a1 * B.load(k,j+1UL);
7684  }
7685 
7686  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7687  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7688 
7689  for( ; remainder && k<kend; ++k ) {
7690  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7691  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7692  }
7693  }
7694 
      // Single trailing element; in this branch the k-range always ends at K.
7695  if( j < jend )
7696  {
7697  const size_t kbegin( ( IsUpper<MT4>::value )
7698  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7699  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7700 
7701  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7702  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7703 
7704  SIMDType xmm1;
7705  size_t k( kbegin );
7706 
7707  for( ; k<kpos; k+=SIMDSIZE ) {
7708  xmm1 += A.load(i,k) * B.load(k,j);
7709  }
7710 
7711  (~C)(i,j) -= sum( xmm1 ) * scalar;
7712 
7713  for( ; remainder && k<K; ++k ) {
7714  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7715  }
7716  }
7717  }
7718  }
7719  //**********************************************************************************************
7720 
7721  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Large-matrix subtraction assignment kernel, non-vectorized variant: there is no dedicated
// implementation, the call is forwarded unchanged to selectDefaultSubAssignKernel().
//
// C      : target matrix.
// A, B   : left-hand and right-hand side operands of the product.
// scalar : scaling factor, passed through as is.
//
// NOTE(review): the return-type line (presumably the condition selecting this overload) is not
// part of the lines visible here.
7735  template< typename MT3 // Type of the left-hand side target matrix
7736  , typename MT4 // Type of the left-hand side matrix operand
7737  , typename MT5 // Type of the right-hand side matrix operand
7738  , typename ST2 > // Type of the scalar value
7740  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7741  {
7742  selectDefaultSubAssignKernel( C, A, B, scalar );
7743  }
7744  //**********************************************************************************************
7745 
7746  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Large-matrix subtraction assignment kernel, vectorized variant: dispatches to lmmm() when
// LOW is set, to ummm() when UPP is set, and to mmm() otherwise. In every case the negated
// scalar and ST2(1) are passed as the two trailing arguments.
//
// C      : target matrix.
// A, B   : left-hand and right-hand side operands of the product.
// scalar : scaling factor; it is negated before being handed on.
//
// NOTE(review): the trailing arguments presumably are the factor for the product and the
// factor for the existing content of C (giving C = 1*C - scalar*A*B) -- confirm against the
// mmm/lmmm/ummm declarations. The return-type line and LOW/UPP are declared outside the lines
// visible here.
7761  template< typename MT3 // Type of the left-hand side target matrix
7762  , typename MT4 // Type of the left-hand side matrix operand
7763  , typename MT5 // Type of the right-hand side matrix operand
7764  , typename ST2 > // Type of the scalar value
7766  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7767  {
7768  if( LOW )
7769  lmmm( C, A, B, -scalar, ST2(1) );
7770  else if( UPP )
7771  ummm( C, A, B, -scalar, ST2(1) );
7772  else
7773  mmm( C, A, B, -scalar, ST2(1) );
7774  }
7775  //**********************************************************************************************
7776 
7777  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// BLAS-based subtraction assignment kernel, default variant: no BLAS call is made here, the
// call is forwarded unchanged to selectLargeSubAssignKernel().
//
// C      : target matrix.
// A, B   : left-hand and right-hand side operands of the product.
// scalar : scaling factor, passed through as is.
//
// NOTE(review): the return-type line (presumably the condition selecting this overload) is not
// part of the lines visible here.
7791  template< typename MT3 // Type of the left-hand side target matrix
7792  , typename MT4 // Type of the left-hand side matrix operand
7793  , typename MT5 // Type of the right-hand side matrix operand
7794  , typename ST2 > // Type of the scalar value
7796  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7797  {
7798  selectLargeSubAssignKernel( C, A, B, scalar );
7799  }
7800  //**********************************************************************************************
7801 
7802  //**BLAS-based subtraction assignment to dense matrices*****************************************
7803 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
7804 
// BLAS-based subtraction assignment kernel (only compiled when the enclosing #if is active).
//
// Three cases, selected by the compile-time structure of the operands:
//  - A is triangular: B is evaluated serially into a temporary, trmm() is applied to the
//    temporary with A from the left, and the temporary is subtracted from C.
//  - B is triangular: A is evaluated serially into a temporary, trmm() is applied to the
//    temporary with B from the right, and the temporary is subtracted from C.
//  - otherwise: a single gemm() call with the negated scalar and ET(1).
//
// C      : target matrix.
// A, B   : left-hand and right-hand side operands of the product.
// scalar : scaling factor; converted to the element type of C before use.
//
// NOTE(review): trmm() presumably overwrites tmp with the scaled triangular product, and the
// trailing gemm() arguments presumably are the factors for A*B and for C -- confirm against
// the wrappers in blaze/math/blas. The return-type line is not part of the lines visible here.
7817  template< typename MT3 // Type of the left-hand side target matrix
7818  , typename MT4 // Type of the left-hand side matrix operand
7819  , typename MT5 // Type of the right-hand side matrix operand
7820  , typename ST2 > // Type of the scalar value
7822  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7823  {
7824  using ET = ElementType_<MT3>;
7825 
7826  if( IsTriangular<MT4>::value ) {
7827  ResultType_<MT3> tmp( serial( B ) );
7828  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7829  subAssign( C, tmp );
7830  }
7831  else if( IsTriangular<MT5>::value ) {
7832  ResultType_<MT3> tmp( serial( A ) );
7833  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7834  subAssign( C, tmp );
7835  }
7836  else {
7837  gemm( C, A, B, ET(-scalar), ET(1) );
7838  }
7839  }
7840 #endif
7841  //**********************************************************************************************
7842 
7843  //**Subtraction assignment to sparse matrices***************************************************
7844  // No special implementation for the subtraction assignment to sparse matrices.
7845  //**********************************************************************************************
7846 
7847  //**Schur product assignment to dense matrices**************************************************
// Schur product assignment of a scaled matrix product to a dense matrix.
//
// The expression rhs is first evaluated serially into a temporary of type ResultType; the
// temporary is then passed to schurAssign() together with the target.
//
// lhs : target dense matrix; its dimensions are asserted to match those of rhs.
// rhs : scaled multiplication expression to be combined with lhs.
//
// NOTE(review): some lines of the body are absent from this listing (gaps in the listing
// numbers); ResultType is declared outside the lines visible here.
7859  template< typename MT // Type of the target dense matrix
7860  , bool SO > // Storage order of the target dense matrix
7861  friend inline void schurAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7862  {
7864 
7868 
7869  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7870  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7871 
7872  const ResultType tmp( serial( rhs ) );
7873  schurAssign( ~lhs, tmp );
7874  }
7875  //**********************************************************************************************
7876 
7877  //**Schur product assignment to sparse matrices*************************************************
7878  // No special implementation for the Schur product assignment to sparse matrices.
7879  //**********************************************************************************************
7880 
7881  //**Multiplication assignment to dense matrices*************************************************
7882  // No special implementation for the multiplication assignment to dense matrices.
7883  //**********************************************************************************************
7884 
7885  //**Multiplication assignment to sparse matrices************************************************
7886  // No special implementation for the multiplication assignment to sparse matrices.
7887  //**********************************************************************************************
7888 
7889  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled matrix product to a dense matrix.
//
// Steps:
//  1. Returns immediately if the target has no rows or no columns.
//  2. Resets the target and returns if the left operand has no columns.
//  3. Otherwise evaluates both operands into A and B and forwards the expression
//     A * B * rhs.scalar_ to smpAssign().
//
// lhs : target dense matrix; its dimensions are asserted to match those of rhs.
// rhs : scaled multiplication expression to be assigned.
//
// NOTE(review): the return-type line and the MMM, LT and RT types are declared outside the
// lines visible here.
7904  template< typename MT // Type of the target dense matrix
7905  , bool SO > // Storage order of the target dense matrix
7907  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7908  {
7910 
7911  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7912  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7913 
7914  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7915  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7916 
7917  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7918  return;
7919  }
7920  else if( left.columns() == 0UL ) {
7921  reset( ~lhs );
7922  return;
7923  }
7924 
7925  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7926  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7927 
7928  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7929  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7930  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7931  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7932  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7933  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7934 
7935  smpAssign( ~lhs, A * B * rhs.scalar_ );
7936  }
7937  //**********************************************************************************************
7938 
7939  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a scaled matrix product to a sparse matrix.
//
// The expression rhs is evaluated into a temporary of type TmpType; the temporary is passed
// through a ForwardFunctor object and the result is handed to smpAssign() together with the
// target. The dimensions of lhs and rhs are asserted to match.
//
// NOTE(review): the function signature and the declarations of TmpType and ForwardFunctor are
// absent from this listing (gaps in the listing numbers). lhs and rhs presumably are the
// sparse target matrix and the scaled multiplication expression -- confirm against the
// original header.
7954  template< typename MT // Type of the target sparse matrix
7955  , bool SO > // Storage order of the target sparse matrix
7958  {
7960 
7962 
7969 
7970  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7971  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7972 
7973  const ForwardFunctor fwd;
7974 
7975  const TmpType tmp( rhs );
7976  smpAssign( ~lhs, fwd( tmp ) );
7977  }
7978  //**********************************************************************************************
7979 
7980  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled matrix product to a dense matrix.
//
// Returns without touching the target if the target has no rows or no columns, or if the left
// operand has no columns. Otherwise both operands are evaluated into A and B and the
// expression A * B * rhs.scalar_ is forwarded to smpAddAssign().
//
// NOTE(review): the function signature is absent from this listing (gap in the listing
// numbers); lhs and rhs presumably are the dense target matrix and the scaled multiplication
// expression. MMM, LT and RT are declared outside the lines visible here.
7995  template< typename MT // Type of the target dense matrix
7996  , bool SO > // Storage order of the target dense matrix
7999  {
8001 
8002  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8003  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8004 
8005  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
8006  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
8007 
      // Nothing to add for an empty target or an empty inner dimension.
8008  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8009  return;
8010  }
8011 
8012  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8013  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8014 
8015  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8016  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8017  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8018  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8019  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8020  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8021 
8022  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
8023  }
8024  //**********************************************************************************************
8025 
8026  //**SMP addition assignment to sparse matrices**************************************************
8027  // No special implementation for the SMP addition assignment to sparse matrices.
8028  //**********************************************************************************************
8029 
8030  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled matrix product to a dense matrix.
//
// Returns without touching the target if the target has no rows or no columns, or if the left
// operand has no columns. Otherwise both operands are evaluated into A and B and the
// expression A * B * rhs.scalar_ is forwarded to smpSubAssign().
//
// NOTE(review): the function signature is absent from this listing (gap in the listing
// numbers); lhs and rhs presumably are the dense target matrix and the scaled multiplication
// expression. MMM, LT and RT are declared outside the lines visible here.
8045  template< typename MT // Type of the target dense matrix
8046  , bool SO > // Storage order of the target dense matrix
8049  {
8051 
8052  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8053  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8054 
8055  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
8056  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
8057 
      // Nothing to subtract for an empty target or an empty inner dimension.
8058  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8059  return;
8060  }
8061 
8062  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8063  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8064 
8065  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8066  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8067  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8068  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8069  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8070  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8071 
8072  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
8073  }
8074  //**********************************************************************************************
8075 
8076  //**SMP subtraction assignment to sparse matrices***********************************************
8077  // No special implementation for the SMP subtraction assignment to sparse matrices.
8078  //**********************************************************************************************
8079 
8080  //**SMP Schur product assignment to dense matrices**********************************************
// SMP Schur product assignment of a scaled matrix product to a dense matrix.
//
// The expression rhs is evaluated into a temporary of type ResultType (without the serial()
// wrapper used by the non-SMP schurAssign()); the temporary is then passed to
// smpSchurAssign() together with the target.
//
// lhs : target dense matrix; its dimensions are asserted to match those of rhs.
// rhs : scaled multiplication expression to be combined with lhs.
//
// NOTE(review): some lines of the body are absent from this listing (gaps in the listing
// numbers); ResultType is declared outside the lines visible here.
8092  template< typename MT // Type of the target dense matrix
8093  , bool SO > // Storage order of the target dense matrix
8094  friend inline void smpSchurAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8095  {
8097 
8101 
8102  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8103  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8104 
8105  const ResultType tmp( rhs );
8106  smpSchurAssign( ~lhs, tmp );
8107  }
8108  //**********************************************************************************************
8109 
8110  //**SMP Schur product assignment to sparse matrices*********************************************
8111  // No special implementation for the SMP Schur product assignment to sparse matrices.
8112  //**********************************************************************************************
8113 
8114  //**SMP multiplication assignment to dense matrices*********************************************
8115  // No special implementation for the SMP multiplication assignment to dense matrices.
8116  //**********************************************************************************************
8117 
8118  //**SMP multiplication assignment to sparse matrices********************************************
8119  // No special implementation for the SMP multiplication assignment to sparse matrices.
8120  //**********************************************************************************************
8121 
8122  //**Compile time checks*************************************************************************
8131  //**********************************************************************************************
8132 };
8134 //*************************************************************************************************
8135 
8136 
8137 
8138 
8139 //=================================================================================================
8140 //
8141 // GLOBAL BINARY ARITHMETIC OPERATORS
8142 //
8143 //=================================================================================================
8144 
8145 //*************************************************************************************************
// Multiplication operator for a DenseMatrix with storage-order flag 'false' (left) and a
// DenseMatrix with storage-order flag 'true' (right).
//
// lhs : left-hand side dense matrix.
// rhs : right-hand side dense matrix.
//
// Returns an object of type ReturnType constructed from the two operands.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if the number of columns
// of lhs differs from the number of rows of rhs.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); it presumably names the multiplication expression type -- confirm against the
// original header.
8175 template< typename MT1 // Type of the left-hand side dense matrix
8176  , typename MT2 > // Type of the right-hand side dense matrix
8177 inline decltype(auto)
8178  operator*( const DenseMatrix<MT1,false>& lhs, const DenseMatrix<MT2,true>& rhs )
8179 {
8181 
8182  if( (~lhs).columns() != (~rhs).rows() ) {
8183  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
8184  }
8185 
8187  return ReturnType( ~lhs, ~rhs );
8188 }
8189 //*************************************************************************************************
8190 
8191 
8192 
8193 
8194 //=================================================================================================
8195 //
8196 // GLOBAL FUNCTIONS
8197 //
8198 //=================================================================================================
8199 
8200 //*************************************************************************************************
// declsym() overload for a DMatTDMatMultExpr.
//
// dm : the multiplication expression.
//
// Returns an object of type ReturnType built from the left and right operand of dm.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if dm is not square.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); presumably it is the same expression type with the symmetry flag set -- confirm
// against the original header.
8225 template< typename MT1 // Type of the left-hand side dense matrix
8226  , typename MT2 // Type of the right-hand side dense matrix
8227  , bool SF // Symmetry flag
8228  , bool HF // Hermitian flag
8229  , bool LF // Lower flag
8230  , bool UF > // Upper flag
8231 inline decltype(auto) declsym( const DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
8232 {
8234 
8235  if( !isSquare( dm ) ) {
8236  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
8237  }
8238 
8240  return ReturnType( dm.leftOperand(), dm.rightOperand() );
8241 }
8243 //*************************************************************************************************
8244 
8245 
8246 //*************************************************************************************************
// declherm() overload for a DMatTDMatMultExpr.
//
// dm : the multiplication expression.
//
// Returns an object of type ReturnType built from the left and right operand of dm.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if dm is not square.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); presumably it is the same expression type with the Hermitian flag set -- confirm
// against the original header.
8271 template< typename MT1 // Type of the left-hand side dense matrix
8272  , typename MT2 // Type of the right-hand side dense matrix
8273  , bool SF // Symmetry flag
8274  , bool HF // Hermitian flag
8275  , bool LF // Lower flag
8276  , bool UF > // Upper flag
8277 inline decltype(auto) declherm( const DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
8278 {
8280 
8281  if( !isSquare( dm ) ) {
8282  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
8283  }
8284 
8286  return ReturnType( dm.leftOperand(), dm.rightOperand() );
8287 }
8289 //*************************************************************************************************
8290 
8291 
8292 //*************************************************************************************************
// decllow() overload for a DMatTDMatMultExpr.
//
// dm : the multiplication expression.
//
// Returns an object of type ReturnType built from the left and right operand of dm.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if dm is not square.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); presumably it is the same expression type with the lower flag set -- confirm
// against the original header.
8317 template< typename MT1 // Type of the left-hand side dense matrix
8318  , typename MT2 // Type of the right-hand side dense matrix
8319  , bool SF // Symmetry flag
8320  , bool HF // Hermitian flag
8321  , bool LF // Lower flag
8322  , bool UF > // Upper flag
8323 inline decltype(auto) decllow( const DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
8324 {
8326 
8327  if( !isSquare( dm ) ) {
8328  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
8329  }
8330 
8332  return ReturnType( dm.leftOperand(), dm.rightOperand() );
8333 }
8335 //*************************************************************************************************
8336 
8337 
8338 //*************************************************************************************************
// declupp() overload for a DMatTDMatMultExpr.
//
// dm : the multiplication expression.
//
// Returns an object of type ReturnType built from the left and right operand of dm.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if dm is not square.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); presumably it is the same expression type with the upper flag set -- confirm
// against the original header.
8363 template< typename MT1 // Type of the left-hand side dense matrix
8364  , typename MT2 // Type of the right-hand side dense matrix
8365  , bool SF // Symmetry flag
8366  , bool HF // Hermitian flag
8367  , bool LF // Lower flag
8368  , bool UF > // Upper flag
8369 inline decltype(auto) declupp( const DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
8370 {
8372 
8373  if( !isSquare( dm ) ) {
8374  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
8375  }
8376 
8378  return ReturnType( dm.leftOperand(), dm.rightOperand() );
8379 }
8381 //*************************************************************************************************
8382 
8383 
8384 //*************************************************************************************************
// decldiag() overload for a DMatTDMatMultExpr.
//
// dm : the multiplication expression.
//
// Returns an object of type ReturnType built from the left and right operand of dm.
// Raises an invalid-argument error via BLAZE_THROW_INVALID_ARGUMENT if dm is not square.
//
// NOTE(review): the declaration of ReturnType is absent from this listing (gap in the listing
// numbers); presumably it is the same expression type with both the lower and the upper flag
// set -- confirm against the original header.
8409 template< typename MT1 // Type of the left-hand side dense matrix
8410  , typename MT2 // Type of the right-hand side dense matrix
8411  , bool SF // Symmetry flag
8412  , bool HF // Hermitian flag
8413  , bool LF // Lower flag
8414  , bool UF > // Upper flag
8415 inline decltype(auto) decldiag( const DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>& dm )
8416 {
8418 
8419  if( !isSquare( dm ) ) {
8420  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
8421  }
8422 
8424  return ReturnType( dm.leftOperand(), dm.rightOperand() );
8425 }
8427 //*************************************************************************************************
8428 
8429 
8430 
8431 
8432 //=================================================================================================
8433 //
8434 // ROWS SPECIALIZATIONS
8435 //
8436 //=================================================================================================
8437 
8438 //*************************************************************************************************
8440 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8441 struct Rows< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8442  : public Rows<MT1>
8443 {};
8445 //*************************************************************************************************
8446 
8447 
8448 
8449 
8450 //=================================================================================================
8451 //
8452 // COLUMNS SPECIALIZATIONS
8453 //
8454 //=================================================================================================
8455 
8456 //*************************************************************************************************
8458 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8459 struct Columns< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8460  : public Columns<MT2>
8461 {};
8463 //*************************************************************************************************
8464 
8465 
8466 
8467 
8468 //=================================================================================================
8469 //
8470 // ISALIGNED SPECIALIZATIONS
8471 //
8472 //=================================================================================================
8473 
8474 //*************************************************************************************************
8476 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8477 struct IsAligned< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8478  : public BoolConstant< And< IsAligned<MT1>, IsAligned<MT2> >::value >
8479 {};
8481 //*************************************************************************************************
8482 
8483 
8484 
8485 
8486 //=================================================================================================
8487 //
8488 // ISSYMMETRIC SPECIALIZATIONS
8489 //
8490 //=================================================================================================
8491 
8492 //*************************************************************************************************
8494 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8495 struct IsSymmetric< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8496  : public BoolConstant< Or< Bool<SF>
8497  , And< Bool<HF>
8498  , IsBuiltin< ElementType_< DMatTDMatMultExpr<MT1,MT2,false,true,false,false> > > >
8499  , And< Bool<LF>, Bool<UF> > >::value >
8500 {};
8502 //*************************************************************************************************
8503 
8504 
8505 
8506 
8507 //=================================================================================================
8508 //
8509 // ISHERMITIAN SPECIALIZATIONS
8510 //
8511 //=================================================================================================
8512 
8513 //*************************************************************************************************
8515 template< typename MT1, typename MT2, bool SF, bool LF, bool UF >
8516 struct IsHermitian< DMatTDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
8517  : public TrueType
8518 {};
8520 //*************************************************************************************************
8521 
8522 
8523 
8524 
8525 //=================================================================================================
8526 //
8527 // ISLOWER SPECIALIZATIONS
8528 //
8529 //=================================================================================================
8530 
8531 //*************************************************************************************************
8533 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8534 struct IsLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8535  : public BoolConstant< Or< Bool<LF>
8536  , And< IsLower<MT1>, IsLower<MT2> >
8537  , And< Or< Bool<SF>, Bool<HF> >
8538  , IsUpper<MT1>, IsUpper<MT2> > >::value >
8539 {};
8541 //*************************************************************************************************
8542 
8543 
8544 
8545 
8546 //=================================================================================================
8547 //
8548 // ISUNILOWER SPECIALIZATIONS
8549 //
8550 //=================================================================================================
8551 
8552 //*************************************************************************************************
8554 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8555 struct IsUniLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8556  : public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
8557  , And< Or< Bool<SF>, Bool<HF> >
8558  , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
8559 {};
8561 //*************************************************************************************************
8562 
8563 
8564 
8565 
8566 //=================================================================================================
8567 //
8568 // ISSTRICTLYLOWER SPECIALIZATIONS
8569 //
8570 //=================================================================================================
8571 
8572 //*************************************************************************************************
8574 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8575 struct IsStrictlyLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8576  : public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8577  , And< IsStrictlyLower<MT2>, IsLower<MT1> >
8578  , And< Or< Bool<SF>, Bool<HF> >
8579  , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8580  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
8581 {};
8583 //*************************************************************************************************
8584 
8585 
8586 
8587 
8588 //=================================================================================================
8589 //
8590 // ISUPPER SPECIALIZATIONS
8591 //
8592 //=================================================================================================
8593 
8594 //*************************************************************************************************
8596 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8597 struct IsUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8598  : public BoolConstant< Or< Bool<UF>
8599  , And< IsUpper<MT1>, IsUpper<MT2> >
8600  , And< Or< Bool<SF>, Bool<HF> >
8601  , IsLower<MT1>, IsLower<MT2> > >::value >
8602 {};
8604 //*************************************************************************************************
8605 
8606 
8607 
8608 
8609 //=================================================================================================
8610 //
8611 // ISUNIUPPER SPECIALIZATIONS
8612 //
8613 //=================================================================================================
8614 
8615 //*************************************************************************************************
8617 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8618 struct IsUniUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8619  : public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
8620  , And< Or< Bool<SF>, Bool<HF> >
8621  , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
8622 {};
8624 //*************************************************************************************************
8625 
8626 
8627 
8628 
8629 //=================================================================================================
8630 //
8631 // ISSTRICTLYUPPER SPECIALIZATIONS
8632 //
8633 //=================================================================================================
8634 
8635 //*************************************************************************************************
8637 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8638 struct IsStrictlyUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8639  : public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8640  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
8641  , And< Or< Bool<SF>, Bool<HF> >
8642  , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8643  , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
8644 {};
8646 //*************************************************************************************************
8647 
8648 } // namespace blaze
8649 
8650 #endif
Flag for lower matrices.
Definition: DMatTDMatMultExpr.h:172
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
Header file for auxiliary alias declarations.
Data type constraint.
Header file for the generic min algorithm.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:72
Constraint on the data type.
Header file for kernel specific block sizes.
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:150
decltype(auto) decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:996
Compile time check for low-level access to constant data.This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
EnableIf_< IsDenseMatrix< MT1 > > smpSchurAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP Schur product assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:196
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
Subvector< VT, AF > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:322
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:265
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:164
Header file for the serial shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:262
Header file for the IsDiagonal type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatScalarMultExpr.h:547
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: DMatTDMatMultExpr.h:363
Generic wrapper for a compile time constant integral value.The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the DeclUpp functor.
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:198
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:172
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:316
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DMatScalarMultExpr.h:620
Header file for the dense matrix multiplication kernels.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:560
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatScalarMultExpr.h:537
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template.The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:224
Base class for all matrix/scalar multiplication expression templates.The MatScalarMultExpr class serv...
Definition: MatScalarMultExpr.h:67
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1762
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:171
decltype(auto) declupp(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:1027
Flag for upper matrices.
Definition: DMatTDMatMultExpr.h:173
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:250
Column< MT > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:124
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:140
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:343
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1809
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:133
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:78
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:129
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:465
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:379
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:300
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:154
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Row< MT > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:124
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
Flag for Hermitian matrices.
Definition: DMatTDMatMultExpr.h:171
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:256
Compile time check for upper unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Header file for the generic max algorithm.
Header file for the DisableIf class template.
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:153
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
Header file for the IsDouble type trait.
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:257
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:102
Header file for the Or class template.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:433
Expression object for dense matrix-scalar multiplications.The DMatScalarMultExpr class represents the...
Definition: DMatScalarMultExpr.h:110
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:271
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the Not class template.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:258
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
decltype(auto) decllow(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:1027
Header file for the IsLower type trait.
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:421
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:80
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:261
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:466
LeftOperand matrix_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatScalarMultExpr.h:619
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:264
Header file for the DeclDiag functor.
Constraint on the data type.
Header file for all forward declarations for expression class templates.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:268
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:108
Compile time check for lower unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:152
Header file for the conjugate shim.
ElementType_< ResultType > ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:259
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions.The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
System settings for the BLAS mode.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:443
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:67
Header file for the IsSIMDCombinable type trait.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
BLAZE_ALWAYS_INLINE ValueType_< T > sum(const SIMDi8< T > &a) noexcept
Returns the sum of all elements in the 8-bit integral SIMD vector.
Definition: Reduction.h:65
Header file for the MatScalarMultExpr base class.
Flag for symmetric matrices.
Definition: DMatTDMatMultExpr.h:170
Header file for run time assertion macros.
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: DMatTDMatMultExpr.h:260
Utility type for generic codes.
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:154
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:94
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:149
Compile time type negation.The Not alias declaration negates the given compile time condition...
Definition: Not.h:70
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:409
decltype(auto) declsym(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:1029
Compile time check for Hermitian matrices.This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Compile time check for built-in data types.This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
decltype(auto) serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:819
Header file for the HasMutableDataAccess type trait.
Header file for the Noop functor.
#define BLAZE_CONSTRAINT_MUST_NOT_REQUIRE_EVALUATION(T)
Constraint on the data type.In case the given data type T requires an intermediate evaluation within ...
Definition: RequiresEvaluation.h:81
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemv).This compilation switch e...
Definition: BLAS.h:152
Header file for BLAS triangular matrix/matrix multiplication functions (trmm)
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template.The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:224
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions.The OppositeType_ alias declaration provid...
Definition: Aliases.h:263
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:274
LeftOperand leftOperand() const noexcept
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:399
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
const Type & ReturnType
Return type for expression template evaluations.
Definition: CompressedMatrix.h:3082
decltype(auto) declherm(const DenseMatrix< MT, SO > &dm)
Declares the given dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:1029
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:315
Compile time check for complex types.This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:64
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:66
Header file for BLAS general matrix/matrix multiplication functions (gemm)
Header file for the IsComplexFloat type trait.
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:151
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix.The Columns type trait evaluates the num...
Definition: Columns.h:75
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
Compile time evaluation of the number of rows of a matrix.The Rows type trait evaluates the number of...
Definition: Rows.h:75
Header file for the IsComplex type trait.
Header file for the DeclHerm functor.
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications.The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:143
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions.The RightOperand_ alias declaration provid...
Definition: Aliases.h:383
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions.The TransposeType_ alias declaration prov...
Definition: Aliases.h:423
Header file for the IsUpper type trait.
decltype(auto) conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatMapExpr.h:1321
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:389
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:742
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:453
Header file for the TrueType type/value trait base class.
Header file for the IsExpression type trait class.
Header file for the function trace functionality.