DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemm.h>
44 #include <blaze/math/blas/trmm.h>
45 #include <blaze/math/Aliases.h>
51 #include <blaze/math/dense/MMM.h>
52 #include <blaze/math/Exception.h>
58 #include <blaze/math/Functions.h>
66 #include <blaze/math/shims/Reset.h>
68 #include <blaze/math/SIMD.h>
114 #include <blaze/system/BLAS.h>
115 #include <blaze/system/Blocking.h>
117 #include <blaze/system/Thresholds.h>
118 #include <blaze/util/Assert.h>
119 #include <blaze/util/Complex.h>
123 #include <blaze/util/DisableIf.h>
124 #include <blaze/util/EnableIf.h>
127 #include <blaze/util/InvalidType.h>
128 #include <blaze/util/mpl/And.h>
129 #include <blaze/util/mpl/Bool.h>
130 #include <blaze/util/mpl/If.h>
131 #include <blaze/util/mpl/Not.h>
132 #include <blaze/util/mpl/Or.h>
133 #include <blaze/util/TrueType.h>
134 #include <blaze/util/Types.h>
143 
144 
145 namespace blaze {
146 
147 //=================================================================================================
148 //
149 // CLASS DMATTDMATMULTEXPR
150 //
151 //=================================================================================================
152 
153 //*************************************************************************************************
160 template< typename MT1 // Type of the left-hand side dense matrix
161  , typename MT2 // Type of the right-hand side dense matrix
162  , bool SF // Symmetry flag
163  , bool HF // Hermitian flag
164  , bool LF // Lower flag
165  , bool UF > // Upper flag
166 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false >
167  , private MatMatMultExpr
168  , private Computation
169 {
170  private:
171  //**Type definitions****************************************************************************
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
// Compile-time flag: true when the left-hand side operand type MT1 is a computation
// (IsComputation) or requires an intermediate evaluation (RequiresEvaluation).
182  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
183  //**********************************************************************************************
184 
185  //**********************************************************************************************
// Compile-time flag: true when the right-hand side operand type MT2 is a computation
// (IsComputation) or requires an intermediate evaluation (RequiresEvaluation).
187  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
188  //**********************************************************************************************
189 
190  //**********************************************************************************************
// Normalized structure flags derived from the template flags SF, HF, LF and UF.
// The kernels in this class branch on these values rather than on the raw flags.
192  enum : bool {
// SYM: symmetric flag set and none of the Hermitian/lower/upper flags set.
193  SYM = ( SF && !( HF || LF || UF ) ),
// HERM: Hermitian flag set and neither the lower nor the upper flag set.
194  HERM = ( HF && !( LF || UF ) ),
// LOW: lower flag set, or a symmetric/Hermitian result that is also flagged upper.
195  LOW = ( LF || ( ( SF || HF ) && UF ) ),
// UPP: upper flag set, or a symmetric/Hermitian result that is also flagged lower.
// LOW and UPP can both be true; ForwardFunctor below maps that case to DeclDiag.
196  UPP = ( UF || ( ( SF || HF ) && LF ) )
197  };
198  //**********************************************************************************************
199 
200  //**********************************************************************************************
202 
// Helper trait whose value is true when at least one of the two operands has to be
// evaluated (evaluateLeft || evaluateRight). The template parameters T1, T2 and T3 are
// not used by the visible definition.
206  template< typename T1, typename T2, typename T3 >
207  struct IsEvaluationRequired {
208  enum : bool { value = ( evaluateLeft || evaluateRight ) };
209  };
211  //**********************************************************************************************
212 
213  //**********************************************************************************************
215 
// Helper trait; judging by its name it selects whether a BLAS kernel may be used.
// NOTE(review): this listing omits several lines of the definition (the embedded listing
// numbers jump 219 -> 221 -> 226 -> 231), including the start of the `value` expression,
// so the complete condition is not visible here. The conjuncts that are visible require:
//  - none of SYM, HERM, LOW, UPP is set,
//  - T1, T2 and T3 all report simdEnabled,
//  - the element types of T1 and T3 are the same type.
218  template< typename T1, typename T2, typename T3 >
219  struct UseBlasKernel {
221  !SYM && !HERM && !LOW && !UPP &&
226  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
231  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
232  };
234  //**********************************************************************************************
235 
236  //**********************************************************************************************
238 
// Helper trait; judging by its name it selects whether the vectorized default kernel
// may be used.
// NOTE(review): several lines of the `value` expression are missing from this listing
// (the embedded listing numbers jump 243 -> 245 -> 248 -> 251), so only fragments are
// visible: the condition starts with useOptimizedKernels, requires simdEnabled for T1, T2
// and T3, and contains a type comparison involving ElementType_<T3>.
241  template< typename T1, typename T2, typename T3 >
242  struct UseVectorizedDefaultKernel {
243  enum : bool { value = useOptimizedKernels &&
245  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
248  , ElementType_<T3> >::value &&
251  };
253  //**********************************************************************************************
254 
255  //**********************************************************************************************
257 
// Functor type selected from the normalized structure flags (assuming IfTrue_<C,T,F>
// yields T when C is true and F otherwise - TODO confirm against the IfTrue_ definition):
//   HERM          -> DeclHerm
//   SYM           -> DeclSym
//   LOW and UPP   -> DeclDiag
//   LOW only      -> DeclLow
//   UPP only      -> DeclUpp
//   none          -> Noop
// HERM is tested first, then SYM, then the LOW/UPP combinations.
260  typedef IfTrue_< HERM
261  , DeclHerm
262  , IfTrue_< SYM
263  , DeclSym
264  , IfTrue_< LOW
265  , IfTrue_< UPP
266  , DeclDiag
267  , DeclLow >
268  , IfTrue_< UPP
269  , DeclUpp
270  , Noop > > > > ForwardFunctor;
272  //**********************************************************************************************
273 
274  public:
275  //**Type definitions****************************************************************************
278 
// Return type of the element access functions: a const ElementType, i.e. returned by value.
// (ElementType and ResultType are declared on lines not present in this listing.)
284  typedef const ElementType ReturnType;
// Type used when this expression is itself an operand: a const ResultType.
285  typedef const ResultType CompositeType;
286 
// Storage type of the left operand: const MT1 (a copy) if MT1 is an expression, otherwise
// const MT1& (assuming If_<C,T,F> yields T when C holds - TODO confirm).
288  typedef If_< IsExpression<MT1>, const MT1, const MT1& > LeftOperand;
289 
// Storage type of the right operand, chosen by the same rule for MT2.
291  typedef If_< IsExpression<MT2>, const MT2, const MT2& > RightOperand;
292 
295 
298  //**********************************************************************************************
299 
300  //**Compilation flags***************************************************************************
// Compilation flag for SIMD support.
// NOTE(review): the end of this expression is missing from the listing (embedded listing
// numbers jump 303 -> 306). The visible part requires that neither operand type is
// diagonal and that both MT1 and MT2 report simdEnabled.
302  enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
303  MT1::simdEnabled && MT2::simdEnabled &&
306 
// Compilation flag for SMP assignment: true only if neither operand needs an intermediate
// evaluation and both operand types are themselves SMP-assignable.
308  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
309  !evaluateRight && MT2::smpAssignable };
310  //**********************************************************************************************
311 
312  //**SIMD properties*****************************************************************************
// SIMD size taken from SIMDTrait<ElementType>::size. The vectorized kernel uses it as
// the step width of its k loops and to round indices down via `& size_t(-SIMDSIZE)`.
314  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
315  //**********************************************************************************************
316 
317  //**Constructor*********************************************************************************
// Constructor: stores the two operands of the multiplication expression.
//
// \param lhs The left-hand side operand of the multiplication expression.
// \param rhs The right-hand side operand of the multiplication expression.
//
// The size compatibility (lhs.columns() == rhs.rows()) is only checked by an internal
// assertion; no exception is thrown here (the constructor is noexcept).
323  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
324  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
325  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
326  {
327  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
328  }
329  //**********************************************************************************************
330 
331  //**Access operator*****************************************************************************
// Element access: computes element (i,j) of the matrix product on demand.
//
// \param i Row index; must be smaller than lhs_.rows() (checked by internal assertion only).
// \param j Column index; must be smaller than rhs_.columns() (internal assertion only).
// \return The computed element.
338  inline ReturnType operator()( size_t i, size_t j ) const {
339  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
340  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
341 
// Diagonal left operand: only the term with summation index i contributes.
342  if( IsDiagonal<MT1>::value ) {
343  return lhs_(i,i) * rhs_(i,j);
344  }
// Diagonal right operand: only the term with summation index j contributes.
345  else if( IsDiagonal<MT2>::value ) {
346  return lhs_(i,j) * rhs_(j,j);
347  }
// NOTE(review): the line that opens the next branch (listing line 348) is missing from
// this listing, so its condition is not visible here. The branch restricts the summation
// to the index range [begin,end) derived from the upper/lower structure of the operands.
//
// First summation index: bounded below by i for an upper MT1 and by j for a lower MT2
// (one further for the strictly triangular variants).
349  const size_t begin( ( IsUpper<MT1>::value )
350  ?( ( IsLower<MT2>::value )
351  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
352  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
353  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
354  :( ( IsLower<MT2>::value )
355  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
356  :( 0UL ) ) );
// One-past-last summation index: bounded above via i for a lower MT1 and via j for an
// upper MT2; otherwise the full inner dimension lhs_.columns().
357  const size_t end( ( IsLower<MT1>::value )
358  ?( ( IsUpper<MT2>::value )
359  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
360  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
361  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
362  :( ( IsUpper<MT2>::value )
363  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
364  :( lhs_.columns() ) ) );
365 
// Empty summation range: the result is a default-constructed element.
366  if( begin >= end ) return ElementType();
367 
368  const size_t n( end - begin );
369 
// Product of the matching sub-ranges of row i of lhs_ and column j of rhs_.
370  return subvector( row( lhs_, i ), begin, n ) * subvector( column( rhs_, j ), begin, n );
371  }
// General case: product of the complete row i of lhs_ and column j of rhs_.
372  else {
373  return row( lhs_, i ) * column( rhs_, j );
374  }
375  }
376  //**********************************************************************************************
377 
378  //**At function*********************************************************************************
// Checked element access.
//
// \param i Row index.
// \param j Column index.
// \return The element (i,j), computed via operator().
//
// Invokes BLAZE_THROW_OUT_OF_RANGE if i is not smaller than lhs_.rows() or j is not
// smaller than rhs_.columns(); the row index is checked first.
386  inline ReturnType at( size_t i, size_t j ) const {
387  if( i >= lhs_.rows() ) {
388  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
389  }
390  if( j >= rhs_.columns() ) {
391  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
392  }
393  return (*this)(i,j);
394  }
395  //**********************************************************************************************
396 
397  //**Rows function*******************************************************************************
// Returns the number of rows of the product, which is the row count of the left operand.
402  inline size_t rows() const noexcept {
403  return lhs_.rows();
404  }
405  //**********************************************************************************************
406 
407  //**Columns function****************************************************************************
// Returns the number of columns of the product, which is the column count of the right
// operand.
412  inline size_t columns() const noexcept {
413  return rhs_.columns();
414  }
415  //**********************************************************************************************
416 
417  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (by value or by const reference, depending
// on the LeftOperand typedef).
422  inline LeftOperand leftOperand() const noexcept {
423  return lhs_;
424  }
425  //**********************************************************************************************
426 
427  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (by value or by const reference, depending
// on the RightOperand typedef).
432  inline RightOperand rightOperand() const noexcept {
433  return rhs_;
434  }
435  //**********************************************************************************************
436 
437  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
//
// \param alias The address to check.
// \return true if either operand reports being aliased with \a alias.
//
// Note that this queries isAliased() (not canAlias()) on both operands, so the result is
// identical to that of isAliased() below.
443  template< typename T >
444  inline bool canAlias( const T* alias ) const noexcept {
445  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
446  }
447  //**********************************************************************************************
448 
449  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//
// \param alias The address to check.
// \return true if either the left or the right operand reports being aliased with \a alias.
455  template< typename T >
456  inline bool isAliased( const T* alias ) const noexcept {
457  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
458  }
459  //**********************************************************************************************
460 
461  //**********************************************************************************************
// Returns true only if both operands report being aligned.
466  inline bool isAligned() const noexcept {
467  return lhs_.isAligned() && rhs_.isAligned();
468  }
469  //**********************************************************************************************
470 
471  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// NOTE(review): this listing is missing at least the last line of the return expression
// (listing line 480; the numbers jump 479 -> 481), and the jump 476 -> 477 leaves no room
// for further omitted lines at the start, though the expression as shown is incomplete.
// The visible part combines:
//  - a first group that holds if BLAS is not parallel (!BLAZE_BLAS_IS_PARALLEL) or the
//    result has fewer than DMATTDMATMULT_THRESHOLD elements, and
//  - the requirement that the result has at least SMP_DMATTDMATMULT_THRESHOLD elements.
476  inline bool canSMPAssign() const noexcept {
477  return ( !BLAZE_BLAS_IS_PARALLEL ||
478  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
479  ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD ) &&
481  }
482  //**********************************************************************************************
483 
484  private:
485  //**Member variables****************************************************************************
486  LeftOperand lhs_; // Left-hand side dense matrix of the multiplication expression.
487  RightOperand rhs_; // Right-hand side dense matrix of the multiplication expression.
488  //**********************************************************************************************
489 
490  //**Assignment to dense matrices****************************************************************
// Assignment of the matrix product to a dense matrix (C = A*B).
//
// \param lhs The target dense matrix; its size must already match the expression
//            (checked by internal assertions only).
// \param rhs The multiplication expression to be assigned.
//
// The operands are evaluated serially into A and B, then the kernel selection is invoked.
// NOTE(review): listing line 507 is missing here, and the types LT and RT used below are
// declared on lines not present in this listing.
503  template< typename MT // Type of the target dense matrix
504  , bool SO > // Storage order of the target dense matrix
505  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
506  {
508 
509  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
510  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
511 
// Empty target: nothing to do.
512  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
513  return;
514  }
// Inner dimension is zero: every element is an empty sum, so the target is reset.
515  else if( rhs.lhs_.columns() == 0UL ) {
516  reset( ~lhs );
517  return;
518  }
519 
520  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
521  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
522 
523  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
524  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
525  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
526  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
527  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
528  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
529 
530  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
531  }
533  //**********************************************************************************************
534 
535  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = A*B.
//
// \param C The target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
//
// Dispatches to selectSmallAssignKernel() or selectBlasAssignKernel().
// NOTE(review): the first line of the condition (listing line 551) is missing from this
// listing. The visible part of the condition holds when the target has fewer than
// DMATTDMATMULT_THRESHOLD elements.
546  template< typename MT3 // Type of the left-hand side target matrix
547  , typename MT4 // Type of the left-hand side matrix operand
548  , typename MT5 > // Type of the right-hand side matrix operand
549  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
550  {
552  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
553  selectSmallAssignKernel( C, A, B );
554  else
555  selectBlasAssignKernel( C, A, B );
556  }
558  //**********************************************************************************************
559 
560  //**Default assignment to row-major dense matrices (general/general)****************************
// Default (scalar) assignment kernel for a row-major target matrix (section header:
// general/general operands).
//
// \param C The row-major target matrix.
// \param A The left-hand side operand (M x K).
// \param B The right-hand side operand (K x N).
//
// Every element of C is written: elements inside the structurally relevant index ranges
// are computed as the sum of A(i,k)*B(k,j), all other elements are reset. For SYM/HERM
// only columns j >= i are handled per row and the part below the diagonal is mirrored
// at the end.
// NOTE(review): the return-type line (listing line 577) and parts of the jbegin/jend
// expressions (listing lines 602, 605, 609, 612) are missing from this listing.
574  template< typename MT3 // Type of the left-hand side target matrix
575  , typename MT4 // Type of the left-hand side matrix operand
576  , typename MT5 > // Type of the right-hand side matrix operand
578  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
579  {
580  const size_t M( A.rows() );
581  const size_t N( B.columns() );
582  const size_t K( A.columns() );
583 
// Any declared structure (SYM/HERM/LOW/UPP) requires a square result.
584  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
585 
// Rows before ibegin and from iend onwards are reset completely (see the loops below).
// ibegin is non-zero only for a strictly lower A, iend is below M only for a strictly
// upper A; the `M > 1UL` guards keep the value 2UL / M-2UL from being used for M == 1.
586  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
587  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
588  :( 0UL ) );
589  const size_t iend( ( IsStrictlyUpper<MT4>::value )
590  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
591  :( M ) );
592  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
593 
594  for( size_t i=0UL; i<ibegin; ++i ) {
595  for( size_t j=0UL; j<N; ++j ) {
596  reset( (~C)(i,j) );
597  }
598  }
599  for( size_t i=ibegin; i<iend; ++i )
600  {
// Column range [jbegin,jend) of row i that is actually computed; it depends on the
// triangular structure of A and B and on the SYM/HERM/LOW/UPP flags.
601  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
603  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
604  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
606  ?( SYM || HERM || UPP ? max( i, 1UL ) : 1UL )
607  :( SYM || HERM || UPP ? i : 0UL ) ) );
608  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
610  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
611  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
613  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
614  :( LOW ? i+1UL : N ) ) );
615 
// With a declared structure the range may be inverted; then the whole row is reset.
616  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) {
617  for( size_t j=0UL; j<N; ++j ) {
618  reset( (~C)(i,j) );
619  }
620  continue;
621  }
622 
623  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
624 
// Reset the elements left of the computed range. For SYM/HERM the reset starts at
// column i, because columns j < i are overwritten by the mirror step at the end.
625  for( size_t j=( SYM || HERM ? i : 0UL ); j<jbegin; ++j ) {
626  reset( (~C)(i,j) );
627  }
628  for( size_t j=jbegin; j<jend; ++j )
629  {
// Summation range [kbegin,kend) for element (i,j), narrowed by the upper/lower
// structure of A and B; the same formula is used in operator().
630  const size_t kbegin( ( IsUpper<MT4>::value )
631  ?( ( IsLower<MT5>::value )
632  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
633  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
634  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
635  :( ( IsLower<MT5>::value )
636  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
637  :( 0UL ) ) );
638  const size_t kend( ( IsLower<MT4>::value )
639  ?( ( IsUpper<MT5>::value )
640  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
641  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
642  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
643  :( ( IsUpper<MT5>::value )
644  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
645  :( K ) ) );
646  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
647 
// The first term is assigned (not added), so C(i,j) needs no prior reset.
648  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
649  for( size_t k=kbegin+1UL; k<kend; ++k ) {
650  (~C)(i,j) += A(i,k) * B(k,j);
651  }
652  }
// Reset the elements right of the computed range.
653  for( size_t j=jend; j<N; ++j ) {
654  reset( (~C)(i,j) );
655  }
656  }
657  for( size_t i=iend; i<M; ++i ) {
658  for( size_t j=0UL; j<N; ++j ) {
659  reset( (~C)(i,j) );
660  }
661  }
662 
// Mirror step for SYM/HERM: fill the part below the diagonal from the part above it
// (conjugated in the Hermitian case).
663  if( SYM || HERM ) {
664  for( size_t i=1UL; i<M; ++i ) {
665  for( size_t j=0UL; j<i; ++j ) {
666  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
667  }
668  }
669  }
670  }
672  //**********************************************************************************************
673 
674  //**Default assignment to column-major dense matrices (general/general)*************************
// Default (scalar) assignment kernel for a column-major target matrix; enabled only if
// neither A nor B is diagonal.
//
// \param C The column-major target matrix.
// \param A The left-hand side operand (M x K).
// \param B The right-hand side operand (K x N).
//
// Column-wise counterpart of the row-major kernel: the outer loop runs over the columns j,
// the inner loop over the rows i. Every element of C is written (computed or reset). For
// SYM/HERM only rows i >= j are handled per column and the part above the diagonal is
// mirrored at the end.
// NOTE(review): parts of the ibegin/iend expressions (listing lines 716, 719, 723, 726)
// are missing from this listing.
688  template< typename MT3 // Type of the left-hand side target matrix
689  , typename MT4 // Type of the left-hand side matrix operand
690  , typename MT5 > // Type of the right-hand side matrix operand
691  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
692  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
693  {
694  const size_t M( A.rows() );
695  const size_t N( B.columns() );
696  const size_t K( A.columns() );
697 
// Any declared structure (SYM/HERM/LOW/UPP) requires a square result.
698  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
699 
// Columns before jbegin and from jend onwards are reset completely (see the loops below).
// jbegin is non-zero only for a strictly upper B, jend is below N only for a strictly
// lower B; the `N > 1UL` guards keep the value 2UL / N-2UL from being used for N == 1.
700  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
701  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
702  :( 0UL ) );
703  const size_t jend( ( IsStrictlyLower<MT5>::value )
704  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
705  :( N ) );
706  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
707 
708  for( size_t j=0UL; j<jbegin; ++j ) {
709  for( size_t i=0UL; i<M; ++i ) {
710  reset( (~C)(i,j) );
711  }
712  }
713  for( size_t j=jbegin; j<jend; ++j )
714  {
// Row range [ibegin,iend) of column j that is actually computed; it depends on the
// triangular structure of A and B and on the SYM/HERM/LOW/UPP flags.
715  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
717  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
718  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
720  ?( SYM || HERM || LOW ? max( j, 1UL ) : 1UL )
721  :( SYM || HERM || LOW ? j : 0UL ) ) );
722  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
724  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
725  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
727  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
728  :( UPP ? j+1UL : M ) ) );
729 
// With a declared structure the range may be inverted; then the whole column is reset.
730  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) {
731  for( size_t i=0UL; i<M; ++i ) {
732  reset( (~C)(i,j) );
733  }
734  continue;
735  }
736 
737  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
738 
// Reset the elements above the computed range. For SYM/HERM the reset starts at row j,
// because rows i < j are overwritten by the mirror step at the end.
739  for( size_t i=( SYM || HERM ? j : 0UL ); i<ibegin; ++i ) {
740  reset( (~C)(i,j) );
741  }
742  for( size_t i=ibegin; i<iend; ++i )
743  {
// Summation range [kbegin,kend) for element (i,j), narrowed by the upper/lower
// structure of A and B.
744  const size_t kbegin( ( IsUpper<MT4>::value )
745  ?( ( IsLower<MT5>::value )
746  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
747  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
748  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
749  :( ( IsLower<MT5>::value )
750  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
751  :( 0UL ) ) );
752  const size_t kend( ( IsLower<MT4>::value )
753  ?( ( IsUpper<MT5>::value )
754  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
755  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
756  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
757  :( ( IsUpper<MT5>::value )
758  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
759  :( K ) ) );
760  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
761 
// The first term is assigned (not added), so C(i,j) needs no prior reset.
762  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
763  for( size_t k=kbegin+1UL; k<kend; ++k ) {
764  (~C)(i,j) += A(i,k) * B(k,j);
765  }
766  }
// Reset the elements below the computed range.
767  for( size_t i=iend; i<M; ++i ) {
768  reset( (~C)(i,j) );
769  }
770  }
771  for( size_t j=jend; j<N; ++j ) {
772  for( size_t i=0UL; i<M; ++i ) {
773  reset( (~C)(i,j) );
774  }
775  }
776 
// Mirror step for SYM/HERM: fill the part above the diagonal from the part below it
// (conjugated in the Hermitian case).
777  if( SYM || HERM ) {
778  for( size_t j=1UL; j<N; ++j ) {
779  for( size_t i=0UL; i<j; ++i ) {
780  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
781  }
782  }
783  }
784  }
786  //**********************************************************************************************
787 
788  //**Default assignment to row-major dense matrices (general/diagonal)***************************
// Default assignment kernel for a row-major target when A is not diagonal and B is
// diagonal.
//
// \param C The row-major target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side diagonal operand.
//
// Because B is diagonal, each result element reduces to a single product:
// C(i,j) = A(i,j) * B(j,j).
802  template< typename MT3 // Type of the left-hand side target matrix
803  , typename MT4 // Type of the left-hand side matrix operand
804  , typename MT5 > // Type of the right-hand side matrix operand
805  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
806  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
807  {
808  const size_t M( A.rows() );
809  const size_t N( B.columns() );
810 
811  for( size_t i=0UL; i<M; ++i )
812  {
// Column range [jbegin,jend) of row i that is computed, restricted by the upper/lower
// structure of A.
813  const size_t jbegin( ( IsUpper<MT4>::value )
814  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
815  :( 0UL ) );
816  const size_t jend( ( IsLower<MT4>::value )
817  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
818  :( N ) );
819  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
820 
// Elements in front of the range exist only for an upper A; they are reset.
821  if( IsUpper<MT4>::value ) {
822  for( size_t j=0UL; j<jbegin; ++j ) {
823  reset( (~C)(i,j) );
824  }
825  }
826  for( size_t j=jbegin; j<jend; ++j ) {
827  (~C)(i,j) = A(i,j) * B(j,j);
828  }
// Elements behind the range exist only for a lower A; they are reset.
829  if( IsLower<MT4>::value ) {
830  for( size_t j=jend; j<N; ++j ) {
831  reset( (~C)(i,j) );
832  }
833  }
834  }
835  }
837  //**********************************************************************************************
838 
839  //**Default assignment to column-major dense matrices (general/diagonal)************************
// Default assignment kernel for a column-major target when A is not diagonal and B is
// diagonal.
//
// \param C The column-major target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side diagonal operand.
//
// Computes C(i,j) = A(i,j) * B(j,j). The index space is traversed in square tiles of
// BLOCK_SIZE x BLOCK_SIZE elements (column tiles outermost, row tiles inside).
853  template< typename MT3 // Type of the left-hand side target matrix
854  , typename MT4 // Type of the left-hand side matrix operand
855  , typename MT5 > // Type of the right-hand side matrix operand
856  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
857  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
858  {
859  constexpr size_t block( BLOCK_SIZE );
860 
861  const size_t M( A.rows() );
862  const size_t N( B.columns() );
863 
864  for( size_t jj=0UL; jj<N; jj+=block ) {
865  const size_t jend( min( N, jj+block ) );
866  for( size_t ii=0UL; ii<M; ii+=block ) {
867  const size_t iend( min( M, ii+block ) );
868  for( size_t j=jj; j<jend; ++j )
869  {
// Row range [ibegin,ipos) of column j that is computed. For a lower A it starts no
// earlier than the diagonal (bounded below by the tile start ii); for an upper A it
// ends no later than the diagonal (bounded above by the tile end iend).
870  const size_t ibegin( ( IsLower<MT4>::value )
871  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
872  :( ii ) );
873  const size_t ipos( ( IsUpper<MT4>::value )
874  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
875  :( iend ) );
876 
// Lower A: reset the elements from the tile start up to ibegin.
877  if( IsLower<MT4>::value ) {
878  for( size_t i=ii; i<ibegin; ++i ) {
879  reset( (~C)(i,j) );
880  }
881  }
882  for( size_t i=ibegin; i<ipos; ++i ) {
883  (~C)(i,j) = A(i,j) * B(j,j);
884  }
// Upper A: reset the elements from ipos up to the tile end.
885  if( IsUpper<MT4>::value ) {
886  for( size_t i=ipos; i<iend; ++i ) {
887  reset( (~C)(i,j) );
888  }
889  }
890  }
891  }
892  }
893  }
895  //**********************************************************************************************
896 
897  //**Default assignment to row-major dense matrices (diagonal/general)***************************
// Default assignment kernel for a row-major target (section header: diagonal/general
// operands, i.e. a diagonal A and a non-diagonal B).
//
// \param C The row-major target matrix.
// \param A The left-hand side operand; only its diagonal elements A(i,i) are read.
// \param B The right-hand side operand.
//
// Computes C(i,j) = A(i,i) * B(i,j). The index space is traversed in square tiles of
// BLOCK_SIZE x BLOCK_SIZE elements (row tiles outermost, column tiles inside).
// NOTE(review): the return-type/enable-condition line (listing line 914) is missing from
// this listing.
911  template< typename MT3 // Type of the left-hand side target matrix
912  , typename MT4 // Type of the left-hand side matrix operand
913  , typename MT5 > // Type of the right-hand side matrix operand
915  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
916  {
917  constexpr size_t block( BLOCK_SIZE );
918 
919  const size_t M( A.rows() );
920  const size_t N( B.columns() );
921 
922  for( size_t ii=0UL; ii<M; ii+=block ) {
923  const size_t iend( min( M, ii+block ) );
924  for( size_t jj=0UL; jj<N; jj+=block ) {
925  const size_t jend( min( N, jj+block ) );
926  for( size_t i=ii; i<iend; ++i )
927  {
// Column range [jbegin,jpos) of row i that is computed. For an upper B it starts no
// earlier than the diagonal (bounded below by the tile start jj); for a lower B it
// ends no later than the diagonal (bounded above by the tile end jend).
928  const size_t jbegin( ( IsUpper<MT5>::value )
929  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
930  :( jj ) );
931  const size_t jpos( ( IsLower<MT5>::value )
932  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
933  :( jend ) );
934 
// Upper B: reset the elements from the tile start up to jbegin.
935  if( IsUpper<MT5>::value ) {
936  for( size_t j=jj; j<jbegin; ++j ) {
937  reset( (~C)(i,j) );
938  }
939  }
940  for( size_t j=jbegin; j<jpos; ++j ) {
941  (~C)(i,j) = A(i,i) * B(i,j);
942  }
// Lower B: reset the elements from jpos up to the tile end.
943  if( IsLower<MT5>::value ) {
944  for( size_t j=jpos; j<jend; ++j ) {
945  reset( (~C)(i,j) );
946  }
947  }
948  }
949  }
950  }
951  }
953  //**********************************************************************************************
954 
955  //**Default assignment to column-major dense matrices (diagonal/general)************************
// Default assignment kernel for a column-major target when A is diagonal and B is not
// diagonal.
//
// \param C The column-major target matrix.
// \param A The left-hand side diagonal operand.
// \param B The right-hand side operand.
//
// Because A is diagonal, each result element reduces to a single product:
// C(i,j) = A(i,i) * B(i,j).
969  template< typename MT3 // Type of the left-hand side target matrix
970  , typename MT4 // Type of the left-hand side matrix operand
971  , typename MT5 > // Type of the right-hand side matrix operand
972  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
973  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
974  {
975  const size_t M( A.rows() );
976  const size_t N( B.columns() );
977 
978  for( size_t j=0UL; j<N; ++j )
979  {
// Row range [ibegin,iend) of column j that is computed, restricted by the upper/lower
// structure of B.
980  const size_t ibegin( ( IsLower<MT5>::value )
981  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
982  :( 0UL ) );
983  const size_t iend( ( IsUpper<MT5>::value )
984  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
985  :( M ) );
986  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
987 
// Elements above the range exist only for a lower B; they are reset.
988  if( IsLower<MT5>::value ) {
989  for( size_t i=0UL; i<ibegin; ++i ) {
990  reset( (~C)(i,j) );
991  }
992  }
993  for( size_t i=ibegin; i<iend; ++i ) {
994  (~C)(i,j) = A(i,i) * B(i,j);
995  }
// Elements below the range exist only for an upper B; they are reset.
996  if( IsUpper<MT5>::value ) {
997  for( size_t i=iend; i<M; ++i ) {
998  reset( (~C)(i,j) );
999  }
1000  }
1001  }
1002  }
1004  //**********************************************************************************************
1005 
1006  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default assignment kernel when both A and B are diagonal.
//
// \param C The target matrix (any storage order; taken as plain MT3&).
// \param A The left-hand side diagonal operand.
// \param B The right-hand side diagonal operand.
//
// The target is reset first, then only its diagonal is written:
// C(i,i) = A(i,i) * B(i,i) for all i < A.rows().
1020  template< typename MT3 // Type of the left-hand side target matrix
1021  , typename MT4 // Type of the left-hand side matrix operand
1022  , typename MT5 > // Type of the right-hand side matrix operand
1023  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
1024  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
1025  {
1026  reset( C );
1027 
1028  for( size_t i=0UL; i<A.rows(); ++i ) {
1029  C(i,i) = A(i,i) * B(i,i);
1030  }
1031  }
1033  //**********************************************************************************************
1034 
1035  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel, default variant: simply forwards to
// selectDefaultAssignKernel().
//
// \param C The target matrix.
// \param A The left-hand side operand.
// \param B The right-hand side operand.
//
// NOTE(review): the return-type/enable-condition line (listing line 1052) is missing from
// this listing, so the condition under which this overload is selected is not visible.
1049  template< typename MT3 // Type of the left-hand side target matrix
1050  , typename MT4 // Type of the left-hand side matrix operand
1051  , typename MT5 > // Type of the right-hand side matrix operand
1053  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
1054  {
1055  selectDefaultAssignKernel( C, A, B );
1056  }
1058  //**********************************************************************************************
1059 
1060  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
1075  template< typename MT3 // Type of the left-hand side target matrix
1076  , typename MT4 // Type of the left-hand side matrix operand
1077  , typename MT5 > // Type of the right-hand side matrix operand
1079  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1080  {
1081  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
1082 
1083  const size_t M( A.rows() );
1084  const size_t N( B.columns() );
1085  const size_t K( A.columns() );
1086 
1087  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1088 
1089  if( LOW && UPP ) {
1090  reset( ~C );
1091  }
1092 
1093  {
1094  size_t i( 0UL );
1095 
1096  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
1097  {
1098  const size_t jend( LOW ? i+2UL : N );
1099  size_t j( SYM || HERM || UPP ? i : 0UL );
1100 
1101  for( ; (j+4UL) <= jend; j+=4UL )
1102  {
1103  const size_t kbegin( ( IsUpper<MT4>::value )
1104  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1105  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1106  const size_t kend( ( IsLower<MT4>::value )
1107  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
1108  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
1109 
1110  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1111  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1112 
1113  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1114  size_t k( kbegin );
1115 
1116  for( ; k<kpos; k+=SIMDSIZE ) {
1117  const SIMDType a1( A.load(i ,k) );
1118  const SIMDType a2( A.load(i+1UL,k) );
1119  const SIMDType b1( B.load(k,j ) );
1120  const SIMDType b2( B.load(k,j+1UL) );
1121  const SIMDType b3( B.load(k,j+2UL) );
1122  const SIMDType b4( B.load(k,j+3UL) );
1123  xmm1 += a1 * b1;
1124  xmm2 += a1 * b2;
1125  xmm3 += a1 * b3;
1126  xmm4 += a1 * b4;
1127  xmm5 += a2 * b1;
1128  xmm6 += a2 * b2;
1129  xmm7 += a2 * b3;
1130  xmm8 += a2 * b4;
1131  }
1132 
1133  (~C)(i ,j ) = sum( xmm1 );
1134  (~C)(i ,j+1UL) = sum( xmm2 );
1135  (~C)(i ,j+2UL) = sum( xmm3 );
1136  (~C)(i ,j+3UL) = sum( xmm4 );
1137  (~C)(i+1UL,j ) = sum( xmm5 );
1138  (~C)(i+1UL,j+1UL) = sum( xmm6 );
1139  (~C)(i+1UL,j+2UL) = sum( xmm7 );
1140  (~C)(i+1UL,j+3UL) = sum( xmm8 );
1141 
1142  for( ; remainder && k<kend; ++k ) {
1143  (~C)(i ,j ) += A(i ,k) * B(k,j );
1144  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1145  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
1146  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
1147  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1148  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1149  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
1150  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
1151  }
1152  }
1153 
1154  for( ; (j+2UL) <= jend; j+=2UL )
1155  {
1156  const size_t kbegin( ( IsUpper<MT4>::value )
1157  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1158  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1159  const size_t kend( ( IsLower<MT4>::value )
1160  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1161  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1162 
1163  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1164  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1165 
1166  SIMDType xmm1, xmm2, xmm3, xmm4;
1167  size_t k( kbegin );
1168 
1169  for( ; k<kpos; k+=SIMDSIZE ) {
1170  const SIMDType a1( A.load(i ,k) );
1171  const SIMDType a2( A.load(i+1UL,k) );
1172  const SIMDType b1( B.load(k,j ) );
1173  const SIMDType b2( B.load(k,j+1UL) );
1174  xmm1 += a1 * b1;
1175  xmm2 += a1 * b2;
1176  xmm3 += a2 * b1;
1177  xmm4 += a2 * b2;
1178  }
1179 
1180  (~C)(i ,j ) = sum( xmm1 );
1181  (~C)(i ,j+1UL) = sum( xmm2 );
1182  (~C)(i+1UL,j ) = sum( xmm3 );
1183  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1184 
1185  for( ; remainder && k<kend; ++k ) {
1186  (~C)(i ,j ) += A(i ,k) * B(k,j );
1187  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1188  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1189  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1190  }
1191  }
1192 
1193  if( j < jend )
1194  {
1195  const size_t kbegin( ( IsUpper<MT4>::value )
1196  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1197  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1198  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1199 
1200  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1201  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1202 
1203  SIMDType xmm1, xmm2;
1204  size_t k( kbegin );
1205 
1206  for( ; k<kpos; k+=SIMDSIZE ) {
1207  const SIMDType b1( B.load(k,j) );
1208  xmm1 += A.load(i ,k) * b1;
1209  xmm2 += A.load(i+1UL,k) * b1;
1210  }
1211 
1212  (~C)(i ,j) = sum( xmm1 );
1213  (~C)(i+1UL,j) = sum( xmm2 );
1214 
1215  for( ; remainder && k<kend; ++k ) {
1216  (~C)(i ,j) += A(i ,k) * B(k,j);
1217  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1218  }
1219  }
1220  }
1221 
1222  for( ; i<M; ++i )
1223  {
1224  const size_t jend( LOW ? i+1UL : N );
1225  size_t j( SYM || HERM || UPP ? i : 0UL );
1226 
1227  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
1228  {
1229  const size_t kbegin( ( IsUpper<MT4>::value )
1230  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1231  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1232  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
1233 
1234  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1235  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1236 
1237  SIMDType xmm1, xmm2, xmm3, xmm4;
1238  size_t k( kbegin );
1239 
1240  for( ; k<kpos; k+=SIMDSIZE ) {
1241  const SIMDType a1( A.load(i,k) );
1242  xmm1 += a1 * B.load(k,j );
1243  xmm2 += a1 * B.load(k,j+1UL);
1244  xmm3 += a1 * B.load(k,j+2UL);
1245  xmm4 += a1 * B.load(k,j+3UL);
1246  }
1247 
1248  (~C)(i,j ) = sum( xmm1 );
1249  (~C)(i,j+1UL) = sum( xmm2 );
1250  (~C)(i,j+2UL) = sum( xmm3 );
1251  (~C)(i,j+3UL) = sum( xmm4 );
1252 
1253  for( ; remainder && k<kend; ++k ) {
1254  (~C)(i,j ) += A(i,k) * B(k,j );
1255  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1256  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
1257  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
1258  }
1259  }
1260 
1261  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
1262  {
1263  const size_t kbegin( ( IsUpper<MT4>::value )
1264  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1265  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1266  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1267 
1268  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1269  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1270 
1271  SIMDType xmm1, xmm2;
1272  size_t k( kbegin );
1273 
1274  for( ; k<kpos; k+=SIMDSIZE ) {
1275  const SIMDType a1( A.load(i,k) );
1276  xmm1 += a1 * B.load(k,j );
1277  xmm2 += a1 * B.load(k,j+1UL);
1278  }
1279 
1280  (~C)(i,j ) = sum( xmm1 );
1281  (~C)(i,j+1UL) = sum( xmm2 );
1282 
1283  for( ; remainder && k<kend; ++k ) {
1284  (~C)(i,j ) += A(i,k) * B(k,j );
1285  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1286  }
1287  }
1288 
1289  for( ; j<jend; ++j )
1290  {
1291  const size_t kbegin( ( IsUpper<MT4>::value )
1292  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1293  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1294 
1295  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1296  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1297 
1298  SIMDType xmm1;
1299  size_t k( kbegin );
1300 
1301  for( ; k<kpos; k+=SIMDSIZE ) {
1302  xmm1 += A.load(i,k) * B.load(k,j);
1303  }
1304 
1305  (~C)(i,j) = sum( xmm1 );
1306 
1307  for( ; remainder && k<K; ++k ) {
1308  (~C)(i,j) += A(i,k) * B(k,j);
1309  }
1310  }
1311  }
1312  }
1313 
1314  if( SYM || HERM ) {
1315  for( size_t i=2UL; i<M; ++i ) {
1316  const size_t jend( 2UL * ( i/2UL ) );
1317  for( size_t j=0UL; j<jend; ++j ) {
1318  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1319  }
1320  }
1321  }
1322  else if( LOW && !UPP ) {
1323  for( size_t j=2UL; j<N; ++j ) {
1324  const size_t iend( 2UL * ( j/2UL ) );
1325  for( size_t i=0UL; i<iend; ++i ) {
1326  reset( (~C)(i,j) );
1327  }
1328  }
1329  }
1330  else if( !LOW && UPP ) {
1331  for( size_t i=2UL; i<M; ++i ) {
1332  const size_t jend( 2UL * ( i/2UL ) );
1333  for( size_t j=0UL; j<jend; ++j ) {
1334  reset( (~C)(i,j) );
1335  }
1336  }
1337  }
1338  }
1340  //**********************************************************************************************
1341 
1342  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for a column-major target matrix C.
//
// Every computed element C(i,j) is built as a dot product over the shared dimension k:
// SIMD loads from A (row i) and B (column j) are multiplied and accumulated, the
// accumulator is reduced with sum(), and - if 'remainder' is set - the k values not
// covered by full SIMD steps are added by a scalar loop.
//
// Rows are processed in blocks of four, then two, then one. SYM, HERM, LOW and UPP are
// flags defined outside this block (presumably the structure flags of the enclosing
// expression class - confirm there); they restrict which columns are computed and select
// the post-processing step at the end of the function.
//
// \param C The target matrix that is overwritten.
// \param A The left-hand side operand (read via load(i,k) and (i,k)).
// \param B The right-hand side operand (read via load(k,j) and (k,j)).
//
// NOTE(review): the line carrying the return type of this overload is not part of this
// listing (the numbering jumps from 1359 to 1361).
1357  template< typename MT3 // Type of the left-hand side target matrix
1358  , typename MT4 // Type of the left-hand side matrix operand
1359  , typename MT5 > // Type of the right-hand side matrix operand
1361  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1362  {
// A scalar clean-up loop over k is only required if at least one operand is not padded.
1363  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
1364 
1365  const size_t M( A.rows() );
1366  const size_t N( B.columns() );
1367  const size_t K( A.columns() );
1368 
1369  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1370 
// With both LOW and UPP set the 4-row and 2-row loops below are skipped and the final
// single-row loop writes only C(i,i); everything else keeps the value set by this reset.
1371  if( LOW && UPP ) {
1372  reset( ~C );
1373  }
1374 
1375  {
1376  size_t i( 0UL );
1377 
// Blocks of four rows. For SYM/HERM/LOW only the columns [0,i+4) are computed, for UPP
// the columns start at i.
1378  for( ; !( LOW && UPP ) && (i+4UL) <= M; i+=4UL )
1379  {
1380  const size_t jend( SYM || HERM || LOW ? i+4UL : N );
1381  size_t j( UPP ? i : 0UL );
1382 
// 4x2 tile: eight accumulators, one per element of the tile.
1383  for( ; (j+2UL) <= jend; j+=2UL )
1384  {
// k range of the dot product. kbegin is masked down to a multiple of SIMDSIZE (the mask
// assumes SIMDSIZE is a power of two); the bounds depend on the IsUpper/IsLower traits
// of the operands, presumably to skip their structurally zero parts.
1385  const size_t kbegin( ( IsUpper<MT4>::value )
1386  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1387  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1388  const size_t kend( ( IsLower<MT4>::value )
1389  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
1390  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1391 
// kpos is the end of the SIMD part: kend rounded down to a SIMDSIZE multiple when a
// scalar remainder loop exists, kend itself otherwise.
1392  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1393  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1394 
// NOTE(review): assumes default-constructed SIMDType accumulators start at zero - confirm.
1395  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1396  size_t k( kbegin );
1397 
1398  for( ; k<kpos; k+=SIMDSIZE ) {
1399  const SIMDType a1( A.load(i ,k) );
1400  const SIMDType a2( A.load(i+1UL,k) );
1401  const SIMDType a3( A.load(i+2UL,k) );
1402  const SIMDType a4( A.load(i+3UL,k) );
1403  const SIMDType b1( B.load(k,j ) );
1404  const SIMDType b2( B.load(k,j+1UL) );
1405  xmm1 += a1 * b1;
1406  xmm2 += a1 * b2;
1407  xmm3 += a2 * b1;
1408  xmm4 += a2 * b2;
1409  xmm5 += a3 * b1;
1410  xmm6 += a3 * b2;
1411  xmm7 += a4 * b1;
1412  xmm8 += a4 * b2;
1413  }
1414 
// Reduce each accumulator to the scalar value of its target element (plain assignment,
// previous contents of C are overwritten).
1415  (~C)(i ,j ) = sum( xmm1 );
1416  (~C)(i ,j+1UL) = sum( xmm2 );
1417  (~C)(i+1UL,j ) = sum( xmm3 );
1418  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1419  (~C)(i+2UL,j ) = sum( xmm5 );
1420  (~C)(i+2UL,j+1UL) = sum( xmm6 );
1421  (~C)(i+3UL,j ) = sum( xmm7 );
1422  (~C)(i+3UL,j+1UL) = sum( xmm8 );
1423 
// Scalar clean-up for the k values in [kpos,kend).
1424  for( ; remainder && k<kend; ++k ) {
1425  (~C)(i ,j ) += A(i ,k) * B(k,j );
1426  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1427  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1428  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1429  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
1430  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
1431  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
1432  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
1433  }
1434  }
1435 
// At most one column is left for this 4-row block: 4x1 tile.
1436  if( j < jend )
1437  {
1438  const size_t kbegin( ( IsUpper<MT4>::value )
1439  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1440  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1441  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
1442 
1443  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1444  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1445 
1446  SIMDType xmm1, xmm2, xmm3, xmm4;
1447  size_t k( kbegin );
1448 
1449  for( ; k<kpos; k+=SIMDSIZE ) {
1450  const SIMDType b1( B.load(k,j) );
1451  xmm1 += A.load(i ,k) * b1;
1452  xmm2 += A.load(i+1UL,k) * b1;
1453  xmm3 += A.load(i+2UL,k) * b1;
1454  xmm4 += A.load(i+3UL,k) * b1;
1455  }
1456 
1457  (~C)(i ,j) = sum( xmm1 );
1458  (~C)(i+1UL,j) = sum( xmm2 );
1459  (~C)(i+2UL,j) = sum( xmm3 );
1460  (~C)(i+3UL,j) = sum( xmm4 );
1461 
1462  for( ; remainder && k<kend; ++k ) {
1463  (~C)(i ,j) += A(i ,k) * B(k,j);
1464  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1465  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
1466  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
1467  }
1468  }
1469  }
1470 
// Blocks of two rows for what the 4-row loop left over. Unlike above, all N columns are
// computed here independent of SYM/HERM/LOW/UPP.
1471  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
1472  {
1473  size_t j( 0UL );
1474 
// 2x2 tile.
1475  for( ; (j+2UL) <= N; j+=2UL )
1476  {
1477  const size_t kbegin( ( IsUpper<MT4>::value )
1478  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1479  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1480  const size_t kend( ( IsLower<MT4>::value )
1481  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1482  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1483 
1484  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1485  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1486 
1487  SIMDType xmm1, xmm2, xmm3, xmm4;
1488  size_t k( kbegin );
1489 
1490  for( ; k<kpos; k+=SIMDSIZE ) {
1491  const SIMDType a1( A.load(i ,k) );
1492  const SIMDType a2( A.load(i+1UL,k) );
1493  const SIMDType b1( B.load(k,j ) );
1494  const SIMDType b2( B.load(k,j+1UL) );
1495  xmm1 += a1 * b1;
1496  xmm2 += a1 * b2;
1497  xmm3 += a2 * b1;
1498  xmm4 += a2 * b2;
1499  }
1500 
1501  (~C)(i ,j ) = sum( xmm1 );
1502  (~C)(i ,j+1UL) = sum( xmm2 );
1503  (~C)(i+1UL,j ) = sum( xmm3 );
1504  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1505 
1506  for( ; remainder && k<kend; ++k ) {
1507  (~C)(i ,j ) += A(i ,k) * B(k,j );
1508  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1509  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1510  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1511  }
1512  }
1513 
// Last column of the 2-row block: 2x1 tile.
1514  if( j < N )
1515  {
1516  const size_t kbegin( ( IsUpper<MT4>::value )
1517  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1518  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1519  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1520 
1521  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1522  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1523 
1524  SIMDType xmm1, xmm2;
1525  size_t k( kbegin );
1526 
1527  for( ; k<kpos; k+=SIMDSIZE ) {
1528  const SIMDType b1( B.load(k,j) );
1529  xmm1 += A.load(i ,k) * b1;
1530  xmm2 += A.load(i+1UL,k) * b1;
1531  }
1532 
1533  (~C)(i ,j) = sum( xmm1 );
1534  (~C)(i+1UL,j) = sum( xmm2 );
1535 
1536  for( ; remainder && k<kend; ++k ) {
1537  (~C)(i ,j) += A(i ,k) * B(k,j);
1538  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1539  }
1540  }
1541  }
1542 
// Remaining single rows. With LOW && UPP the column range collapses to [i,i+1), i.e.
// only C(i,i) is computed; otherwise the whole row is computed.
1543  for( ; i<M; ++i )
1544  {
1545  const size_t jend( LOW && UPP ? i+1UL : N );
1546  size_t j( LOW && UPP ? i : 0UL );
1547 
// 1x2 tile.
1548  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
1549  {
1550  const size_t kbegin( ( IsUpper<MT4>::value )
1551  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1552  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1553  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1554 
1555  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1556  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1557 
1558  SIMDType xmm1, xmm2;
1559  size_t k( kbegin );
1560 
1561  for( ; k<kpos; k+=SIMDSIZE ) {
1562  const SIMDType a1( A.load(i,k) );
1563  xmm1 += a1 * B.load(k,j );
1564  xmm2 += a1 * B.load(k,j+1UL);
1565  }
1566 
1567  (~C)(i,j ) = sum( xmm1 );
1568  (~C)(i,j+1UL) = sum( xmm2 );
1569 
1570  for( ; remainder && k<kend; ++k ) {
1571  (~C)(i,j ) += A(i,k) * B(k,j );
1572  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1573  }
1574  }
1575 
// Single elements; here the k range always extends up to K.
1576  for( ; j<jend; ++j )
1577  {
1578  const size_t kbegin( ( IsUpper<MT4>::value )
1579  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1580  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1581 
1582  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1583  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1584 
1585  SIMDType xmm1;
1586  size_t k( kbegin );
1587 
1588  for( ; k<kpos; k+=SIMDSIZE ) {
1589  xmm1 += A.load(i,k) * B.load(k,j);
1590  }
1591 
1592  (~C)(i,j) = sum( xmm1 );
1593 
1594  for( ; remainder && k<K; ++k ) {
1595  (~C)(i,j) += A(i,k) * B(k,j);
1596  }
1597  }
1598  }
1599  }
1600 
// Post-processing of the elements (i,j) with j >= 4 and i < 4*(j/4), i.e. the part
// above the 4x4 diagonal blocks:
//  - SYM/HERM: filled from the transposed position (j,i), conjugated for HERM.
//  - LOW only: reset.
// For UPP only, the mirrored set (j < 4*(i/4)) below the diagonal blocks is reset.
1601  if( ( SYM || HERM ) && ( N > 4UL ) ) {
1602  for( size_t j=4UL; j<N; ++j ) {
1603  const size_t iend( 4UL * ( j/4UL ) );
1604  for( size_t i=0UL; i<iend; ++i ) {
1605  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1606  }
1607  }
1608  }
1609  else if( LOW && !UPP ) {
1610  for( size_t j=4UL; j<N; ++j ) {
1611  const size_t iend( 4UL * ( j/4UL ) );
1612  for( size_t i=0UL; i<iend; ++i ) {
1613  reset( (~C)(i,j) );
1614  }
1615  }
1616  }
1617  else if( !LOW && UPP ) {
1618  for( size_t i=4UL; i<N; ++i ) {
1619  const size_t jend( 4UL * ( i/4UL ) );
1620  for( size_t j=0UL; j<jend; ++j ) {
1621  reset( (~C)(i,j) );
1622  }
1623  }
1624  }
1625  }
1627  //**********************************************************************************************
1628 
1629  //**Default assignment to dense matrices (large matrices)***************************************
// Large-matrix assignment, fallback overload: performs no work of its own and simply
// forwards C, A and B to selectDefaultAssignKernel().
// NOTE(review): the declaration line with the return type is not part of this listing
// (numbering jumps from 1645 to 1647); presumably it also carries the condition that
// selects this overload over the vectorized one - confirm in the original header.
1643  template< typename MT3 // Type of the left-hand side target matrix
1644  , typename MT4 // Type of the left-hand side matrix operand
1645  , typename MT5 > // Type of the right-hand side matrix operand
1647  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1648  {
1649  selectDefaultAssignKernel( C, A, B );
1650  }
1652  //**********************************************************************************************
1653 
1654  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Large-matrix assignment, vectorized overload: dispatches to one of the matrix
// multiplication routines smmm/hmmm/lmmm/ummm/mmm depending on the flags SYM, HERM, LOW
// and UPP, which are tested in exactly this order (the first one set wins).
// The scalar arguments are ElementType(1) and - for lmmm/ummm/mmm - ElementType(0);
// presumably these are the scaling factors of the product and of the previous content
// of C (confirm against blaze/math/dense/MMM.h).
// NOTE(review): the declaration line with the return type is not part of this listing
// (numbering jumps from 1671 to 1673).
1669  template< typename MT3 // Type of the left-hand side target matrix
1670  , typename MT4 // Type of the left-hand side matrix operand
1671  , typename MT5 > // Type of the right-hand side matrix operand
1673  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1674  {
1675  if( SYM )
1676  smmm( C, A, B, ElementType(1) );
1677  else if( HERM )
1678  hmmm( C, A, B, ElementType(1) );
1679  else if( LOW )
1680  lmmm( C, A, B, ElementType(1), ElementType(0) );
1681  else if( UPP )
1682  ummm( C, A, B, ElementType(1), ElementType(0) );
1683  else
1684  mmm( C, A, B, ElementType(1), ElementType(0) );
1685  }
1687  //**********************************************************************************************
1688 
1689  //**BLAS-based assignment to dense matrices (default)*******************************************
// BLAS-based assignment, fallback overload: does not call any BLAS routine but forwards
// C, A and B unchanged to selectLargeAssignKernel().
// NOTE(review): the declaration line with the return type is not part of this listing
// (numbering jumps from 1705 to 1707); presumably it restricts this overload to the
// cases in which the BLAS kernel is not applicable - confirm in the original header.
1703  template< typename MT3 // Type of the left-hand side target matrix
1704  , typename MT4 // Type of the left-hand side matrix operand
1705  , typename MT5 > // Type of the right-hand side matrix operand
1707  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1708  {
1709  selectLargeAssignKernel( C, A, B );
1710  }
1712  //**********************************************************************************************
1713 
1714  //**BLAS-based assignment to dense matrices*****************************************************
1715 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
1716 
// BLAS-based assignment kernel (only compiled if BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled, see the surrounding #if).
//  - A triangular: C is first assigned B, then trmm() is applied to C with A on the left.
//  - otherwise, B triangular: C is first assigned A, then trmm() is applied to C with B
//    on the right.
//  - otherwise: gemm( C, A, B, ET(1), ET(0) ).
// The lower/upper argument of trmm() follows IsLower of the triangular operand.
// NOTE(review): the declaration line with the return type is not part of this listing
// (numbering jumps from 1731 to 1733).
1729  template< typename MT3 // Type of the left-hand side target matrix
1730  , typename MT4 // Type of the left-hand side matrix operand
1731  , typename MT5 > // Type of the right-hand side matrix operand
1733  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1734  {
1735  typedef ElementType_<MT3> ET;
1736 
// A is tested before B, so for two triangular operands the left-hand side variant is used.
1737  if( IsTriangular<MT4>::value ) {
1738  assign( C, B );
1739  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1740  }
1741  else if( IsTriangular<MT5>::value ) {
1742  assign( C, A );
1743  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1744  }
1745  else {
1746  gemm( C, A, B, ET(1), ET(0) );
1747  }
1748  }
1750 #endif
1751  //**********************************************************************************************
1752 
1753  //**Assignment to sparse matrices***************************************************************
// Assignment of the multiplication expression to a sparse matrix: the expression is
// first evaluated serially into a temporary of type TmpType, which is then passed
// through a ForwardFunctor and assigned to the target.
// NOTE(review): several lines of this function (1770-1780) are not part of this
// listing; TmpType and ForwardFunctor are presumably declared there - confirm in the
// original header.
//
// \param lhs The target sparse matrix; must already have the dimensions of rhs.
// \param rhs The multiplication expression to be assigned.
1766  template< typename MT // Type of the target sparse matrix
1767  , bool SO > // Storage order of the target sparse matrix
1768  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1769  {
1771 
1773 
1780 
1781  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1782  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1783 
1784  const ForwardFunctor fwd;
1785 
// Evaluate the complete product once, then hand the result over to the target.
1786  const TmpType tmp( serial( rhs ) );
1787  assign( ~lhs, fwd( tmp ) );
1788  }
1790  //**********************************************************************************************
1791 
1792  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the multiplication expression to a dense matrix
// (lhs += rhs.lhs_ * rhs.rhs_). Both operands are evaluated serially into A and B and
// the actual work is delegated to selectAddAssignKernel().
//
// \param lhs The target dense matrix; must already have the dimensions of rhs.
// \param rhs The multiplication expression to be added.
1805  template< typename MT // Type of the target dense matrix
1806  , bool SO > // Storage order of the target dense matrix
1807  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1808  {
1810 
1811  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1812  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1813 
// Nothing to add if the target is empty or the shared dimension of the product is zero.
1814  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1815  return;
1816  }
1817 
1818  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1819  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1820 
1821  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1822  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1823  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1824  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1825  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1826  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1827 
1828  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1829  }
1831  //**********************************************************************************************
1832 
1833  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment: chooses between
// selectSmallAddAssignKernel() and selectBlasAddAssignKernel().
// The visible part of the condition selects the small kernel when the number of
// elements of C (rows * columns) is below DMATTDMATMULT_THRESHOLD.
// NOTE(review): the first line of the condition (1849, including the opening 'if') is
// not part of this listing, so the complete selection criterion cannot be read from
// here - confirm in the original header.
1844  template< typename MT3 // Type of the left-hand side target matrix
1845  , typename MT4 // Type of the left-hand side matrix operand
1846  , typename MT5 > // Type of the right-hand side matrix operand
1847  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1848  {
1850  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
1851  selectSmallAddAssignKernel( C, A, B );
1852  else
1853  selectBlasAddAssignKernel( C, A, B );
1854  }
1856  //**********************************************************************************************
1857 
1858  //**Default addition assignment to row-major dense matrices (general/general)*******************
// Default (scalar) addition assignment kernel for a row-major target and two
// non-diagonal operands: for every visited element, C(i,j) += A(i,k) * B(k,j) is
// accumulated over k in [kbegin,kend).
// The row range [ibegin,iend), the column range [jbegin,jend) and the k range are
// narrowed according to the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of
// the operands and the flags LOW and UPP (defined outside this block).
// NOTE(review): some continuation lines of the jbegin/jend expressions (1895, 1898,
// 1902, 1905) are not part of this listing, so the exact column bounds cannot be read
// from here - confirm in the original header.
1872  template< typename MT3 // Type of the left-hand side target matrix
1873  , typename MT4 // Type of the left-hand side matrix operand
1874  , typename MT5 > // Type of the right-hand side matrix operand
1875  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1876  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1877  {
1878  const size_t M( A.rows() );
1879  const size_t N( B.columns() );
1880  const size_t K( A.columns() );
1881 
1882  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1883 
// Rows to visit: a strictly lower A skips the first row (the first two if B is strictly
// lower as well and M > 1); a strictly upper A skips the last row(s) analogously.
1884  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
1885  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
1886  :( 0UL ) );
1887  const size_t iend( ( IsStrictlyUpper<MT4>::value )
1888  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
1889  :( M ) );
1890  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1891 
1892  for( size_t i=ibegin; i<iend; ++i )
1893  {
1894  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1896  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
1897  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
1899  ?( UPP ? max( i, 1UL ) : 1UL )
1900  :( UPP ? i : 0UL ) ) );
1901  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
1903  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
1904  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
1906  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
1907  :( LOW ? i+1UL : N ) ) );
1908 
// With LOW or UPP set the column range of a row may be inverted; such rows are skipped.
1909  if( ( LOW || UPP ) && ( jbegin > jend ) ) continue;
1910  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1911 
1912  for( size_t j=jbegin; j<jend; ++j )
1913  {
1914  const size_t kbegin( ( IsUpper<MT4>::value )
1915  ?( ( IsLower<MT5>::value )
1916  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1917  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1918  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1919  :( ( IsLower<MT5>::value )
1920  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1921  :( 0UL ) ) );
1922  const size_t kend( ( IsLower<MT4>::value )
1923  ?( ( IsUpper<MT5>::value )
1924  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1925  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1926  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1927  :( ( IsUpper<MT5>::value )
1928  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1929  :( K ) ) );
1930  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1931 
// The k loop is unrolled by two: kpos marks the end of the even-sized part, a possible
// last single k is handled after the loop.
1932  const size_t knum( kend - kbegin );
1933  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1934 
1935  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1936  (~C)(i,j) += A(i,k ) * B(k ,j);
1937  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1938  }
1939  if( kpos < kend ) {
1940  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1941  }
1942  }
1943  }
1944  }
1946  //**********************************************************************************************
1947 
1948  //**Default addition assignment to column-major dense matrices (general/general)****************
// Default (scalar) addition assignment kernel for a column-major target and two
// non-diagonal operands: for every visited element, C(i,j) += A(i,k) * B(k,j) is
// accumulated over k in [kbegin,kend). In contrast to the row-major variant the
// outer loop runs over the columns j and the inner loop over the rows i.
// The column range [jbegin,jend), the row range [ibegin,iend) and the k range are
// narrowed according to the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of
// the operands and the flags LOW and UPP (defined outside this block).
// NOTE(review): some continuation lines of the ibegin/iend expressions (1985, 1988,
// 1992, 1995) are not part of this listing, so the exact row bounds cannot be read
// from here - confirm in the original header.
1962  template< typename MT3 // Type of the left-hand side target matrix
1963  , typename MT4 // Type of the left-hand side matrix operand
1964  , typename MT5 > // Type of the right-hand side matrix operand
1965  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1966  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1967  {
1968  const size_t M( A.rows() );
1969  const size_t N( B.columns() );
1970  const size_t K( A.columns() );
1971 
1972  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1973 
// Columns to visit: a strictly upper B skips the first column (the first two if A is
// strictly upper as well and N > 1); a strictly lower B skips the last column(s).
1974  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
1975  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
1976  :( 0UL ) );
1977  const size_t jend( ( IsStrictlyLower<MT5>::value )
1978  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
1979  :( N ) );
1980  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1981 
1982  for( size_t j=jbegin; j<jend; ++j )
1983  {
1984  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
1986  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
1987  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1989  ?( LOW ? max( j, 1UL ) : 1UL )
1990  :( LOW ? j : 0UL ) ) );
1991  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1993  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
1994  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
1996  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
1997  :( UPP ? j+1UL : M ) ) );
1998 
// With LOW or UPP set the row range of a column may be inverted; such columns are skipped.
1999  if( ( LOW || UPP ) && ( ibegin > iend ) ) continue;
2000  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2001 
2002  for( size_t i=ibegin; i<iend; ++i )
2003  {
2004  const size_t kbegin( ( IsUpper<MT4>::value )
2005  ?( ( IsLower<MT5>::value )
2006  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2007  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2008  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2009  :( ( IsLower<MT5>::value )
2010  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2011  :( 0UL ) ) );
2012  const size_t kend( ( IsLower<MT4>::value )
2013  ?( ( IsUpper<MT5>::value )
2014  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
2015  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2016  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
2017  :( ( IsUpper<MT5>::value )
2018  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2019  :( K ) ) );
2020  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
2021 
// The k loop is unrolled by two: kpos marks the end of the even-sized part, a possible
// last single k is handled after the loop.
2022  const size_t knum( kend - kbegin );
2023  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
2024 
2025  for( size_t k=kbegin; k<kpos; k+=2UL ) {
2026  (~C)(i,j) += A(i,k ) * B(k ,j);
2027  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
2028  }
2029  if( kpos < kend ) {
2030  (~C)(i,j) += A(i,kpos) * B(kpos,j);
2031  }
2032  }
2033  }
2034  }
2036  //**********************************************************************************************
2037 
2038  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
2052  template< typename MT3 // Type of the left-hand side target matrix
2053  , typename MT4 // Type of the left-hand side matrix operand
2054  , typename MT5 > // Type of the right-hand side matrix operand
2055  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2056  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2057  {
2058  const size_t M( A.rows() );
2059  const size_t N( B.columns() );
2060 
2061  for( size_t i=0UL; i<M; ++i )
2062  {
2063  const size_t jbegin( ( IsUpper<MT4>::value )
2064  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2065  :( 0UL ) );
2066  const size_t jend( ( IsLower<MT4>::value )
2067  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2068  :( N ) );
2069  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2070 
2071  const size_t jnum( jend - jbegin );
2072  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2073 
2074  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2075  (~C)(i,j ) += A(i,j ) * B(j ,j );
2076  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
2077  }
2078  if( jpos < jend ) {
2079  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos);
2080  }
2081  }
2082  }
2084  //**********************************************************************************************
2085 
2086  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
2100  template< typename MT3 // Type of the left-hand side target matrix
2101  , typename MT4 // Type of the left-hand side matrix operand
2102  , typename MT5 > // Type of the right-hand side matrix operand
2103  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2104  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2105  {
2106  constexpr size_t block( BLOCK_SIZE );
2107 
2108  const size_t M( A.rows() );
2109  const size_t N( B.columns() );
2110 
2111  for( size_t jj=0UL; jj<N; jj+=block ) {
2112  const size_t jend( min( N, jj+block ) );
2113  for( size_t ii=0UL; ii<M; ii+=block ) {
2114  const size_t iend( min( M, ii+block ) );
2115  for( size_t j=jj; j<jend; ++j )
2116  {
2117  const size_t ibegin( ( IsLower<MT4>::value )
2118  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
2119  :( ii ) );
2120  const size_t ipos( ( IsUpper<MT4>::value )
2121  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
2122  :( iend ) );
2123 
2124  for( size_t i=ibegin; i<ipos; ++i ) {
2125  (~C)(i,j) += A(i,j) * B(j,j);
2126  }
2127  }
2128  }
2129  }
2130  }
2132  //**********************************************************************************************
2133 
2134  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
2148  template< typename MT3 // Type of the left-hand side target matrix
2149  , typename MT4 // Type of the left-hand side matrix operand
2150  , typename MT5 > // Type of the right-hand side matrix operand
2151  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2152  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2153  {
2154  constexpr size_t block( BLOCK_SIZE );
2155 
2156  const size_t M( A.rows() );
2157  const size_t N( B.columns() );
2158 
2159  for( size_t ii=0UL; ii<M; ii+=block ) {
2160  const size_t iend( min( M, ii+block ) );
2161  for( size_t jj=0UL; jj<N; jj+=block ) {
2162  const size_t jend( min( N, jj+block ) );
2163  for( size_t i=ii; i<iend; ++i )
2164  {
2165  const size_t jbegin( ( IsUpper<MT5>::value )
2166  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
2167  :( jj ) );
2168  const size_t jpos( ( IsLower<MT5>::value )
2169  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
2170  :( jend ) );
2171 
2172  for( size_t j=jbegin; j<jpos; ++j ) {
2173  (~C)(i,j) += A(i,i) * B(i,j);
2174  }
2175  }
2176  }
2177  }
2178  }
2180  //**********************************************************************************************
2181 
2182  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
2196  template< typename MT3 // Type of the left-hand side target matrix
2197  , typename MT4 // Type of the left-hand side matrix operand
2198  , typename MT5 > // Type of the right-hand side matrix operand
2199  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2200  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2201  {
2202  const size_t M( A.rows() );
2203  const size_t N( B.columns() );
2204 
2205  for( size_t j=0UL; j<N; ++j )
2206  {
2207  const size_t ibegin( ( IsLower<MT5>::value )
2208  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2209  :( 0UL ) );
2210  const size_t iend( ( IsUpper<MT5>::value )
2211  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2212  :( M ) );
2213  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2214 
2215  const size_t inum( iend - ibegin );
2216  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2217 
2218  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2219  (~C)(i ,j) += A(i ,i ) * B(i ,j);
2220  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
2221  }
2222  if( ipos < iend ) {
2223  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j);
2224  }
2225  }
2226  }
2228  //**********************************************************************************************
2229 
2230  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
// Default addition assignment kernel for the case that both operands are diagonal. Only the
// diagonal of the target is touched: C(i,i) += A(i,i) * B(i,i) for every row index i of A.
// The target is taken as plain MT3&, so this overload does not depend on its storage order.
//
// \param C The target dense matrix; only its diagonal elements are updated.
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \return void
2244  template< typename MT3 // Type of the left-hand side target matrix
2245  , typename MT4 // Type of the left-hand side matrix operand
2246  , typename MT5 > // Type of the right-hand side matrix operand
2247  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
2248  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2249  {
2250  for( size_t i=0UL; i<A.rows(); ++i ) {
2251  C(i,i) += A(i,i) * B(i,i);
2252  }
2253  }
2255  //**********************************************************************************************
2256 
2257  //**Default addition assignment to dense matrices (small matrices)******************************
// Small-matrix addition assignment kernel (fallback): forwards unchanged to
// selectDefaultAddAssignKernel( C, A, B ).
// NOTE(review): listing line 2274 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the condition that
// selects this overload over the vectorized ones - confirm against the original header.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2271  template< typename MT3 // Type of the left-hand side target matrix
2272  , typename MT4 // Type of the left-hand side matrix operand
2273  , typename MT5 > // Type of the right-hand side matrix operand
2275  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2276  {
2277  selectDefaultAddAssignKernel( C, A, B );
2278  }
2280  //**********************************************************************************************
2281 
2282  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized small-matrix addition assignment kernel for a row-major target (C += A * B).
// The target is processed in register blocks of 2x4, 2x2 and 2x1 elements, followed by 1x4,
// 1x2 and 1x1 blocks for a possible last single row. For every block the products along k
// are accumulated in SIMD registers via A.load(i,k) and B.load(k,j), and the result of
// sum( xmmN ) is added to the corresponding element of C.
// NOTE(review): listing line 2300 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the condition enabling
// the vectorized kernel - confirm against the original header.
//
// \param C The row-major target dense matrix; updated in place.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2297  template< typename MT3 // Type of the left-hand side target matrix
2298  , typename MT4 // Type of the left-hand side matrix operand
2299  , typename MT5 > // Type of the right-hand side matrix operand
2301  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2302  {
// True if at least one operand type is not padded. In that case the SIMD loops stop at kpos
// and the scalar loops finish the range [kpos,kend); otherwise kpos equals kend and the
// scalar loops are disabled by the 'remainder &&' condition.
2303  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2304 
2305  const size_t M( A.rows() );
2306  const size_t N( B.columns() );
2307  const size_t K( A.columns() );
2308 
2309  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2310 
2311  size_t i( 0UL );
2312 
// Main loop: two rows of C per iteration.
2313  for( ; (i+2UL) <= M; i+=2UL )
2314  {
// With LOW the columns end at i+2, with UPP they start at i; otherwise [0,N) is visited.
2315  const size_t jend( LOW ? i+2UL : N );
2316  size_t j( UPP ? i : 0UL );
2317 
// 2x4 blocks of C; this loop is skipped entirely if both LOW and UPP are set.
2318  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
2319  {
// k range derived from the triangular traits of A and B. kbegin is masked with
// size_t(-SIMDSIZE); the assertion below checks that the same mask applied to kend
// equals rounding down to a multiple of SIMDSIZE.
2320  const size_t kbegin( ( IsUpper<MT4>::value )
2321  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2322  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2323  const size_t kend( ( IsLower<MT4>::value )
2324  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
2325  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
2326 
2327  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2328  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2329 
// NOTE(review): the accumulators are used with += without explicit initialization;
// presumably SIMDType default-constructs to zero - confirm.
2330  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2331  size_t k( kbegin );
2332 
2333  for( ; k<kpos; k+=SIMDSIZE ) {
2334  const SIMDType a1( A.load(i ,k) );
2335  const SIMDType a2( A.load(i+1UL,k) );
2336  const SIMDType b1( B.load(k,j ) );
2337  const SIMDType b2( B.load(k,j+1UL) );
2338  const SIMDType b3( B.load(k,j+2UL) );
2339  const SIMDType b4( B.load(k,j+3UL) );
2340  xmm1 += a1 * b1;
2341  xmm2 += a1 * b2;
2342  xmm3 += a1 * b3;
2343  xmm4 += a1 * b4;
2344  xmm5 += a2 * b1;
2345  xmm6 += a2 * b2;
2346  xmm7 += a2 * b3;
2347  xmm8 += a2 * b4;
2348  }
2349 
2350  (~C)(i ,j ) += sum( xmm1 );
2351  (~C)(i ,j+1UL) += sum( xmm2 );
2352  (~C)(i ,j+2UL) += sum( xmm3 );
2353  (~C)(i ,j+3UL) += sum( xmm4 );
2354  (~C)(i+1UL,j ) += sum( xmm5 );
2355  (~C)(i+1UL,j+1UL) += sum( xmm6 );
2356  (~C)(i+1UL,j+2UL) += sum( xmm7 );
2357  (~C)(i+1UL,j+3UL) += sum( xmm8 );
2358 
// Scalar tail for the k values not covered by the SIMD loop.
2359  for( ; remainder && k<kend; ++k ) {
2360  (~C)(i ,j ) += A(i ,k) * B(k,j );
2361  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2362  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
2363  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
2364  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2365  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2366  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
2367  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
2368  }
2369  }
2370 
// 2x2 blocks of C for the remaining column pairs.
2371  for( ; (j+2UL) <= jend; j+=2UL )
2372  {
2373  const size_t kbegin( ( IsUpper<MT4>::value )
2374  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2375  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2376  const size_t kend( ( IsLower<MT4>::value )
2377  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2378  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2379 
2380  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2381  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2382 
2383  SIMDType xmm1, xmm2, xmm3, xmm4;
2384  size_t k( kbegin );
2385 
2386  for( ; k<kpos; k+=SIMDSIZE ) {
2387  const SIMDType a1( A.load(i ,k) );
2388  const SIMDType a2( A.load(i+1UL,k) );
2389  const SIMDType b1( B.load(k,j ) );
2390  const SIMDType b2( B.load(k,j+1UL) );
2391  xmm1 += a1 * b1;
2392  xmm2 += a1 * b2;
2393  xmm3 += a2 * b1;
2394  xmm4 += a2 * b2;
2395  }
2396 
2397  (~C)(i ,j ) += sum( xmm1 );
2398  (~C)(i ,j+1UL) += sum( xmm2 );
2399  (~C)(i+1UL,j ) += sum( xmm3 );
2400  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2401 
2402  for( ; remainder && k<kend; ++k ) {
2403  (~C)(i ,j ) += A(i ,k) * B(k,j );
2404  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2405  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2406  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2407  }
2408  }
2409 
// 2x1 block for a single remaining column; here kend depends only on the lower trait of A.
2410  if( j < jend )
2411  {
2412  const size_t kbegin( ( IsUpper<MT4>::value )
2413  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2414  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2415  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2416 
2417  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2418  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2419 
2420  SIMDType xmm1, xmm2;
2421  size_t k( kbegin );
2422 
2423  for( ; k<kpos; k+=SIMDSIZE ) {
2424  const SIMDType b1( B.load(k,j) );
2425  xmm1 += A.load(i ,k) * b1;
2426  xmm2 += A.load(i+1UL,k) * b1;
2427  }
2428 
2429  (~C)(i ,j) += sum( xmm1 );
2430  (~C)(i+1UL,j) += sum( xmm2 );
2431 
2432  for( ; remainder && k<kend; ++k ) {
2433  (~C)(i ,j) += A(i ,k) * B(k,j);
2434  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2435  }
2436  }
2437  }
2438 
// Last single row of C in case M is odd: 1x4, 1x2 and 1x1 blocks.
2439  if( i < M )
2440  {
2441  const size_t jend( LOW ? i+1UL : N );
2442  size_t j( UPP ? i : 0UL );
2443 
2444  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
2445  {
2446  const size_t kbegin( ( IsUpper<MT4>::value )
2447  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2448  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2449  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
2450 
2451  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2452  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2453 
2454  SIMDType xmm1, xmm2, xmm3, xmm4;
2455  size_t k( kbegin );
2456 
2457  for( ; k<kpos; k+=SIMDSIZE ) {
2458  const SIMDType a1( A.load(i,k) );
2459  xmm1 += a1 * B.load(k,j );
2460  xmm2 += a1 * B.load(k,j+1UL);
2461  xmm3 += a1 * B.load(k,j+2UL);
2462  xmm4 += a1 * B.load(k,j+3UL);
2463  }
2464 
2465  (~C)(i,j ) += sum( xmm1 );
2466  (~C)(i,j+1UL) += sum( xmm2 );
2467  (~C)(i,j+2UL) += sum( xmm3 );
2468  (~C)(i,j+3UL) += sum( xmm4 );
2469 
2470  for( ; remainder && k<kend; ++k ) {
2471  (~C)(i,j ) += A(i,k) * B(k,j );
2472  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2473  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
2474  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
2475  }
2476  }
2477 
2478  for( ; (j+2UL) <= jend; j+=2UL )
2479  {
2480  const size_t kbegin( ( IsUpper<MT4>::value )
2481  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2482  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2483  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2484 
2485  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2486  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2487 
2488  SIMDType xmm1, xmm2;
2489  size_t k( kbegin );
2490 
2491  for( ; k<kpos; k+=SIMDSIZE ) {
2492  const SIMDType a1( A.load(i,k) );
2493  xmm1 += a1 * B.load(k,j );
2494  xmm2 += a1 * B.load(k,j+1UL);
2495  }
2496 
2497  (~C)(i,j ) += sum( xmm1 );
2498  (~C)(i,j+1UL) += sum( xmm2 );
2499 
2500  for( ; remainder && k<kend; ++k ) {
2501  (~C)(i,j ) += A(i,k) * B(k,j );
2502  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2503  }
2504  }
2505 
// Final 1x1 element; the k range always ends at K here, independent of the operand traits.
2506  if( j < jend )
2507  {
2508  const size_t kbegin( ( IsUpper<MT4>::value )
2509  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2510  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2511 
2512  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2513  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2514 
2515  SIMDType xmm1;
2516  size_t k( kbegin );
2517 
2518  for( ; k<kpos; k+=SIMDSIZE ) {
2519  xmm1 += A.load(i,k) * B.load(k,j);
2520  }
2521 
2522  (~C)(i,j) += sum( xmm1 );
2523 
2524  for( ; remainder && k<K; ++k ) {
2525  (~C)(i,j) += A(i,k) * B(k,j);
2526  }
2527  }
2528  }
2529  }
2531  //**********************************************************************************************
2532 
2533  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized small-matrix addition assignment kernel for a column-major target (C += A * B).
// The target is processed in register blocks of 4x2 and 4x1 elements (only if neither LOW
// nor UPP is set), then 2x2 and 2x1 blocks, and finally 1x2 and 1x1 blocks for a possible
// last single row. For every block the products along k are accumulated in SIMD registers
// via A.load(i,k) and B.load(k,j), and the result of sum( xmmN ) is added to the
// corresponding element of C.
// NOTE(review): listing line 2551 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the condition enabling
// the vectorized kernel - confirm against the original header.
//
// \param C The column-major target dense matrix; updated in place.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2548  template< typename MT3 // Type of the left-hand side target matrix
2549  , typename MT4 // Type of the left-hand side matrix operand
2550  , typename MT5 > // Type of the right-hand side matrix operand
2552  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2553  {
// True if at least one operand type is not padded. In that case the SIMD loops stop at kpos
// and the scalar loops finish the range [kpos,kend); otherwise kpos equals kend and the
// scalar loops are disabled by the 'remainder &&' condition.
2554  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2555 
2556  const size_t M( A.rows() );
2557  const size_t N( B.columns() );
2558  const size_t K( A.columns() );
2559 
2560  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2561 
2562  size_t i( 0UL );
2563 
// Four rows of C per iteration; only executed if neither LOW nor UPP is set, so the full
// column range [0,N) is visited.
2564  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
2565  {
2566  size_t j( 0UL );
2567 
// 4x2 blocks of C.
2568  for( ; (j+2UL) <= N; j+=2UL )
2569  {
// k range derived from the triangular traits of A and B. kbegin is masked with
// size_t(-SIMDSIZE); the assertion below checks that the same mask applied to kend
// equals rounding down to a multiple of SIMDSIZE.
2570  const size_t kbegin( ( IsUpper<MT4>::value )
2571  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2572  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2573  const size_t kend( ( IsLower<MT4>::value )
2574  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
2575  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2576 
2577  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2578  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2579 
// NOTE(review): the accumulators are used with += without explicit initialization;
// presumably SIMDType default-constructs to zero - confirm.
2580  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2581  size_t k( kbegin );
2582 
2583  for( ; k<kpos; k+=SIMDSIZE ) {
2584  const SIMDType a1( A.load(i ,k) );
2585  const SIMDType a2( A.load(i+1UL,k) );
2586  const SIMDType a3( A.load(i+2UL,k) );
2587  const SIMDType a4( A.load(i+3UL,k) );
2588  const SIMDType b1( B.load(k,j ) );
2589  const SIMDType b2( B.load(k,j+1UL) );
2590  xmm1 += a1 * b1;
2591  xmm2 += a1 * b2;
2592  xmm3 += a2 * b1;
2593  xmm4 += a2 * b2;
2594  xmm5 += a3 * b1;
2595  xmm6 += a3 * b2;
2596  xmm7 += a4 * b1;
2597  xmm8 += a4 * b2;
2598  }
2599 
2600  (~C)(i ,j ) += sum( xmm1 );
2601  (~C)(i ,j+1UL) += sum( xmm2 );
2602  (~C)(i+1UL,j ) += sum( xmm3 );
2603  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2604  (~C)(i+2UL,j ) += sum( xmm5 );
2605  (~C)(i+2UL,j+1UL) += sum( xmm6 );
2606  (~C)(i+3UL,j ) += sum( xmm7 );
2607  (~C)(i+3UL,j+1UL) += sum( xmm8 );
2608 
// Scalar tail for the k values not covered by the SIMD loop.
2609  for( ; remainder && k<kend; ++k ) {
2610  (~C)(i ,j ) += A(i ,k) * B(k,j );
2611  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2612  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2613  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2614  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
2615  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
2616  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
2617  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
2618  }
2619  }
2620 
// 4x1 block for a single remaining column; here kend depends only on the lower trait of A.
2621  if( j < N )
2622  {
2623  const size_t kbegin( ( IsUpper<MT4>::value )
2624  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2625  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2626  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
2627 
2628  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2629  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2630 
2631  SIMDType xmm1, xmm2, xmm3, xmm4;
2632  size_t k( kbegin );
2633 
2634  for( ; k<kpos; k+=SIMDSIZE ) {
2635  const SIMDType b1( B.load(k,j) );
2636  xmm1 += A.load(i ,k) * b1;
2637  xmm2 += A.load(i+1UL,k) * b1;
2638  xmm3 += A.load(i+2UL,k) * b1;
2639  xmm4 += A.load(i+3UL,k) * b1;
2640  }
2641 
2642  (~C)(i ,j) += sum( xmm1 );
2643  (~C)(i+1UL,j) += sum( xmm2 );
2644  (~C)(i+2UL,j) += sum( xmm3 );
2645  (~C)(i+3UL,j) += sum( xmm4 );
2646 
2647  for( ; remainder && k<kend; ++k ) {
2648  (~C)(i ,j) += A(i ,k) * B(k,j);
2649  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2650  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
2651  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
2652  }
2653  }
2654  }
2655 
// Two rows of C per iteration. With LOW the columns end at i+2, with UPP they start at i;
// otherwise the full range [0,N) is visited.
2656  for( ; (i+2UL) <= M; i+=2UL )
2657  {
2658  const size_t jend( LOW ? i+2UL : N );
2659  size_t j( UPP ? i : 0UL );
2660 
2661  for( ; (j+2UL) <= jend; j+=2UL )
2662  {
2663  const size_t kbegin( ( IsUpper<MT4>::value )
2664  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2665  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2666  const size_t kend( ( IsLower<MT4>::value )
2667  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2668  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2669 
2670  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2671  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2672 
2673  SIMDType xmm1, xmm2, xmm3, xmm4;
2674  size_t k( kbegin );
2675 
2676  for( ; k<kpos; k+=SIMDSIZE ) {
2677  const SIMDType a1( A.load(i ,k) );
2678  const SIMDType a2( A.load(i+1UL,k) );
2679  const SIMDType b1( B.load(k,j ) );
2680  const SIMDType b2( B.load(k,j+1UL) );
2681  xmm1 += a1 * b1;
2682  xmm2 += a1 * b2;
2683  xmm3 += a2 * b1;
2684  xmm4 += a2 * b2;
2685  }
2686 
2687  (~C)(i ,j ) += sum( xmm1 );
2688  (~C)(i ,j+1UL) += sum( xmm2 );
2689  (~C)(i+1UL,j ) += sum( xmm3 );
2690  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2691 
2692  for( ; remainder && k<kend; ++k ) {
2693  (~C)(i ,j ) += A(i ,k) * B(k,j );
2694  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2695  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2696  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2697  }
2698  }
2699 
2700  if( j < jend )
2701  {
2702  const size_t kbegin( ( IsUpper<MT4>::value )
2703  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2704  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2705  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2706 
2707  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2708  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2709 
2710  SIMDType xmm1, xmm2;
2711  size_t k( kbegin );
2712 
2713  for( ; k<kpos; k+=SIMDSIZE ) {
2714  const SIMDType b1( B.load(k,j) );
2715  xmm1 += A.load(i ,k) * b1;
2716  xmm2 += A.load(i+1UL,k) * b1;
2717  }
2718 
2719  (~C)(i ,j) += sum( xmm1 );
2720  (~C)(i+1UL,j) += sum( xmm2 );
2721 
2722  for( ; remainder && k<kend; ++k ) {
2723  (~C)(i ,j) += A(i ,k) * B(k,j);
2724  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2725  }
2726  }
2727  }
2728 
// Last single row of C in case a row is left over: 1x2 and 1x1 blocks.
2729  if( i < M )
2730  {
2731  const size_t jend( LOW ? i+1UL : N );
2732  size_t j( UPP ? i : 0UL );
2733 
2734  for( ; (j+2UL) <= jend; j+=2UL )
2735  {
2736  const size_t kbegin( ( IsUpper<MT4>::value )
2737  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2738  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2739  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2740 
2741  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2742  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2743 
2744  SIMDType xmm1, xmm2;
2745  size_t k( kbegin );
2746 
2747  for( ; k<kpos; k+=SIMDSIZE ) {
2748  const SIMDType a1( A.load(i,k) );
2749  xmm1 += a1 * B.load(k,j );
2750  xmm2 += a1 * B.load(k,j+1UL);
2751  }
2752 
2753  (~C)(i,j ) += sum( xmm1 );
2754  (~C)(i,j+1UL) += sum( xmm2 );
2755 
2756  for( ; remainder && k<kend; ++k ) {
2757  (~C)(i,j ) += A(i,k) * B(k,j );
2758  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2759  }
2760  }
2761 
// Final 1x1 element; the k range always ends at K here, independent of the operand traits.
2762  if( j < jend )
2763  {
2764  const size_t kbegin( ( IsUpper<MT4>::value )
2765  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2766  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2767 
2768  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2769  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2770 
2771  SIMDType xmm1;
2772  size_t k( kbegin );
2773 
2774  for( ; k<kpos; k+=SIMDSIZE ) {
2775  xmm1 += A.load(i,k) * B.load(k,j);
2776  }
2777 
2778  (~C)(i,j) += sum( xmm1 );
2779 
2780  for( ; remainder && k<K; ++k ) {
2781  (~C)(i,j) += A(i,k) * B(k,j);
2782  }
2783  }
2784  }
2785  }
2787  //**********************************************************************************************
2788 
2789  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment kernel (fallback): forwards unchanged to
// selectDefaultAddAssignKernel( C, A, B ).
// NOTE(review): listing line 2806 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the condition that
// selects this overload over the vectorized one - confirm against the original header.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2803  template< typename MT3 // Type of the left-hand side target matrix
2804  , typename MT4 // Type of the left-hand side matrix operand
2805  , typename MT5 > // Type of the right-hand side matrix operand
2807  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2808  {
2809  selectDefaultAddAssignKernel( C, A, B );
2810  }
2812  //**********************************************************************************************
2813 
2814  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Vectorized large-matrix addition assignment kernel: dispatches on the LOW and UPP flags to
// lmmm() (LOW), ummm() (UPP, checked only if LOW is not set) or mmm() (neither), passing
// ElementType(1) for both scalar arguments.
// NOTE(review): the two scalars are presumably the scaling factors for A*B and for the
// existing content of C (i.e. C = 1*A*B + 1*C) - confirm against the mmm() declaration.
// NOTE(review): listing line 2832 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the enabling condition.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2829  template< typename MT3 // Type of the left-hand side target matrix
2830  , typename MT4 // Type of the left-hand side matrix operand
2831  , typename MT5 > // Type of the right-hand side matrix operand
2833  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2834  {
2835  if( LOW )
2836  lmmm( C, A, B, ElementType(1), ElementType(1) );
2837  else if( UPP )
2838  ummm( C, A, B, ElementType(1), ElementType(1) );
2839  else
2840  mmm( C, A, B, ElementType(1), ElementType(1) );
2841  }
2843  //**********************************************************************************************
2844 
2845  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS addition assignment kernel (fallback): forwards unchanged to
// selectLargeAddAssignKernel( C, A, B ).
// NOTE(review): listing line 2862 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the condition that
// selects this overload over the BLAS-based one - confirm against the original header.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2859  template< typename MT3 // Type of the left-hand side target matrix
2860  , typename MT4 // Type of the left-hand side matrix operand
2861  , typename MT5 > // Type of the right-hand side matrix operand
2863  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2864  {
2865  selectLargeAddAssignKernel( C, A, B );
2866  }
2868  //**********************************************************************************************
2869 
2870  //**BLAS-based addition assignment to dense matrices********************************************
2871 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
2872 
// BLAS-based addition assignment kernel. Three cases are distinguished at compile time:
//  - A is triangular: B is evaluated into a temporary of the result type of C, trmm() is
//    applied to it with A on the left side, and the temporary is added to C.
//  - otherwise, B is triangular: A is evaluated into a temporary, trmm() is applied to it
//    with B on the right side, and the temporary is added to C.
//  - otherwise: gemm() is called directly on C with ET(1) for both scalar arguments.
// NOTE(review): listing line 2888 (between the template header and the function name) is not
// present in this listing; presumably it carries the return type and the enabling condition.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
2885  template< typename MT3 // Type of the left-hand side target matrix
2886  , typename MT4 // Type of the left-hand side matrix operand
2887  , typename MT5 > // Type of the right-hand side matrix operand
2889  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2890  {
2891  typedef ElementType_<MT3> ET;
2892 
2893  if( IsTriangular<MT4>::value ) {
// The product is formed in tmp (initialized with B) and only then added to C.
2894  ResultType_<MT3> tmp( serial( B ) );
2895  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2896  addAssign( C, tmp );
2897  }
2898  else if( IsTriangular<MT5>::value ) {
// Same scheme with the roles swapped: tmp is initialized with A, B is applied from the right.
2899  ResultType_<MT3> tmp( serial( A ) );
2900  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2901  addAssign( C, tmp );
2902  }
2903  else {
2904  gemm( C, A, B, ET(1), ET(1) );
2905  }
2906  }
2908 #endif
2909  //**********************************************************************************************
2910 
2911  //**Addition assignment to sparse matrices******************************************************
2912  // No special implementation for the addition assignment to sparse matrices.
2913  //**********************************************************************************************
2914 
2915  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a dense matrix-transpose dense matrix multiplication expression
// to a dense matrix. Returns immediately if the target has no rows or no columns or if the
// left operand has no columns. Otherwise both operands are evaluated via serial() into
// objects of type LT and RT and the work is passed to selectSubAssignKernel().
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
2928  template< typename MT // Type of the target dense matrix
2929  , bool SO > // Storage order of the target dense matrix
2930  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2931  {
2933 
2934  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2935  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2936 
// Nothing to subtract if the result is empty or the inner dimension is zero.
2937  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2938  return;
2939  }
2940 
2941  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2942  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2943 
2944  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2945  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2946  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2947  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2948  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2949  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2950 
2951  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2952  }
2954  //**********************************************************************************************
2955 
2956  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment: calls selectSmallSubAssignKernel() if the
// selection condition holds and selectBlasSubAssignKernel() otherwise. The visible part of
// the condition compares the number of target elements (rows * columns) against
// DMATTDMATMULT_THRESHOLD.
// NOTE(review): listing line 2972, which opens the 'if' and holds the first part of the
// condition, is not present in this listing - confirm the complete condition against the
// original header.
//
// \param C The target dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
2967  template< typename MT3 // Type of the left-hand side target matrix
2968  , typename MT4 // Type of the left-hand side matrix operand
2969  , typename MT5 > // Type of the right-hand side matrix operand
2970  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2971  {
2973  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
2974  selectSmallSubAssignKernel( C, A, B );
2975  else
2976  selectBlasSubAssignKernel( C, A, B );
2977  }
2979  //**********************************************************************************************
2980 
2981  //**Default subtraction assignment to row-major dense matrices (general/general)****************
// Default (non-vectorized) subtraction assignment kernel for a row-major target when neither
// operand is diagonal: C(i,j) -= A(i,k) * B(k,j) over the index ranges that remain after
// taking the compile-time (strictly) lower/upper traits of A and B and the LOW/UPP flags
// into account. Iteration order is i (rows), then j (columns), then k.
//
// \param C The row-major target dense matrix; updated in place.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
2995  template< typename MT3 // Type of the left-hand side target matrix
2996  , typename MT4 // Type of the left-hand side matrix operand
2997  , typename MT5 > // Type of the right-hand side matrix operand
2998  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2999  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3000  {
3001  const size_t M( A.rows() );
3002  const size_t N( B.columns() );
3003  const size_t K( A.columns() );
3004 
3005  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3006 
// Row range: leading rows are skipped for a strictly lower A (two if B is strictly lower as
// well and M > 1), trailing rows for a strictly upper A (two if B is strictly upper as well).
3007  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
3008  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
3009  :( 0UL ) );
3010  const size_t iend( ( IsStrictlyUpper<MT4>::value )
3011  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
3012  :( M ) );
3013  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3014 
3015  for( size_t i=ibegin; i<iend; ++i )
3016  {
// NOTE(review): listing lines 3018, 3021, 3025 and 3028 are not present in this listing, so
// the complete selection expressions for jbegin and jend cannot be read from here.
3017  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
3019  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
3020  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
3022  ?( UPP ? max( i, 1UL ) : 1UL )
3023  :( UPP ? i : 0UL ) ) );
3024  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
3026  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
3027  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
3029  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
3030  :( LOW ? i+1UL : N ) ) );
3031 
// With LOW or UPP set the column range of a row may be empty (jbegin > jend); skip the row.
3032  if( ( LOW || UPP ) && ( jbegin > jend ) ) continue;
3033  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3034 
3035  for( size_t j=jbegin; j<jend; ++j )
3036  {
// k range [kbegin,kend) restricted by the (strictly) upper/lower traits of A and B.
3037  const size_t kbegin( ( IsUpper<MT4>::value )
3038  ?( ( IsLower<MT5>::value )
3039  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3040  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3041  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3042  :( ( IsLower<MT5>::value )
3043  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3044  :( 0UL ) ) );
3045  const size_t kend( ( IsLower<MT4>::value )
3046  ?( ( IsUpper<MT5>::value )
3047  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
3048  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3049  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3050  :( ( IsUpper<MT5>::value )
3051  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3052  :( K ) ) );
3053  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
3054 
// The mask size_t(-2) clears the lowest bit: the k count is rounded down to an even
// number so that two products are handled per iteration, plus one leftover if odd.
3055  const size_t knum( kend - kbegin );
3056  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
3057 
3058  for( size_t k=kbegin; k<kpos; k+=2UL ) {
3059  (~C)(i,j) -= A(i,k ) * B(k ,j);
3060  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
3061  }
3062  if( kpos < kend ) {
3063  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
3064  }
3065  }
3066  }
3067  }
3069  //**********************************************************************************************
3070 
3071  //**Default subtraction assignment to column-major dense matrices (general/general)*************
// Default (non-vectorized) subtraction assignment kernel for a column-major target when
// neither operand is diagonal: C(i,j) -= A(i,k) * B(k,j) over the index ranges that remain
// after taking the compile-time (strictly) lower/upper traits of A and B and the LOW/UPP
// flags into account. Iteration order is j (columns), then i (rows), then k.
//
// \param C The column-major target dense matrix; updated in place.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
3085  template< typename MT3 // Type of the left-hand side target matrix
3086  , typename MT4 // Type of the left-hand side matrix operand
3087  , typename MT5 > // Type of the right-hand side matrix operand
3088  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
3089  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3090  {
3091  const size_t M( A.rows() );
3092  const size_t N( B.columns() );
3093  const size_t K( A.columns() );
3094 
3095  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3096 
// Column range: leading columns are skipped for a strictly upper B (two if A is strictly
// upper as well and N > 1), trailing columns for a strictly lower B (two if A is strictly
// lower as well).
3097  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
3098  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
3099  :( 0UL ) );
3100  const size_t jend( ( IsStrictlyLower<MT5>::value )
3101  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
3102  :( N ) );
3103  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3104 
3105  for( size_t j=jbegin; j<jend; ++j )
3106  {
// NOTE(review): listing lines 3108, 3111, 3115 and 3118 are not present in this listing, so
// the complete selection expressions for ibegin and iend cannot be read from here.
3107  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
3109  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
3110  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3112  ?( LOW ? max( j, 1UL ) : 1UL )
3113  :( LOW ? j : 0UL ) ) );
3114  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
3116  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
3117  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
3119  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
3120  :( UPP ? j+1UL : M ) ) );
3121 
// With LOW or UPP set the row range of a column may be empty (ibegin > iend); skip the column.
3122  if( ( LOW || UPP ) && ( ibegin > iend ) ) continue;
3123  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3124 
3125  for( size_t i=ibegin; i<iend; ++i )
3126  {
// k range [kbegin,kend) restricted by the (strictly) upper/lower traits of A and B.
3127  const size_t kbegin( ( IsUpper<MT4>::value )
3128  ?( ( IsLower<MT5>::value )
3129  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3130  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3131  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3132  :( ( IsLower<MT5>::value )
3133  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3134  :( 0UL ) ) );
3135  const size_t kend( ( IsLower<MT4>::value )
3136  ?( ( IsUpper<MT5>::value )
3137  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
3138  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3139  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3140  :( ( IsUpper<MT5>::value )
3141  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3142  :( K ) ) );
3143  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
3144 
// The mask size_t(-2) clears the lowest bit: the k count is rounded down to an even
// number so that two products are handled per iteration, plus one leftover if odd.
3145  const size_t knum( kend - kbegin );
3146  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
3147 
3148  for( size_t k=kbegin; k<kpos; k+=2UL ) {
3149  (~C)(i,j) -= A(i,k ) * B(k ,j);
3150  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
3151  }
3152  if( kpos < kend ) {
3153  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
3154  }
3155  }
3156  }
3157  }
3159  //**********************************************************************************************
3160 
3161  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
// Default (non-vectorized) subtraction assignment kernel for a row-major target when the
// right-hand side operand is diagonal and the left-hand side operand is not. Since only the
// diagonal of B is read, every update has the form C(i,j) -= A(i,j) * B(j,j).
//
// \param C The row-major target dense matrix; updated in place.
// \param A The left-hand side (non-diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \return void
3175  template< typename MT3 // Type of the left-hand side target matrix
3176  , typename MT4 // Type of the left-hand side matrix operand
3177  , typename MT5 > // Type of the right-hand side matrix operand
3178  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3179  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3180  {
3181  const size_t M( A.rows() );
3182  const size_t N( B.columns() );
3183 
3184  for( size_t i=0UL; i<M; ++i )
3185  {
// Column range [jbegin,jend) of row i, narrowed by the compile-time upper/lower
// (strictly) triangular traits of A; without such a trait the full range [0,N) is used.
3186  const size_t jbegin( ( IsUpper<MT4>::value )
3187  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3188  :( 0UL ) );
3189  const size_t jend( ( IsLower<MT4>::value )
3190  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
3191  :( N ) );
3192  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3193 
// The mask size_t(-2) clears the lowest bit: the column count is rounded down to an even
// number so that the loop below can process two columns per iteration.
3194  const size_t jnum( jend - jbegin );
3195  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3196 
3197  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3198  (~C)(i,j ) -= A(i,j ) * B(j ,j );
3199  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
3200  }
// Single leftover column in case the column count is odd.
3201  if( jpos < jend ) {
3202  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos);
3203  }
3204  }
3205  }
3207  //**********************************************************************************************
3208 
3209  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
// Default (non-vectorized) subtraction assignment kernel for a column-major target when the
// right-hand side operand is diagonal and the left-hand side operand is not. The index space
// is traversed in tiles of BLOCK_SIZE x BLOCK_SIZE (columns jj.., rows ii..); within a tile
// every update has the form C(i,j) -= A(i,j) * B(j,j).
//
// \param C The column-major target dense matrix; updated in place.
// \param A The left-hand side (non-diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \return void
3223  template< typename MT3 // Type of the left-hand side target matrix
3224  , typename MT4 // Type of the left-hand side matrix operand
3225  , typename MT5 > // Type of the right-hand side matrix operand
3226  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3227  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3228  {
3229  constexpr size_t block( BLOCK_SIZE );
3230 
3231  const size_t M( A.rows() );
3232  const size_t N( B.columns() );
3233 
3234  for( size_t jj=0UL; jj<N; jj+=block ) {
3235  const size_t jend( min( N, jj+block ) );
3236  for( size_t ii=0UL; ii<M; ii+=block ) {
3237  const size_t iend( min( M, ii+block ) );
3238  for( size_t j=jj; j<jend; ++j )
3239  {
// Row range of column j inside the current tile, additionally clipped by the
// compile-time lower/upper (strictly) triangular traits of A. If the clipped range
// is empty (ibegin >= ipos) the inner loop simply does not execute.
3240  const size_t ibegin( ( IsLower<MT4>::value )
3241  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
3242  :( ii ) );
3243  const size_t ipos( ( IsUpper<MT4>::value )
3244  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
3245  :( iend ) );
3246 
3247  for( size_t i=ibegin; i<ipos; ++i ) {
3248  (~C)(i,j) -= A(i,j) * B(j,j);
3249  }
3250  }
3251  }
3252  }
3253  }
3255  //**********************************************************************************************
3256 
3257  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
/*!\brief Default subtraction assignment kernel (\f$ C-=A*B \f$) for a row-major target, a
//        diagonal left-hand side operand and a non-diagonal right-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (only the elements A(i,i) are read).
// \param B The right-hand side multiplication operand.
//
// Because only the diagonal of \a A is read, every update reduces to
// C(i,j) -= A(i,i) * B(i,j). The index space is traversed in tiles of BLOCK_SIZE x BLOCK_SIZE,
// row tiles in the outer loop and column tiles in the inner loop.
*/
3271  template< typename MT3 // Type of the left-hand side target matrix
3272  , typename MT4 // Type of the left-hand side matrix operand
3273  , typename MT5 > // Type of the right-hand side matrix operand
3274  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3275  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3276  {
3277  constexpr size_t block( BLOCK_SIZE );
3278 
3279  const size_t M( A.rows() );
3280  const size_t N( B.columns() );
3281 
      // Outer tiling over rows [ii,iend) and columns [jj,jend).
3282  for( size_t ii=0UL; ii<M; ii+=block ) {
3283  const size_t iend( min( M, ii+block ) );
3284  for( size_t jj=0UL; jj<N; jj+=block ) {
3285  const size_t jend( min( N, jj+block ) );
3286  for( size_t i=ii; i<iend; ++i )
3287  {
      // For an upper B the columns of row i start at i (i+1 if strictly upper), but never
      // before the start of the current column tile.
3288  const size_t jbegin( ( IsUpper<MT5>::value )
3289  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
3290  :( jj ) );
      // For a lower B the columns of row i end before i+1 (before i if strictly lower), but
      // never beyond the end of the current column tile. If jbegin >= jpos the loop is empty.
3291  const size_t jpos( ( IsLower<MT5>::value )
3292  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
3293  :( jend ) );
3294 
3295  for( size_t j=jbegin; j<jpos; ++j ) {
3296  (~C)(i,j) -= A(i,i) * B(i,j);
3297  }
3298  }
3299  }
3300  }
3301  }
3303  //**********************************************************************************************
3304 
3305  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
/*!\brief Default subtraction assignment kernel (\f$ C-=A*B \f$) for a column-major target, a
//        diagonal left-hand side operand and a non-diagonal right-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (only the elements A(i,i) are read).
// \param B The right-hand side multiplication operand.
//
// Every update reduces to C(i,j) -= A(i,i) * B(i,j). The target is traversed column by column
// without tiling; the row loop is manually unrolled by a factor of two.
*/
3319  template< typename MT3 // Type of the left-hand side target matrix
3320  , typename MT4 // Type of the left-hand side matrix operand
3321  , typename MT5 > // Type of the right-hand side matrix operand
3322  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3323  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3324  {
3325  const size_t M( A.rows() );
3326  const size_t N( B.columns() );
3327 
3328  for( size_t j=0UL; j<N; ++j )
3329  {
      // Row range [ibegin,iend) of column j, narrowed by the triangular structure of B:
      // a lower B starts at j (j+1 if strictly lower), an upper B ends before j+1 (before j
      // if strictly upper).
3330  const size_t ibegin( ( IsLower<MT5>::value )
3331  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3332  :( 0UL ) );
3333  const size_t iend( ( IsUpper<MT5>::value )
3334  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3335  :( M ) );
3336  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3337 
      // ipos marks the end of the part that can be handled two rows at a time
      // (inum with its lowest bit cleared, i.e. rounded down to an even count).
3338  const size_t inum( iend - ibegin );
3339  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
3340 
3341  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3342  (~C)(i ,j) -= A(i ,i ) * B(i ,j);
3343  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
3344  }
      // Remaining single row in case of an odd number of rows.
3345  if( ipos < iend ) {
3346  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j);
3347  }
3348  }
3349  }
3351  //**********************************************************************************************
3352 
3353  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
/*!\brief Default subtraction assignment kernel (\f$ C-=A*B \f$) for two diagonal operands.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (only the elements A(i,i) are read).
// \param B The right-hand side multiplication operand (only the elements B(i,i) are read).
//
// Only the diagonal elements of the target are updated: C(i,i) -= A(i,i) * B(i,i) for all
// i in [0,A.rows()). The target is taken as a plain MT3 reference, so this overload does not
// distinguish between storage orders.
*/
3367  template< typename MT3 // Type of the left-hand side target matrix
3368  , typename MT4 // Type of the left-hand side matrix operand
3369  , typename MT5 > // Type of the right-hand side matrix operand
3370  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
3371  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3372  {
3373  for( size_t i=0UL; i<A.rows(); ++i ) {
3374  C(i,i) -= A(i,i) * B(i,i);
3375  }
3376  }
3378  //**********************************************************************************************
3379 
3380  //**Default subtraction assignment to dense matrices (small matrices)***************************
/*!\brief Subtraction assignment kernel for small matrices (\f$ C-=A*B \f$) that simply forwards
//        to the default kernel.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 3397) is elided in
// this listing, so the condition under which it is selected cannot be seen here.
*/
3394  template< typename MT3 // Type of the left-hand side target matrix
3395  , typename MT4 // Type of the left-hand side matrix operand
3396  , typename MT5 > // Type of the right-hand side matrix operand
3398  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3399  {
      // Delegates to the selectDefaultSubAssignKernel() overload set.
3400  selectDefaultSubAssignKernel( ~C, A, B );
3401  }
3403  //**********************************************************************************************
3404 
3405  //**Default subtraction assignment to row-major dense matrices (small matrices)*****************
/*!\brief Subtraction assignment kernel for small matrices (\f$ C-=A*B \f$) and a row-major
//        target, computed with SIMD dot products along the inner dimension.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The rows of C are processed in pairs followed by an optional single trailing row. Within a row
// group the columns are processed in groups of four, then two, then an optional single column.
// For every tile the range of the inner index k is narrowed according to the triangular
// structure of A and B, the products A.load(i,k) * B.load(k,j) are accumulated in SIMD
// registers, reduced via sum() and subtracted from C. A scalar loop handles the elements beyond
// the last full SIMD step whenever the operands are not both padded.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 3423) is elided in
// this listing; LOW, UPP, SIMDSIZE and SIMDType are defined outside the visible range.
*/
3420  template< typename MT3 // Type of the left-hand side target matrix
3421  , typename MT4 // Type of the left-hand side matrix operand
3422  , typename MT5 > // Type of the right-hand side matrix operand
3424  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3425  {
      // The scalar remainder loops are only required if at least one operand is not padded.
3426  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
3427 
3428  const size_t M( A.rows() );
3429  const size_t N( B.columns() );
3430  const size_t K( A.columns() );
3431 
      // If only a lower or upper part of C is computed, the result has to be square.
3432  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3433 
3434  size_t i( 0UL );
3435 
      // Main loop: two rows of C at a time.
3436  for( ; (i+2UL) <= M; i+=2UL )
3437  {
      // With LOW only the columns [0,i+2) are computed, with UPP only the columns from i on.
3438  const size_t jend( LOW ? i+2UL : N );
3439  size_t j( UPP ? i : 0UL );
3440 
      // 2x4 tiles; skipped entirely if both LOW and UPP are set.
3441  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
3442  {
      // First relevant k (upper A starts at row index i, lower B at column index j),
      // masked down to a multiple of SIMDSIZE.
3443  const size_t kbegin( ( IsUpper<MT4>::value )
3444  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3445  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One past the last relevant k (lower A ends with the last row of the tile, upper B
      // with the last column of the tile).
3446  const size_t kend( ( IsLower<MT4>::value )
3447  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
3448  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
3449 
      // End of the SIMD loop: kend rounded down to a multiple of SIMDSIZE if a scalar
      // remainder loop follows, otherwise kend itself.
3450  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3451  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3452 
      // NOTE(review): the accumulators are not explicitly initialized here; presumably the
      // default constructor of SIMDType yields zero - confirm against its definition.
3453  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3454  size_t k( kbegin );
3455 
3456  for( ; k<kpos; k+=SIMDSIZE ) {
3457  const SIMDType a1( A.load(i ,k) );
3458  const SIMDType a2( A.load(i+1UL,k) );
3459  const SIMDType b1( B.load(k,j ) );
3460  const SIMDType b2( B.load(k,j+1UL) );
3461  const SIMDType b3( B.load(k,j+2UL) );
3462  const SIMDType b4( B.load(k,j+3UL) );
3463  xmm1 += a1 * b1;
3464  xmm2 += a1 * b2;
3465  xmm3 += a1 * b3;
3466  xmm4 += a1 * b4;
3467  xmm5 += a2 * b1;
3468  xmm6 += a2 * b2;
3469  xmm7 += a2 * b3;
3470  xmm8 += a2 * b4;
3471  }
3472 
      // Horizontal reduction of the accumulators into the eight target elements.
3473  (~C)(i ,j ) -= sum( xmm1 );
3474  (~C)(i ,j+1UL) -= sum( xmm2 );
3475  (~C)(i ,j+2UL) -= sum( xmm3 );
3476  (~C)(i ,j+3UL) -= sum( xmm4 );
3477  (~C)(i+1UL,j ) -= sum( xmm5 );
3478  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
3479  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
3480  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
3481 
      // Scalar handling of the k values in [kpos,kend).
3482  for( ; remainder && k<kend; ++k ) {
3483  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3484  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3485  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL);
3486  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL);
3487  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3488  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3489  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL);
3490  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL);
3491  }
3492  }
3493 
      // 2x2 tiles for the remaining column pairs.
3494  for( ; (j+2UL) <= jend; j+=2UL )
3495  {
3496  const size_t kbegin( ( IsUpper<MT4>::value )
3497  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3498  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3499  const size_t kend( ( IsLower<MT4>::value )
3500  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3501  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3502 
3503  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3504  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3505 
3506  SIMDType xmm1, xmm2, xmm3, xmm4;
3507  size_t k( kbegin );
3508 
3509  for( ; k<kpos; k+=SIMDSIZE ) {
3510  const SIMDType a1( A.load(i ,k) );
3511  const SIMDType a2( A.load(i+1UL,k) );
3512  const SIMDType b1( B.load(k,j ) );
3513  const SIMDType b2( B.load(k,j+1UL) );
3514  xmm1 += a1 * b1;
3515  xmm2 += a1 * b2;
3516  xmm3 += a2 * b1;
3517  xmm4 += a2 * b2;
3518  }
3519 
3520  (~C)(i ,j ) -= sum( xmm1 );
3521  (~C)(i ,j+1UL) -= sum( xmm2 );
3522  (~C)(i+1UL,j ) -= sum( xmm3 );
3523  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3524 
3525  for( ; remainder && k<kend; ++k ) {
3526  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3527  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3528  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3529  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3530  }
3531  }
3532 
      // 2x1 tile for a remaining single column; kend is only narrowed by a lower A here.
3533  if( j < jend )
3534  {
3535  const size_t kbegin( ( IsUpper<MT4>::value )
3536  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3537  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3538  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3539 
3540  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3541  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3542 
3543  SIMDType xmm1, xmm2;
3544  size_t k( kbegin );
3545 
3546  for( ; k<kpos; k+=SIMDSIZE ) {
3547  const SIMDType b1( B.load(k,j) );
3548  xmm1 += A.load(i ,k) * b1;
3549  xmm2 += A.load(i+1UL,k) * b1;
3550  }
3551 
3552  (~C)(i ,j) -= sum( xmm1 );
3553  (~C)(i+1UL,j) -= sum( xmm2 );
3554 
3555  for( ; remainder && k<kend; ++k ) {
3556  (~C)(i ,j) -= A(i ,k) * B(k,j);
3557  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3558  }
3559  }
3560  }
3561 
      // Trailing single row in case of an odd number of rows.
3562  if( i < M )
3563  {
      // With LOW only the columns [0,i+1) are computed, with UPP only the columns from i on.
3564  const size_t jend( LOW ? i+1UL : N );
3565  size_t j( UPP ? i : 0UL );
3566 
      // 1x4 tiles; kend is only narrowed by an upper B here.
3567  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
3568  {
3569  const size_t kbegin( ( IsUpper<MT4>::value )
3570  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3571  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3572  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
3573 
3574  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3575  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3576 
3577  SIMDType xmm1, xmm2, xmm3, xmm4;
3578  size_t k( kbegin );
3579 
3580  for( ; k<kpos; k+=SIMDSIZE ) {
3581  const SIMDType a1( A.load(i,k) );
3582  xmm1 += a1 * B.load(k,j );
3583  xmm2 += a1 * B.load(k,j+1UL);
3584  xmm3 += a1 * B.load(k,j+2UL);
3585  xmm4 += a1 * B.load(k,j+3UL);
3586  }
3587 
3588  (~C)(i,j ) -= sum( xmm1 );
3589  (~C)(i,j+1UL) -= sum( xmm2 );
3590  (~C)(i,j+2UL) -= sum( xmm3 );
3591  (~C)(i,j+3UL) -= sum( xmm4 );
3592 
3593  for( ; remainder && k<kend; ++k ) {
3594  (~C)(i,j ) -= A(i,k) * B(k,j );
3595  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3596  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL);
3597  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL);
3598  }
3599  }
3600 
      // 1x2 tiles for the remaining column pairs.
3601  for( ; (j+2UL) <= jend; j+=2UL )
3602  {
3603  const size_t kbegin( ( IsUpper<MT4>::value )
3604  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3605  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3606  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3607 
3608  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3609  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3610 
3611  SIMDType xmm1, xmm2;
3612  size_t k( kbegin );
3613 
3614  for( ; k<kpos; k+=SIMDSIZE ) {
3615  const SIMDType a1( A.load(i,k) );
3616  xmm1 += a1 * B.load(k,j );
3617  xmm2 += a1 * B.load(k,j+1UL);
3618  }
3619 
3620  (~C)(i,j ) -= sum( xmm1 );
3621  (~C)(i,j+1UL) -= sum( xmm2 );
3622 
3623  for( ; remainder && k<kend; ++k ) {
3624  (~C)(i,j ) -= A(i,k) * B(k,j );
3625  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3626  }
3627  }
3628 
      // Final single element; no upper bound is derived here, the k loop always runs up to K.
3629  if( j < jend )
3630  {
3631  const size_t kbegin( ( IsUpper<MT4>::value )
3632  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3633  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3634 
3635  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3636  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3637 
3638  SIMDType xmm1;
3639  size_t k( kbegin );
3640 
3641  for( ; k<kpos; k+=SIMDSIZE ) {
3642  xmm1 += A.load(i,k) * B.load(k,j);
3643  }
3644 
3645  (~C)(i,j) -= sum( xmm1 );
3646 
3647  for( ; remainder && k<K; ++k ) {
3648  (~C)(i,j) -= A(i,k) * B(k,j);
3649  }
3650  }
3651  }
3652  }
3654  //**********************************************************************************************
3655 
3656  //**Default subtraction assignment to column-major dense matrices (small matrices)**************
/*!\brief Subtraction assignment kernel for small matrices (\f$ C-=A*B \f$) and a column-major
//        target, computed with SIMD dot products along the inner dimension.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// If neither LOW nor UPP is set, the rows of C are first processed in groups of four (4x2 and
// 4x1 tiles). The remaining rows are processed in pairs (2x2 and 2x1 tiles), followed by an
// optional single trailing row (1x2 and 1x1 tiles). For every tile the range of the inner
// index k is narrowed according to the triangular structure of A and B, the products
// A.load(i,k) * B.load(k,j) are accumulated in SIMD registers, reduced via sum() and subtracted
// from C. A scalar loop handles the elements beyond the last full SIMD step whenever the
// operands are not both padded.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 3674) is elided in
// this listing; LOW, UPP, SIMDSIZE and SIMDType are defined outside the visible range.
*/
3671  template< typename MT3 // Type of the left-hand side target matrix
3672  , typename MT4 // Type of the left-hand side matrix operand
3673  , typename MT5 > // Type of the right-hand side matrix operand
3675  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3676  {
      // The scalar remainder loops are only required if at least one operand is not padded.
3677  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
3678 
3679  const size_t M( A.rows() );
3680  const size_t N( B.columns() );
3681  const size_t K( A.columns() );
3682 
      // If only a lower or upper part of C is computed, the result has to be square.
3683  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
3684 
3685  size_t i( 0UL );
3686 
      // Four rows of C at a time; only used if the complete result is computed.
3687  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
3688  {
3689  size_t j( 0UL );
3690 
      // 4x2 tiles.
3691  for( ; (j+2UL) <= N; j+=2UL )
3692  {
      // First relevant k (upper A starts at row index i, lower B at column index j),
      // masked down to a multiple of SIMDSIZE.
3693  const size_t kbegin( ( IsUpper<MT4>::value )
3694  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3695  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One past the last relevant k (lower A ends with the last row of the tile, upper B
      // with the last column of the tile).
3696  const size_t kend( ( IsLower<MT4>::value )
3697  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
3698  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3699 
      // End of the SIMD loop: kend rounded down to a multiple of SIMDSIZE if a scalar
      // remainder loop follows, otherwise kend itself.
3700  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3701  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3702 
      // NOTE(review): the accumulators are not explicitly initialized here; presumably the
      // default constructor of SIMDType yields zero - confirm against its definition.
3703  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3704  size_t k( kbegin );
3705 
3706  for( ; k<kpos; k+=SIMDSIZE ) {
3707  const SIMDType a1( A.load(i ,k) );
3708  const SIMDType a2( A.load(i+1UL,k) );
3709  const SIMDType a3( A.load(i+2UL,k) );
3710  const SIMDType a4( A.load(i+3UL,k) );
3711  const SIMDType b1( B.load(k,j ) );
3712  const SIMDType b2( B.load(k,j+1UL) );
3713  xmm1 += a1 * b1;
3714  xmm2 += a1 * b2;
3715  xmm3 += a2 * b1;
3716  xmm4 += a2 * b2;
3717  xmm5 += a3 * b1;
3718  xmm6 += a3 * b2;
3719  xmm7 += a4 * b1;
3720  xmm8 += a4 * b2;
3721  }
3722 
      // Horizontal reduction of the accumulators into the eight target elements.
3723  (~C)(i ,j ) -= sum( xmm1 );
3724  (~C)(i ,j+1UL) -= sum( xmm2 );
3725  (~C)(i+1UL,j ) -= sum( xmm3 );
3726  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3727  (~C)(i+2UL,j ) -= sum( xmm5 );
3728  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
3729  (~C)(i+3UL,j ) -= sum( xmm7 );
3730  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
3731 
      // Scalar handling of the k values in [kpos,kend).
3732  for( ; remainder && k<kend; ++k ) {
3733  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3734  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3735  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3736  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3737  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3738  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL);
3739  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3740  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL);
3741  }
3742  }
3743 
      // 4x1 tile for a remaining single column; kend is only narrowed by a lower A here.
3744  if( j < N )
3745  {
3746  const size_t kbegin( ( IsUpper<MT4>::value )
3747  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3748  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3749  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
3750 
3751  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3752  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3753 
3754  SIMDType xmm1, xmm2, xmm3, xmm4;
3755  size_t k( kbegin );
3756 
3757  for( ; k<kpos; k+=SIMDSIZE ) {
3758  const SIMDType b1( B.load(k,j) );
3759  xmm1 += A.load(i ,k) * b1;
3760  xmm2 += A.load(i+1UL,k) * b1;
3761  xmm3 += A.load(i+2UL,k) * b1;
3762  xmm4 += A.load(i+3UL,k) * b1;
3763  }
3764 
3765  (~C)(i ,j) -= sum( xmm1 );
3766  (~C)(i+1UL,j) -= sum( xmm2 );
3767  (~C)(i+2UL,j) -= sum( xmm3 );
3768  (~C)(i+3UL,j) -= sum( xmm4 );
3769 
3770  for( ; remainder && k<kend; ++k ) {
3771  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3772  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3773  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3774  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3775  }
3776  }
3777  }
3778 
      // Two rows of C at a time (all rows if LOW or UPP is set, otherwise the rows left over
      // by the four-row loop above).
3779  for( ; (i+2UL) <= M; i+=2UL )
3780  {
      // With LOW only the columns [0,i+2) are computed, with UPP only the columns from i on.
3781  const size_t jend( LOW ? i+2UL : N );
3782  size_t j( UPP ? i : 0UL );
3783 
      // 2x2 tiles.
3784  for( ; (j+2UL) <= jend; j+=2UL )
3785  {
3786  const size_t kbegin( ( IsUpper<MT4>::value )
3787  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3788  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3789  const size_t kend( ( IsLower<MT4>::value )
3790  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3791  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3792 
3793  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3794  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3795 
3796  SIMDType xmm1, xmm2, xmm3, xmm4;
3797  size_t k( kbegin );
3798 
3799  for( ; k<kpos; k+=SIMDSIZE ) {
3800  const SIMDType a1( A.load(i ,k) );
3801  const SIMDType a2( A.load(i+1UL,k) );
3802  const SIMDType b1( B.load(k,j ) );
3803  const SIMDType b2( B.load(k,j+1UL) );
3804  xmm1 += a1 * b1;
3805  xmm2 += a1 * b2;
3806  xmm3 += a2 * b1;
3807  xmm4 += a2 * b2;
3808  }
3809 
3810  (~C)(i ,j ) -= sum( xmm1 );
3811  (~C)(i ,j+1UL) -= sum( xmm2 );
3812  (~C)(i+1UL,j ) -= sum( xmm3 );
3813  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3814 
3815  for( ; remainder && k<kend; ++k ) {
3816  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3817  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3818  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3819  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3820  }
3821  }
3822 
      // 2x1 tile for a remaining single column; kend is only narrowed by a lower A here.
3823  if( j < jend )
3824  {
3825  const size_t kbegin( ( IsUpper<MT4>::value )
3826  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3827  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3828  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3829 
3830  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3831  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3832 
3833  SIMDType xmm1, xmm2;
3834  size_t k( kbegin );
3835 
3836  for( ; k<kpos; k+=SIMDSIZE ) {
3837  const SIMDType b1( B.load(k,j) );
3838  xmm1 += A.load(i ,k) * b1;
3839  xmm2 += A.load(i+1UL,k) * b1;
3840  }
3841 
3842  (~C)(i ,j) -= sum( xmm1 );
3843  (~C)(i+1UL,j) -= sum( xmm2 );
3844 
3845  for( ; remainder && k<kend; ++k ) {
3846  (~C)(i ,j) -= A(i ,k) * B(k,j);
3847  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3848  }
3849  }
3850  }
3851 
      // Trailing single row in case of an odd number of remaining rows.
3852  if( i < M )
3853  {
      // With LOW only the columns [0,i+1) are computed, with UPP only the columns from i on.
3854  const size_t jend( LOW ? i+1UL : N );
3855  size_t j( UPP ? i : 0UL );
3856 
      // 1x2 tiles; kend is only narrowed by an upper B here.
3857  for( ; (j+2UL) <= jend; j+=2UL )
3858  {
3859  const size_t kbegin( ( IsUpper<MT4>::value )
3860  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3861  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3862  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3863 
3864  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3865  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3866 
3867  SIMDType xmm1, xmm2;
3868  size_t k( kbegin );
3869 
3870  for( ; k<kpos; k+=SIMDSIZE ) {
3871  const SIMDType a1( A.load(i,k) );
3872  xmm1 += a1 * B.load(k,j );
3873  xmm2 += a1 * B.load(k,j+1UL);
3874  }
3875 
3876  (~C)(i,j ) -= sum( xmm1 );
3877  (~C)(i,j+1UL) -= sum( xmm2 );
3878 
3879  for( ; remainder && k<kend; ++k ) {
3880  (~C)(i,j ) -= A(i,k) * B(k,j );
3881  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3882  }
3883  }
3884 
      // Final single element; no upper bound is derived here, the k loop always runs up to K.
3885  if( j < jend )
3886  {
3887  const size_t kbegin( ( IsUpper<MT4>::value )
3888  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3889  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3890 
3891  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3892  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3893 
3894  SIMDType xmm1;
3895  size_t k( kbegin );
3896 
3897  for( ; k<kpos; k+=SIMDSIZE ) {
3898  xmm1 += A.load(i,k) * B.load(k,j);
3899  }
3900 
3901  (~C)(i,j) -= sum( xmm1 );
3902 
3903  for( ; remainder && k<K; ++k ) {
3904  (~C)(i,j) -= A(i,k) * B(k,j);
3905  }
3906  }
3907  }
3908  }
3910  //**********************************************************************************************
3911 
3912  //**Default subtraction assignment to dense matrices (large matrices)***************************
/*!\brief Subtraction assignment kernel for large matrices (\f$ C-=A*B \f$) that simply forwards
//        to the default kernel.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 3929) is elided in
// this listing, so the condition under which it is selected cannot be seen here.
*/
3926  template< typename MT3 // Type of the left-hand side target matrix
3927  , typename MT4 // Type of the left-hand side matrix operand
3928  , typename MT5 > // Type of the right-hand side matrix operand
3930  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3931  {
      // Delegates to the selectDefaultSubAssignKernel() overload set.
3932  selectDefaultSubAssignKernel( ~C, A, B );
3933  }
3935  //**********************************************************************************************
3936 
3937  //**Default subtraction assignment to dense matrices (large matrices)***************************
/*!\brief Subtraction assignment kernel for large matrices (\f$ C-=A*B \f$) based on the
//        lmmm()/ummm()/mmm() kernels.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The kernel is chosen by the class-level flags LOW and UPP (defined outside the visible range):
// lmmm() if LOW is set, otherwise ummm() if UPP is set, otherwise mmm().
//
// NOTE(review): the two scalar arguments ElementType(-1) and ElementType(1) presumably scale the
// product by -1 and the existing target by 1, i.e. C = -1*A*B + 1*C - confirm against the
// kernel declarations in <blaze/math/dense/MMM.h>. The return type/SFINAE line of this
// overload (listing line 3955) is elided in this listing.
*/
3952  template< typename MT3 // Type of the left-hand side target matrix
3953  , typename MT4 // Type of the left-hand side matrix operand
3954  , typename MT5 > // Type of the right-hand side matrix operand
3956  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3957  {
3958  if( LOW )
3959  lmmm( C, A, B, ElementType(-1), ElementType(1) );
3960  else if( UPP )
3961  ummm( C, A, B, ElementType(-1), ElementType(1) );
3962  else
3963  mmm( C, A, B, ElementType(-1), ElementType(1) );
3964  }
3966  //**********************************************************************************************
3967 
3968  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
/*!\brief Fallback for the BLAS-based subtraction assignment (\f$ C-=A*B \f$) that forwards to the
//        kernel for large matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 3985) is elided in
// this listing, so the condition under which it is selected cannot be seen here.
*/
3982  template< typename MT3 // Type of the left-hand side target matrix
3983  , typename MT4 // Type of the left-hand side matrix operand
3984  , typename MT5 > // Type of the right-hand side matrix operand
3986  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3987  {
      // Delegates to the selectLargeSubAssignKernel() overload set.
3988  selectLargeSubAssignKernel( C, A, B );
3989  }
3991  //**********************************************************************************************
3992 
3993  //**BLAS-based subtraction assignment to dense matrices*****************************************
3994 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
3995 
/*!\brief BLAS-based subtraction assignment kernel (\f$ C-=A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is only compiled if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION evaluate to true. For a triangular operand the
// product is formed in a temporary by means of trmm() and subtracted from \a C afterwards; in
// all other cases gemm() is called directly on \a C.
//
// NOTE(review): the return type/SFINAE line of this overload (listing line 4011) is elided in
// this listing.
*/
4008  template< typename MT3 // Type of the left-hand side target matrix
4009  , typename MT4 // Type of the left-hand side matrix operand
4010  , typename MT5 > // Type of the right-hand side matrix operand
4012  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
4013  {
4014  typedef ElementType_<MT3> ET;
4015 
      // Triangular A: copy B into a temporary and multiply it in place with A from the left.
4016  if( IsTriangular<MT4>::value ) {
4017  ResultType_<MT3> tmp( serial( B ) );
4018  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
4019  subAssign( C, tmp );
4020  }
      // Triangular B: copy A into a temporary and multiply it in place with B from the right.
4021  else if( IsTriangular<MT5>::value ) {
4022  ResultType_<MT3> tmp( serial( A ) );
4023  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
4024  subAssign( C, tmp );
4025  }
      // General case: a single gemm() call with the scalars ET(-1) and ET(1).
      // NOTE(review): presumably alpha = -1 and beta = 1, i.e. C = -A*B + C - confirm against
      // the gemm() wrapper in <blaze/math/blas/gemm.h>.
4026  else {
4027  gemm( C, A, B, ET(-1), ET(1) );
4028  }
4029  }
4031 #endif
4032  //**********************************************************************************************
4033 
4034  //**Subtraction assignment to sparse matrices***************************************************
4035  // No special implementation for the subtraction assignment to sparse matrices.
4036  //**********************************************************************************************
4037 
4038  //**Multiplication assignment to dense matrices*************************************************
4039  // No special implementation for the multiplication assignment to dense matrices.
4040  //**********************************************************************************************
4041 
4042  //**Multiplication assignment to sparse matrices************************************************
4043  // No special implementation for the multiplication assignment to sparse matrices.
4044  //**********************************************************************************************
4045 
4046  //**SMP assignment to dense matrices************************************************************
/*!\brief SMP assignment of the multiplication expression to a dense matrix (\f$ C=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// Both operands of the expression are evaluated into local objects of type LT and RT, and the
// product of these two objects is then passed on to smpAssign().
//
// NOTE(review): the return type line (listing line 4063) and one line at the start of the body
// (listing line 4066) are elided in this listing; LT and RT are defined outside the visible
// range.
*/
4061  template< typename MT // Type of the target dense matrix
4062  , bool SO > // Storage order of the target dense matrix
4064  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4065  {
4067 
4068  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4069  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4070 
      // Nothing to do for an empty target; for an empty inner dimension the product is zero,
      // so the target is reset.
4071  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4072  return;
4073  }
4074  else if( rhs.lhs_.columns() == 0UL ) {
4075  reset( ~lhs );
4076  return;
4077  }
4078 
4079  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4080  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4081 
4082  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4083  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4084  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4085  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4086  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4087  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4088 
      // Re-dispatch with the evaluated operands.
4089  smpAssign( ~lhs, A * B );
4090  }
4092  //**********************************************************************************************
4093 
4094  //**SMP assignment to sparse matrices***********************************************************
/*!\brief SMP assignment of the multiplication expression to a sparse matrix (\f$ C=A*B \f$).
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// The expression is first evaluated into a temporary of type TmpType. The temporary is then
// passed through a ForwardFunctor object and assigned to the target via smpAssign().
//
// NOTE(review): the return type line (listing line 4111) and several lines at the start of the
// body (listing lines 4114-4123, presumably including the definition of TmpType) are elided in
// this listing; ForwardFunctor is defined outside the visible range.
*/
4109  template< typename MT // Type of the target sparse matrix
4110  , bool SO > // Storage order of the target sparse matrix
4112  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4113  {
4115 
4117 
4124 
4125  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4126  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4127 
4128  const ForwardFunctor fwd;
4129 
      // Evaluate the complete product into a temporary before assigning it to the sparse target.
4130  const TmpType tmp( rhs );
4131  smpAssign( ~lhs, fwd( tmp ) );
4132  }
4134  //**********************************************************************************************
4135 
4136  //**SMP addition assignment to dense matrices***************************************************
/*!\brief SMP addition assignment of the multiplication expression to a dense matrix
//        (\f$ C+=A*B \f$).
//
// Both operands of the expression are evaluated into local objects of type LT and RT, and the
// product of these two objects is then passed on to smpAddAssign().
//
// NOTE(review): the return type and signature lines (listing lines 4154-4155) and one line at
// the start of the body (listing line 4157) are elided in this listing. Judging from the body,
// the parameters are a dense target \a lhs and the multiplication expression \a rhs - confirm
// against the original header. LT and RT are defined outside the visible range.
*/
4152  template< typename MT // Type of the target dense matrix
4153  , bool SO > // Storage order of the target dense matrix
4156  {
4158 
4159  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4160  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4161 
      // An empty target or an empty inner dimension leaves the target unchanged.
4162  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4163  return;
4164  }
4165 
4166  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4167  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4168 
4169  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4170  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4171  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4172  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4173  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4174  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4175 
      // Re-dispatch with the evaluated operands.
4176  smpAddAssign( ~lhs, A * B );
4177  }
4179  //**********************************************************************************************
4180 
4181  //**SMP addition assignment to sparse matrices**************************************************
4182  // No special implementation for the SMP addition assignment to sparse matrices.
4183  //**********************************************************************************************
4184 
4185  //**SMP subtraction assignment to dense matrices************************************************
/*!\brief SMP subtraction assignment of the multiplication expression to a dense matrix
//        (\f$ C-=A*B \f$).
//
// Both operands of the expression are evaluated into local objects of type LT and RT, and the
// product of these two objects is then passed on to smpSubAssign().
//
// NOTE(review): the return type and signature lines (listing lines 4203-4204) and one line at
// the start of the body (listing line 4206) are elided in this listing. Judging from the body,
// the parameters are a dense target \a lhs and the multiplication expression \a rhs - confirm
// against the original header. LT and RT are defined outside the visible range.
*/
4201  template< typename MT // Type of the target dense matrix
4202  , bool SO > // Storage order of the target dense matrix
4205  {
4207 
4208  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4209  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4210 
      // An empty target or an empty inner dimension leaves the target unchanged.
4211  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4212  return;
4213  }
4214 
4215  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4216  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4217 
4218  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4219  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4220  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4221  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4222  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4223  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4224 
      // Re-dispatch with the evaluated operands.
4225  smpSubAssign( ~lhs, A * B );
4226  }
4228  //**********************************************************************************************
4229 
4230  //**SMP subtraction assignment to sparse matrices***********************************************
4231  // No special implementation for the SMP subtraction assignment to sparse matrices.
4232  //**********************************************************************************************
4233 
4234  //**SMP multiplication assignment to dense matrices*********************************************
4235  // No special implementation for the SMP multiplication assignment to dense matrices.
4236  //**********************************************************************************************
4237 
4238  //**SMP multiplication assignment to sparse matrices********************************************
4239  // No special implementation for the SMP multiplication assignment to sparse matrices.
4240  //**********************************************************************************************
4241 
4242  //**Compile time checks*************************************************************************
4250  //**********************************************************************************************
4251 };
4252 //*************************************************************************************************
4253 
4254 
4255 
4256 
4257 //=================================================================================================
4258 //
4259 // DMATSCALARMULTEXPR SPECIALIZATION
4260 //
4261 //=================================================================================================
4262 
4263 //*************************************************************************************************
4271 template< typename MT1 // Type of the left-hand side dense matrix
4272  , typename MT2 // Type of the right-hand side dense matrix
4273  , bool SF // Symmetry flag
4274  , bool HF // Hermitian flag
4275  , bool LF // Lower flag
4276  , bool UF // Upper flag
4277  , typename ST > // Type of the right-hand side scalar value
4278 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >
4279  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >, false >
4280  , private MatScalarMultExpr
4281  , private Computation
4282 {
4283  private:
4284  //**Type definitions****************************************************************************
      // Private shorthand aliases for the types of the wrapped matrix multiplication expression.
      // NOTE(review): the alias MMM used below is not defined in the visible part of this listing
      // (embedded lines 4285-4286 are missing); presumably it names
      // DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> -- confirm against the full header.
4287 
      // Result type of the wrapped expression MMM.
4288  typedef ResultType_<MMM> RES;
      // Result type of the left-hand side matrix operand type MT1.
4289  typedef ResultType_<MT1> RT1;
      // Result type of the right-hand side matrix operand type MT2.
4290  typedef ResultType_<MT2> RT2;
      // Element type of RT1.
4291  typedef ElementType_<RT1> ET1;
      // Element type of RT2.
4292  typedef ElementType_<RT2> ET2;
      // Composite type of the left-hand side matrix operand type MT1.
4293  typedef CompositeType_<MT1> CT1;
      // Composite type of the right-hand side matrix operand type MT2.
4294  typedef CompositeType_<MT2> CT2;
4295  //**********************************************************************************************
4296 
4297  //**********************************************************************************************
      // Compile-time switch for the left-hand side operand: true if MT1 is a computation
      // (IsComputation) or requires an evaluation (RequiresEvaluation).
4299  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4300  //**********************************************************************************************
4301 
4302  //**********************************************************************************************
      // Compile-time switch for the right-hand side operand: true if MT2 is a computation
      // (IsComputation) or requires an evaluation (RequiresEvaluation).
4304  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4305  //**********************************************************************************************
4306 
4307  //**********************************************************************************************
      // Normalized structure flags derived from the template flags SF, HF, LF and UF.
      // These are the flags the kernels below test; the raw template flags are combined as follows:
      //  - SYM:  SF is set and none of HF, LF, UF is set.
      //  - HERM: HF is set and neither LF nor UF is set.
      //  - LOW:  LF is set, or UF is set together with SF or HF.
      //  - UPP:  UF is set, or LF is set together with SF or HF.
      // LOW and UPP can both be true at the same time (e.g. LF and UF both set).
4309  enum : bool {
4310  SYM = ( SF && !( HF || LF || UF ) ),
4311  HERM = ( HF && !( LF || UF ) ),
4312  LOW = ( LF || ( ( SF || HF ) && UF ) ),
4313  UPP = ( UF || ( ( SF || HF ) && LF ) )
4314  };
4315  //**********************************************************************************************
4316 
4317  //**********************************************************************************************
4319 
      // Helper structure whose nested 'value' is true if either operand has to be evaluated,
      // i.e. if evaluateLeft or evaluateRight is set. The template parameters T1, T2 and T3
      // are not referenced by the visible definition.
4322  template< typename T1, typename T2, typename T3 >
4323  struct IsEvaluationRequired {
4324  enum : bool { value = ( evaluateLeft || evaluateRight ) };
4325  };
4326  //**********************************************************************************************
4327 
4328  //**********************************************************************************************
4330 
4332  template< typename T1, typename T2, typename T3, typename T4 >
4333  struct UseBlasKernel {
4335  !SYM && !HERM && !LOW && !UPP &&
4340  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4345  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
4347  };
4348  //**********************************************************************************************
4349 
4350  //**********************************************************************************************
4352 
4354  template< typename T1, typename T2, typename T3, typename T4 >
4355  struct UseVectorizedDefaultKernel {
4356  enum : bool { value = useOptimizedKernels &&
4358  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4362  , T4 >::value &&
4363  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
4364  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
4365  };
4366  //**********************************************************************************************
4367 
4368  //**********************************************************************************************
4370 
4372  typedef IfTrue_< HERM
4373  , DeclHerm
4374  , IfTrue_< SYM
4375  , DeclSym
4376  , IfTrue_< LOW
4377  , IfTrue_< UPP
4378  , DeclDiag
4379  , DeclLow >
4380  , IfTrue_< UPP
4381  , DeclUpp
4382  , Noop > > > > ForwardFunctor;
4383  //**********************************************************************************************
4384 
4385  public:
4386  //**Type definitions****************************************************************************
      // Public type aliases of the expression.
      // NOTE(review): this listing omits several lines of this section (gaps in the embedded
      // numbering, e.g. 4387, 4389-4392, 4396-4397, 4399, 4402-4403, 4405-4406). Names used here
      // and further below such as ElementType, LeftOperand, LT, RT and SIMDType are presumably
      // declared on those missing lines -- confirm against the full header.
      // Result type of the scaled multiplication: MultTrait_ of RES and the scalar type ST.
4388  typedef MultTrait_<RES,ST> ResultType;
      // Return type of the element access functions (a const ElementType value).
4393  typedef const ElementType ReturnType;
      // Composite type of this expression (a const ResultType).
4394  typedef const ResultType CompositeType;
4395 
4398 
      // Type of the right-hand side operand of this expression: the scalar type ST.
4400  typedef ST RightOperand;
4401 
4404 
4407  //**********************************************************************************************
4408 
4409  //**Compilation flags***************************************************************************
      // Compilation switch for SIMD evaluation of this expression.
      // NOTE(review): the tail of this enum (embedded lines 4413-4415) is missing from the listing.
      // The visible part requires that neither MT1 nor MT2 is diagonal and that both operand
      // types are SIMD enabled; further conditions may follow in the full header.
4411  enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
4412  MT1::simdEnabled && MT2::simdEnabled &&
4416 
      // Compilation switch for SMP assignment: set only if neither operand needs an intermediate
      // evaluation (evaluateLeft / evaluateRight are false) and both operand types are themselves
      // SMP assignable.
4418  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4419  !evaluateRight && MT2::smpAssignable };
4420  //**********************************************************************************************
4421 
4422  //**SIMD properties*****************************************************************************
      // SIMD vector size for ElementType, taken from SIMDTrait<ElementType>::size.
4424  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
4425  //**********************************************************************************************
4426 
4427  //**Constructor*********************************************************************************
      // Constructs the scaled matrix multiplication expression from its two operands.
      //
      // matrix: the dense matrix multiplication expression (type MMM) to be scaled.
      // scalar: the scalar value (type ST) the product is multiplied with.
      // Both arguments are stored in the members matrix_ and scalar_.
4433  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4434  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4435  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4436  {}
4437  //**********************************************************************************************
4438 
4439  //**Access operator*****************************************************************************
      // 2D element access: returns element (i,j) of the wrapped matrix expression multiplied by
      // the stored scalar.
      //
      // i: row index, expected to be smaller than matrix_.rows().
      // j: column index, expected to be smaller than matrix_.columns().
      // The index bounds are only verified via BLAZE_INTERNAL_ASSERT; use at() for an access
      // that reports invalid indices via BLAZE_THROW_OUT_OF_RANGE.
4446  inline ReturnType operator()( size_t i, size_t j ) const {
4447  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4448  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4449  return matrix_(i,j) * scalar_;
4450  }
4451  //**********************************************************************************************
4452 
4453  //**At function*********************************************************************************
      // Checked element access: validates both indices and then forwards to operator().
      //
      // i: row index; BLAZE_THROW_OUT_OF_RANGE is invoked if i >= matrix_.rows().
      // j: column index; BLAZE_THROW_OUT_OF_RANGE is invoked if j >= matrix_.columns().
      // Returns the same value as (*this)(i,j).
4461  inline ReturnType at( size_t i, size_t j ) const {
4462  if( i >= matrix_.rows() ) {
4463  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
4464  }
4465  if( j >= matrix_.columns() ) {
4466  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
4467  }
4468  return (*this)(i,j);
4469  }
4470  //**********************************************************************************************
4471 
4472  //**Rows function*******************************************************************************
      // Returns the number of rows of the expression, i.e. the number of rows of the wrapped
      // matrix expression matrix_.
4477  inline size_t rows() const {
4478  return matrix_.rows();
4479  }
4480  //**********************************************************************************************
4481 
4482  //**Columns function****************************************************************************
      // Returns the number of columns of the expression, i.e. the number of columns of the
      // wrapped matrix expression matrix_.
4487  inline size_t columns() const {
4488  return matrix_.columns();
4489  }
4490  //**********************************************************************************************
4491 
4492  //**Left operand access*************************************************************************
      // Returns the left-hand side operand of this expression: the stored matrix multiplication
      // expression matrix_.
4497  inline LeftOperand leftOperand() const {
4498  return matrix_;
4499  }
4500  //**********************************************************************************************
4501 
4502  //**Right operand access************************************************************************
      // Returns the right-hand side operand of this expression: the stored scalar value scalar_.
4507  inline RightOperand rightOperand() const {
4508  return scalar_;
4509  }
4510  //**********************************************************************************************
4511 
4512  //**********************************************************************************************
      // Returns whether the expression can alias with the given address.
      // The query is forwarded unchanged to the wrapped matrix expression; the scalar operand
      // is not consulted.
      //
      // alias: the address to be checked.
4518  template< typename T >
4519  inline bool canAlias( const T* alias ) const {
4520  return matrix_.canAlias( alias );
4521  }
4522  //**********************************************************************************************
4523 
4524  //**********************************************************************************************
      // Returns whether the expression is aliased with the given address.
      // The query is forwarded unchanged to the wrapped matrix expression; the scalar operand
      // is not consulted.
      //
      // alias: the address to be checked.
4530  template< typename T >
4531  inline bool isAliased( const T* alias ) const {
4532  return matrix_.isAliased( alias );
4533  }
4534  //**********************************************************************************************
4535 
4536  //**********************************************************************************************
      // Returns the alignment status reported by the wrapped matrix expression (matrix_.isAligned()).
4541  inline bool isAligned() const {
4542  return matrix_.isAligned();
4543  }
4544  //**********************************************************************************************
4545 
4546  //**********************************************************************************************
      // Returns whether the expression can be used in an SMP assignment.
      // Two conditions must hold, both based on the number of result elements rows()*columns():
      //  - either BLAZE_BLAS_IS_PARALLEL is not set, or the element count is below
      //    DMATTDMATMULT_THRESHOLD (the same threshold selectAssignKernel compares against
      //    before choosing the small kernel over the BLAS kernel), and
      //  - the element count is at least SMP_DMATTDMATMULT_THRESHOLD.
4551  inline bool canSMPAssign() const noexcept {
4552  return ( !BLAZE_BLAS_IS_PARALLEL ||
4553  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
4554  ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD );
4555  }
4556  //**********************************************************************************************
4557 
4558  private:
4559  //**Member variables****************************************************************************
      // Left-hand side operand: the dense matrix multiplication expression to be scaled.
4560  LeftOperand matrix_;
      // Right-hand side operand: the scalar every element of the product is multiplied with.
4561  RightOperand scalar_;
4562  //**********************************************************************************************
4563 
4564  //**Assignment to dense matrices****************************************************************
      // Assigns the scaled matrix product represented by rhs to the dense matrix lhs.
      //
      // lhs: target dense matrix; its dimensions must already match those of rhs (checked by
      //      internal assertions only).
      // rhs: the scaled matrix multiplication expression to be assigned.
      //
      // The function extracts the two matrix operands of the wrapped multiplication, handles the
      // degenerate sizes, evaluates both operands with serial() and hands the evaluated operands
      // and the scalar to selectAssignKernel().
      // NOTE(review): embedded line 4580 is missing from this listing, and the operand types LT
      // and RT are declared outside the visible part of the class.
4576  template< typename MT // Type of the target dense matrix
4577  , bool SO > // Storage order of the target dense matrix
4578  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4579  {
4581 
4582  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4583  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4584 
      // Operands of the wrapped matrix multiplication expression.
4585  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
4586  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
4587 
      // An empty target needs no work. If the inner dimension is zero, the product has no
      // contributions and the target is cleared with reset() instead.
4588  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4589  return;
4590  }
4591  else if( left.columns() == 0UL ) {
4592  reset( ~lhs );
4593  return;
4594  }
4595 
4596  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4597  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4598 
      // Sanity checks: the evaluated operands must have the sizes of the original operands and
      // must be compatible with the target.
4599  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4600  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4601  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4602  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4603  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4604  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4605 
4606  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4607  }
4608  //**********************************************************************************************
4609 
4610  //**Assignment to dense matrices (kernel selection)*********************************************
      // Selects the kernel used for the assignment C = ( A * B ) * scalar.
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side operand,
      // scalar: scaling factor. All arguments are passed on unchanged to the selected kernel.
      //
      // NOTE(review): embedded line 4627, which opens the if-condition, is missing from this
      // listing. The visible part of the condition selects selectSmallAssignKernel() when the
      // number of target elements C.rows()*C.columns() is below DMATTDMATMULT_THRESHOLD; the
      // missing line presumably adds a further alternative. Otherwise selectBlasAssignKernel()
      // is called.
4621  template< typename MT3 // Type of the left-hand side target matrix
4622  , typename MT4 // Type of the left-hand side matrix operand
4623  , typename MT5 // Type of the right-hand side matrix operand
4624  , typename ST2 > // Type of the scalar value
4625  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4626  {
4628  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
4629  selectSmallAssignKernel( C, A, B, scalar );
4630  else
4631  selectBlasAssignKernel( C, A, B, scalar );
4632  }
4633  //**********************************************************************************************
4634 
4635  //**Default assignment to row-major dense matrices (general/general)****************************
      // Default kernel computing C = ( A * B ) * scalar for a row-major target matrix.
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side operand,
      // scalar: scaling factor applied to every computed element.
      //
      // The kernel walks the target row by row. Index ranges are narrowed at compile time using
      // the structural properties of the operands (IsUpper / IsLower / IsStrictly...) and the
      // structure flags SYM, HERM, LOW and UPP; elements outside the computed ranges are cleared
      // with reset(). Each computed element is a dot product over k in [kbegin,kend) that is
      // scaled once at the end.
      // NOTE(review): embedded lines 4653, 4678, 4681, 4685 and 4688 are missing from this
      // listing (the declaration line with the return type and parts of the jbegin/jend
      // conditionals), so those expressions are incomplete as shown.
4649  template< typename MT3 // Type of the left-hand side target matrix
4650  , typename MT4 // Type of the left-hand side matrix operand
4651  , typename MT5 // Type of the right-hand side matrix operand
4652  , typename ST2 > // Type of the scalar value
4654  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4655  {
4656  const size_t M( A.rows() );
4657  const size_t N( B.columns() );
4658  const size_t K( A.columns() );
4659 
      // Any structure flag implies a square result.
4660  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4661 
      // Row range [ibegin,iend) that is actually computed; rows outside are cleared below.
      // The range shrinks when A (and additionally B) is strictly lower / strictly upper.
4662  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
4663  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
4664  :( 0UL ) );
4665  const size_t iend( ( IsStrictlyUpper<MT4>::value )
4666  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
4667  :( M ) );
4668  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4669 
      // Leading rows before ibegin are cleared completely.
4670  for( size_t i=0UL; i<ibegin; ++i ) {
4671  for( size_t j=0UL; j<N; ++j ) {
4672  reset( (~C)(i,j) );
4673  }
4674  }
4675  for( size_t i=ibegin; i<iend; ++i )
4676  {
      // Column range [jbegin,jend) computed in row i.
4677  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4679  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
4680  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
4682  ?( SYM || HERM || UPP ? max( i, 1UL ) : 1UL )
4683  :( SYM || HERM || UPP ? i : 0UL ) ) );
4684  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4686  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
4687  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
4689  ?( LOW ? min(i+1UL,N-1UL) : N-1UL )
4690  :( LOW ? i+1UL : N ) ) );
4691 
      // With a structure flag set the range can be empty (jbegin > jend); in that case the
      // whole row is cleared and skipped.
4692  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) {
4693  for( size_t j=0UL; j<N; ++j ) {
4694  reset( (~C)(i,j) );
4695  }
4696  continue;
4697  }
4698 
4699  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4700 
      // Clear the elements in front of the computed range. For SYM/HERM clearing starts at the
      // diagonal: the part left of the diagonal is filled by the mirroring step at the end.
4701  for( size_t j=( SYM || HERM ? i : 0UL ); j<jbegin; ++j ) {
4702  reset( (~C)(i,j) );
4703  }
4704  for( size_t j=jbegin; j<jend; ++j )
4705  {
      // Range [kbegin,kend) of the inner index restricted by the triangular structure of A and B.
4706  const size_t kbegin( ( IsUpper<MT4>::value )
4707  ?( ( IsLower<MT5>::value )
4708  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4709  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4710  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4711  :( ( IsLower<MT5>::value )
4712  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4713  :( 0UL ) ) );
4714  const size_t kend( ( IsLower<MT4>::value )
4715  ?( ( IsUpper<MT5>::value )
4716  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4717  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4718  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4719  :( ( IsUpper<MT5>::value )
4720  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4721  :( K ) ) );
4722  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4723 
      // The first product initializes the element (so C need not be cleared beforehand), the
      // remaining products are accumulated, and the sum is scaled once.
4724  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4725  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4726  (~C)(i,j) += A(i,k) * B(k,j);
4727  }
4728  (~C)(i,j) *= scalar;
4729  }
      // Clear the elements behind the computed range.
4730  for( size_t j=jend; j<N; ++j ) {
4731  reset( (~C)(i,j) );
4732  }
4733  }
      // Trailing rows from iend on are cleared completely.
4734  for( size_t i=iend; i<M; ++i ) {
4735  for( size_t j=0UL; j<N; ++j ) {
4736  reset( (~C)(i,j) );
4737  }
4738  }
4739 
      // Symmetric / Hermitian result: fill the strictly lower part from the already computed
      // upper part (conjugated in the Hermitian case).
4740  if( SYM || HERM ) {
4741  for( size_t i=1UL; i<M; ++i ) {
4742  for( size_t j=0UL; j<i; ++j ) {
4743  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
4744  }
4745  }
4746  }
4747  }
4748  //**********************************************************************************************
4749 
4750  //**Default assignment to column-major dense matrices (general/general)*************************
      // Default kernel computing C = ( A * B ) * scalar for a column-major target matrix.
      // This overload is enabled only if neither A nor B is a diagonal matrix.
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side operand,
      // scalar: scaling factor applied to every computed element.
      //
      // The kernel walks the target column by column. Index ranges are narrowed at compile time
      // using the structural properties of the operands and the structure flags SYM, HERM, LOW
      // and UPP; elements outside the computed ranges are cleared with reset(). Each computed
      // element is a dot product over k in [kbegin,kend) that is scaled once at the end.
      // NOTE(review): embedded lines 4793, 4796, 4800 and 4803 are missing from this listing,
      // so the ibegin/iend conditionals are incomplete as shown.
4764  template< typename MT3 // Type of the left-hand side target matrix
4765  , typename MT4 // Type of the left-hand side matrix operand
4766  , typename MT5 // Type of the right-hand side matrix operand
4767  , typename ST2 > // Type of the scalar value
4768  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
4769  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4770  {
4771  const size_t M( A.rows() );
4772  const size_t N( B.columns() );
4773  const size_t K( A.columns() );
4774 
      // Any structure flag implies a square result.
4775  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4776 
      // Column range [jbegin,jend) that is actually computed; columns outside are cleared below.
      // The range shrinks when B (and additionally A) is strictly upper / strictly lower.
4777  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
4778  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
4779  :( 0UL ) );
4780  const size_t jend( ( IsStrictlyLower<MT5>::value )
4781  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
4782  :( N ) );
4783  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4784 
      // Leading columns before jbegin are cleared completely.
4785  for( size_t j=0UL; j<jbegin; ++j ) {
4786  for( size_t i=0UL; i<M; ++i ) {
4787  reset( (~C)(i,j) );
4788  }
4789  }
4790  for( size_t j=jbegin; j<jend; ++j )
4791  {
      // Row range [ibegin,iend) computed in column j.
4792  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4794  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
4795  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4797  ?( SYM || HERM || LOW ? max( j, 1UL ) : 1UL )
4798  :( SYM || HERM || LOW ? j : 0UL ) ) );
4799  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4801  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
4802  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
4804  ?( UPP ? min(j+1UL,M-1UL) : M-1UL )
4805  :( UPP ? j+1UL : M ) ) );
4806 
      // With a structure flag set the range can be empty (ibegin > iend); in that case the
      // whole column is cleared and skipped.
4807  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) {
4808  for( size_t i=0UL; i<M; ++i ) {
4809  reset( (~C)(i,j) );
4810  }
4811  continue;
4812  }
4813 
4814  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4815 
      // Clear the elements in front of the computed range. For SYM/HERM clearing starts at the
      // diagonal: the part above the diagonal is filled by the mirroring step at the end.
4816  for( size_t i=( SYM || HERM ? j : 0UL ); i<ibegin; ++i ) {
4817  reset( (~C)(i,j) );
4818  }
4819  for( size_t i=ibegin; i<iend; ++i )
4820  {
      // Range [kbegin,kend) of the inner index restricted by the triangular structure of A and B.
4821  const size_t kbegin( ( IsUpper<MT4>::value )
4822  ?( ( IsLower<MT5>::value )
4823  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4824  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4825  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4826  :( ( IsLower<MT5>::value )
4827  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4828  :( 0UL ) ) );
4829  const size_t kend( ( IsLower<MT4>::value )
4830  ?( ( IsUpper<MT5>::value )
4831  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4832  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4833  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4834  :( ( IsUpper<MT5>::value )
4835  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4836  :( K ) ) );
4837  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4838 
      // The first product initializes the element (so C need not be cleared beforehand), the
      // remaining products are accumulated, and the sum is scaled once.
4839  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4840  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4841  (~C)(i,j) += A(i,k) * B(k,j);
4842  }
4843  (~C)(i,j) *= scalar;
4844  }
      // Clear the elements behind the computed range.
4845  for( size_t i=iend; i<M; ++i ) {
4846  reset( (~C)(i,j) );
4847  }
4848  }
      // Trailing columns from jend on are cleared completely.
4849  for( size_t j=jend; j<N; ++j ) {
4850  for( size_t i=0UL; i<M; ++i ) {
4851  reset( (~C)(i,j) );
4852  }
4853  }
4854 
      // Symmetric / Hermitian result: fill the strictly upper part from the already computed
      // lower part (conjugated in the Hermitian case).
4855  if( SYM || HERM ) {
4856  for( size_t j=1UL; j<N; ++j ) {
4857  for( size_t i=0UL; i<j; ++i ) {
4858  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
4859  }
4860  }
4861  }
4862  }
4863  //**********************************************************************************************
4864 
4865  //**Default assignment to row-major dense matrices (general/diagonal)***************************
      // Default kernel computing C = ( A * B ) * scalar for a row-major target matrix when A is
      // not diagonal and B is diagonal (overload selected via EnableIf_).
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side diagonal operand,
      // scalar: scaling factor.
      //
      // Because only the diagonal of B is accessed, every element reduces to the single product
      // C(i,j) = A(i,j) * B(j,j) * scalar.
4879  template< typename MT3 // Type of the left-hand side target matrix
4880  , typename MT4 // Type of the left-hand side matrix operand
4881  , typename MT5 // Type of the right-hand side matrix operand
4882  , typename ST2 > // Type of the scalar value
4883  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4884  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4885  {
4886  const size_t M( A.rows() );
4887  const size_t N( B.columns() );
4888 
4889  for( size_t i=0UL; i<M; ++i )
4890  {
      // Column range [jbegin,jend) computed in row i: the full row for a general A, restricted
      // to the (strictly) upper and/or lower part if A has that structure.
4891  const size_t jbegin( ( IsUpper<MT4>::value )
4892  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4893  :( 0UL ) );
4894  const size_t jend( ( IsLower<MT4>::value )
4895  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4896  :( N ) );
4897  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4898 
      // Elements in front of the range exist only for an upper A and are cleared.
4899  if( IsUpper<MT4>::value ) {
4900  for( size_t j=0UL; j<jbegin; ++j ) {
4901  reset( (~C)(i,j) );
4902  }
4903  }
4904  for( size_t j=jbegin; j<jend; ++j ) {
4905  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4906  }
      // Elements behind the range exist only for a lower A and are cleared.
4907  if( IsLower<MT4>::value ) {
4908  for( size_t j=jend; j<N; ++j ) {
4909  reset( (~C)(i,j) );
4910  }
4911  }
4912  }
4913  }
4914  //**********************************************************************************************
4915 
4916  //**Default assignment to column-major dense matrices (general/diagonal)************************
      // Default kernel computing C = ( A * B ) * scalar for a column-major target matrix when A
      // is not diagonal and B is diagonal (overload selected via EnableIf_).
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side diagonal operand,
      // scalar: scaling factor.
      //
      // Every element reduces to C(i,j) = A(i,j) * B(j,j) * scalar. The target is traversed in
      // tiles of BLOCK_SIZE x BLOCK_SIZE elements; within a tile the traversal is column by column.
4930  template< typename MT3 // Type of the left-hand side target matrix
4931  , typename MT4 // Type of the left-hand side matrix operand
4932  , typename MT5 // Type of the right-hand side matrix operand
4933  , typename ST2 > // Type of the scalar value
4934  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4935  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4936  {
4937  constexpr size_t block( BLOCK_SIZE );
4938 
4939  const size_t M( A.rows() );
4940  const size_t N( B.columns() );
4941 
      // Outer loops: tile origin (ii,jj); jend / iend clamp the last tile to the matrix size.
4942  for( size_t jj=0UL; jj<N; jj+=block ) {
4943  const size_t jend( min( N, jj+block ) );
4944  for( size_t ii=0UL; ii<M; ii+=block ) {
4945  const size_t iend( min( M, ii+block ) );
4946  for( size_t j=jj; j<jend; ++j )
4947  {
      // Row range [ibegin,ipos) computed in column j of the current tile: the whole tile column
      // for a general A, clipped against the (strictly) lower / upper part of A otherwise.
4948  const size_t ibegin( ( IsLower<MT4>::value )
4949  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
4950  :( ii ) );
4951  const size_t ipos( ( IsUpper<MT4>::value )
4952  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
4953  :( iend ) );
4954 
      // Tile elements in front of the range exist only for a lower A and are cleared.
4955  if( IsLower<MT4>::value ) {
4956  for( size_t i=ii; i<ibegin; ++i ) {
4957  reset( (~C)(i,j) );
4958  }
4959  }
4960  for( size_t i=ibegin; i<ipos; ++i ) {
4961  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4962  }
      // Tile elements behind the range exist only for an upper A and are cleared.
4963  if( IsUpper<MT4>::value ) {
4964  for( size_t i=ipos; i<iend; ++i ) {
4965  reset( (~C)(i,j) );
4966  }
4967  }
4968  }
4969  }
4970  }
4971  }
4972  //**********************************************************************************************
4973 
4974  //**Default assignment to row-major dense matrices (diagonal/general)***************************
      // Default kernel computing C = ( A * B ) * scalar for a row-major target matrix; according
      // to the section heading this is the case of a diagonal A and a general B.
      // NOTE(review): embedded line 4992 (the declaration line with the return type / overload
      // constraint) is missing from this listing -- confirm the constraint against the full header.
      //
      // C: target matrix, A: left-hand side operand (only its diagonal is accessed),
      // B: right-hand side operand, scalar: scaling factor.
      //
      // Every element reduces to C(i,j) = A(i,i) * B(i,j) * scalar. The target is traversed in
      // tiles of BLOCK_SIZE x BLOCK_SIZE elements; within a tile the traversal is row by row.
4988  template< typename MT3 // Type of the left-hand side target matrix
4989  , typename MT4 // Type of the left-hand side matrix operand
4990  , typename MT5 // Type of the right-hand side matrix operand
4991  , typename ST2 > // Type of the scalar value
4993  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4994  {
4995  constexpr size_t block( BLOCK_SIZE );
4996 
4997  const size_t M( A.rows() );
4998  const size_t N( B.columns() );
4999 
      // Outer loops: tile origin (ii,jj); iend / jend clamp the last tile to the matrix size.
5000  for( size_t ii=0UL; ii<M; ii+=block ) {
5001  const size_t iend( min( M, ii+block ) );
5002  for( size_t jj=0UL; jj<N; jj+=block ) {
5003  const size_t jend( min( N, jj+block ) );
5004  for( size_t i=ii; i<iend; ++i )
5005  {
      // Column range [jbegin,jpos) computed in row i of the current tile: the whole tile row
      // for a general B, clipped against the (strictly) upper / lower part of B otherwise.
5006  const size_t jbegin( ( IsUpper<MT5>::value )
5007  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
5008  :( jj ) );
5009  const size_t jpos( ( IsLower<MT5>::value )
5010  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
5011  :( jend ) );
5012 
      // Tile elements in front of the range exist only for an upper B and are cleared.
5013  if( IsUpper<MT5>::value ) {
5014  for( size_t j=jj; j<jbegin; ++j ) {
5015  reset( (~C)(i,j) );
5016  }
5017  }
5018  for( size_t j=jbegin; j<jpos; ++j ) {
5019  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
5020  }
      // Tile elements behind the range exist only for a lower B and are cleared.
5021  if( IsLower<MT5>::value ) {
5022  for( size_t j=jpos; j<jend; ++j ) {
5023  reset( (~C)(i,j) );
5024  }
5025  }
5026  }
5027  }
5028  }
5029  }
5030  //**********************************************************************************************
5031 
5032  //**Default assignment to column-major dense matrices (diagonal/general)************************
      // Default kernel computing C = ( A * B ) * scalar for a column-major target matrix when A
      // is diagonal and B is not diagonal (overload selected via EnableIf_).
      //
      // C: target matrix, A: left-hand side diagonal operand, B: right-hand side operand,
      // scalar: scaling factor.
      //
      // Because only the diagonal of A is accessed, every element reduces to the single product
      // C(i,j) = A(i,i) * B(i,j) * scalar.
5046  template< typename MT3 // Type of the left-hand side target matrix
5047  , typename MT4 // Type of the left-hand side matrix operand
5048  , typename MT5 // Type of the right-hand side matrix operand
5049  , typename ST2 > // Type of the scalar value
5050  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5051  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5052  {
5053  const size_t M( A.rows() );
5054  const size_t N( B.columns() );
5055 
5056  for( size_t j=0UL; j<N; ++j )
5057  {
      // Row range [ibegin,iend) computed in column j: the full column for a general B,
      // restricted to the (strictly) lower and/or upper part if B has that structure.
5058  const size_t ibegin( ( IsLower<MT5>::value )
5059  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5060  :( 0UL ) );
5061  const size_t iend( ( IsUpper<MT5>::value )
5062  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5063  :( M ) );
5064  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5065 
      // Elements in front of the range exist only for a lower B and are cleared.
5066  if( IsLower<MT5>::value ) {
5067  for( size_t i=0UL; i<ibegin; ++i ) {
5068  reset( (~C)(i,j) );
5069  }
5070  }
5071  for( size_t i=ibegin; i<iend; ++i ) {
5072  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
5073  }
      // Elements behind the range exist only for an upper B and are cleared.
5074  if( IsUpper<MT5>::value ) {
5075  for( size_t i=iend; i<M; ++i ) {
5076  reset( (~C)(i,j) );
5077  }
5078  }
5079  }
5080  }
5081  //**********************************************************************************************
5082 
5083  //**Default assignment to dense matrices (diagonal/diagonal)************************************
      // Default kernel computing C = ( A * B ) * scalar when both A and B are diagonal matrices
      // (overload selected via EnableIf_). Unlike the other default kernels it takes the target
      // as a plain MT3&, so it is not tied to one storage order.
      //
      // C: target matrix, A: left-hand side diagonal operand, B: right-hand side diagonal
      // operand, scalar: scaling factor.
      //
      // The whole target is first cleared with reset(); afterwards only the diagonal elements
      // C(i,i) = A(i,i) * B(i,i) * scalar are written.
5097  template< typename MT3 // Type of the left-hand side target matrix
5098  , typename MT4 // Type of the left-hand side matrix operand
5099  , typename MT5 // Type of the right-hand side matrix operand
5100  , typename ST2 > // Type of the scalar value
5101  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
5102  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5103  {
5104  reset( C );
5105 
5106  for( size_t i=0UL; i<A.rows(); ++i ) {
5107  C(i,i) = A(i,i) * B(i,i) * scalar;
5108  }
5109  }
5110  //**********************************************************************************************
5111 
5112  //**Default assignment to dense matrices (small matrices)***************************************
      // Fallback small-matrix kernel: forwards all arguments unchanged to
      // selectDefaultAssignKernel().
      // NOTE(review): embedded line 5130 (the declaration line with the return type / overload
      // constraint) is missing from this listing; presumably it restricts this overload to the
      // cases not covered by the vectorized small kernels -- confirm against the full header.
      //
      // C: target matrix, A: left-hand side operand, B: right-hand side operand,
      // scalar: scaling factor.
5126  template< typename MT3 // Type of the left-hand side target matrix
5127  , typename MT4 // Type of the left-hand side matrix operand
5128  , typename MT5 // Type of the right-hand side matrix operand
5129  , typename ST2 > // Type of the scalar value
5131  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5132  {
5133  selectDefaultAssignKernel( C, A, B, scalar );
5134  }
5135  //**********************************************************************************************
5136 
5137  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
5152  template< typename MT3 // Type of the left-hand side target matrix
5153  , typename MT4 // Type of the left-hand side matrix operand
5154  , typename MT5 // Type of the right-hand side matrix operand
5155  , typename ST2 > // Type of the scalar value
5157  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5158  {
5159  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5160 
5161  const size_t M( A.rows() );
5162  const size_t N( B.columns() );
5163  const size_t K( A.columns() );
5164 
5165  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5166 
5167  if( LOW && UPP ) {
5168  reset( ~C );
5169  }
5170 
5171  {
5172  size_t i( 0UL );
5173 
5174  for( ; !( LOW && UPP ) && (i+2UL) <= M; i+=2UL )
5175  {
5176  const size_t jend( LOW ? i+2UL : N );
5177  size_t j( SYM || HERM || UPP ? i : 0UL );
5178 
5179  for( ; (j+4UL) <= jend; j+=4UL )
5180  {
5181  const size_t kbegin( ( IsUpper<MT4>::value )
5182  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5183  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5184  const size_t kend( ( IsLower<MT4>::value )
5185  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
5186  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
5187 
5188  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5189  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5190 
5191  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5192  size_t k( kbegin );
5193 
5194  for( ; k<kpos; k+=SIMDSIZE ) {
5195  const SIMDType a1( A.load(i ,k) );
5196  const SIMDType a2( A.load(i+1UL,k) );
5197  const SIMDType b1( B.load(k,j ) );
5198  const SIMDType b2( B.load(k,j+1UL) );
5199  const SIMDType b3( B.load(k,j+2UL) );
5200  const SIMDType b4( B.load(k,j+3UL) );
5201  xmm1 += a1 * b1;
5202  xmm2 += a1 * b2;
5203  xmm3 += a1 * b3;
5204  xmm4 += a1 * b4;
5205  xmm5 += a2 * b1;
5206  xmm6 += a2 * b2;
5207  xmm7 += a2 * b3;
5208  xmm8 += a2 * b4;
5209  }
5210 
5211  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5212  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5213  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
5214  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
5215  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
5216  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
5217  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
5218  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
5219 
5220  for( ; remainder && k<kend; ++k ) {
5221  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5222  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5223  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
5224  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
5225  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5226  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5227  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
5228  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
5229  }
5230  }
5231 
5232  for( ; (j+2UL) <= jend; j+=2UL )
5233  {
5234  const size_t kbegin( ( IsUpper<MT4>::value )
5235  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5236  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5237  const size_t kend( ( IsLower<MT4>::value )
5238  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5239  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5240 
5241  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5242  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5243 
5244  SIMDType xmm1, xmm2, xmm3, xmm4;
5245  size_t k( kbegin );
5246 
5247  for( ; k<kpos; k+=SIMDSIZE ) {
5248  const SIMDType a1( A.load(i ,k) );
5249  const SIMDType a2( A.load(i+1UL,k) );
5250  const SIMDType b1( B.load(k,j ) );
5251  const SIMDType b2( B.load(k,j+1UL) );
5252  xmm1 += a1 * b1;
5253  xmm2 += a1 * b2;
5254  xmm3 += a2 * b1;
5255  xmm4 += a2 * b2;
5256  }
5257 
5258  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5259  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5260  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5261  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5262 
5263  for( ; remainder && k<kend; ++k ) {
5264  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5265  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5266  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5267  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5268  }
5269  }
5270 
5271  if( j < jend )
5272  {
5273  const size_t kbegin( ( IsUpper<MT4>::value )
5274  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5275  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5276  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5277 
5278  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5279  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5280 
5281  SIMDType xmm1, xmm2;
5282  size_t k( kbegin );
5283 
5284  for( ; k<kpos; k+=SIMDSIZE ) {
5285  const SIMDType b1( B.load(k,j) );
5286  xmm1 += A.load(i ,k) * b1;
5287  xmm2 += A.load(i+1UL,k) * b1;
5288  }
5289 
5290  (~C)(i ,j) = sum( xmm1 ) * scalar;
5291  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5292 
5293  for( ; remainder && k<kend; ++k ) {
5294  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5295  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5296  }
5297  }
5298  }
5299 
5300  for( ; i<M; ++i )
5301  {
5302  const size_t jend( LOW ? i+1UL : N );
5303  size_t j( SYM || HERM || UPP ? i : 0UL );
5304 
5305  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
5306  {
5307  const size_t kbegin( ( IsUpper<MT4>::value )
5308  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5309  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5310  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
5311 
5312  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5313  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5314 
5315  SIMDType xmm1, xmm2, xmm3, xmm4;
5316  size_t k( kbegin );
5317 
5318  for( ; k<kpos; k+=SIMDSIZE ) {
5319  const SIMDType a1( A.load(i,k) );
5320  xmm1 += a1 * B.load(k,j );
5321  xmm2 += a1 * B.load(k,j+1UL);
5322  xmm3 += a1 * B.load(k,j+2UL);
5323  xmm4 += a1 * B.load(k,j+3UL);
5324  }
5325 
5326  (~C)(i,j ) = sum( xmm1 ) * scalar;
5327  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5328  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
5329  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
5330 
5331  for( ; remainder && k<kend; ++k ) {
5332  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5333  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5334  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
5335  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
5336  }
5337  }
5338 
5339  for( ; !( LOW && UPP ) && (j+2UL) <= jend; j+=2UL )
5340  {
5341  const size_t kbegin( ( IsUpper<MT4>::value )
5342  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5343  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5344  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5345 
5346  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5347  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5348 
5349  SIMDType xmm1, xmm2;
5350  size_t k( kbegin );
5351 
5352  for( ; k<kpos; k+=SIMDSIZE ) {
5353  const SIMDType a1( A.load(i,k) );
5354  xmm1 += a1 * B.load(k,j );
5355  xmm2 += a1 * B.load(k,j+1UL);
5356  }
5357 
5358  (~C)(i,j ) = sum( xmm1 ) * scalar;
5359  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5360 
5361  for( ; remainder && k<kend; ++k ) {
5362  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5363  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5364  }
5365  }
5366 
5367  for( ; j<jend; ++j )
5368  {
5369  const size_t kbegin( ( IsUpper<MT4>::value )
5370  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5371  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5372 
5373  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5374  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5375 
5376  SIMDType xmm1;
5377  size_t k( kbegin );
5378 
5379  for( ; k<kpos; k+=SIMDSIZE ) {
5380  xmm1 += A.load(i,k) * B.load(k,j);
5381  }
5382 
5383  (~C)(i,j) = sum( xmm1 ) * scalar;
5384 
5385  for( ; remainder && k<K; ++k ) {
5386  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5387  }
5388  }
5389  }
5390  }
5391 
5392  if( SYM || HERM ) {
5393  for( size_t i=2UL; i<M; ++i ) {
5394  const size_t jend( 2UL * ( i/2UL ) );
5395  for( size_t j=0UL; j<jend; ++j ) {
5396  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5397  }
5398  }
5399  }
5400  else if( LOW && !UPP ) {
5401  for( size_t j=2UL; j<N; ++j ) {
5402  const size_t iend( 2UL * ( j/2UL ) );
5403  for( size_t i=0UL; i<iend; ++i ) {
5404  reset( (~C)(i,j) );
5405  }
5406  }
5407  }
5408  else if( !LOW && UPP ) {
5409  for( size_t i=2UL; i<M; ++i ) {
5410  const size_t jend( 2UL * ( i/2UL ) );
5411  for( size_t j=0UL; j<jend; ++j ) {
5412  reset( (~C)(i,j) );
5413  }
5414  }
5415  }
5416  }
5417  //**********************************************************************************************
5418 
5419  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix kernel for a column-major target: writes (A*B)*scalar into C.
// Each target element is a dot product over k, accumulated with SIMD loads from A and B,
// reduced with sum(), scaled, and then completed by a scalar tail loop where needed.
// SYM, HERM, LOW and UPP are flags declared outside this chunk; they restrict which part of C
// is computed directly (see the jend / j start values and the mirroring loop at the end).
// NOTE(review): the listing skips source line 5438 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
5434  template< typename MT3 // Type of the left-hand side target matrix
5435  , typename MT4 // Type of the left-hand side matrix operand
5436  , typename MT5 // Type of the right-hand side matrix operand
5437  , typename ST2 > // Type of the scalar value
5439  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5440  {
// A scalar tail loop over k is only required if at least one operand type is not padded.
5441  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5442 
5443  const size_t M( A.rows() );
5444  const size_t N( B.columns() );
5445  const size_t K( A.columns() );
5446 
5447  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5448 
// For LOW/UPP the whole target is cleared first; the loops below then only visit part of C.
5449  if( LOW || UPP ) {
5450  reset( ~C );
5451  }
5452 
5453  {
5454  size_t i( 0UL );
5455 
// Tiles of 4 rows x 2 columns; only used when none of SYM/HERM/LOW/UPP is set.
5456  for( ; !SYM && !HERM && !LOW && !UPP && (i+4UL) <= M; i+=4UL )
5457  {
5458  size_t j( 0UL );
5459 
5460  for( ; (j+2UL) <= N; j+=2UL )
5461  {
// The k range is narrowed by the operands' triangular structure: an upper A starts at i, a lower B
// starts at j (rounded down to a multiple of SIMDSIZE); a lower A ends at i+4, an upper B at j+2.
5462  const size_t kbegin( ( IsUpper<MT4>::value )
5463  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5464  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5465  const size_t kend( ( IsLower<MT4>::value )
5466  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
5467  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5468 
// With a remainder, the SIMD loop stops at kend rounded down to a multiple of SIMDSIZE
// (checked by the assertion); the scalar loop further below covers [kpos, kend).
5469  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5470  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5471 
5472  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5473  size_t k( kbegin );
5474 
5475  for( ; k<kpos; k+=SIMDSIZE ) {
5476  const SIMDType a1( A.load(i ,k) );
5477  const SIMDType a2( A.load(i+1UL,k) );
5478  const SIMDType a3( A.load(i+2UL,k) );
5479  const SIMDType a4( A.load(i+3UL,k) );
5480  const SIMDType b1( B.load(k,j ) );
5481  const SIMDType b2( B.load(k,j+1UL) );
5482  xmm1 += a1 * b1;
5483  xmm2 += a1 * b2;
5484  xmm3 += a2 * b1;
5485  xmm4 += a2 * b2;
5486  xmm5 += a3 * b1;
5487  xmm6 += a3 * b2;
5488  xmm7 += a4 * b1;
5489  xmm8 += a4 * b2;
5490  }
5491 
// Plain assignment: the previous content of these eight elements is overwritten.
5492  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5493  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5494  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5495  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5496  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
5497  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
5498  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
5499  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
5500 
5501  for( ; remainder && k<kend; ++k ) {
5502  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5503  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5504  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5505  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5506  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
5507  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
5508  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
5509  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
5510  }
5511  }
5512 
// At most one column is left after the two-column loop: 4 rows x 1 column.
5513  if( j < N )
5514  {
5515  const size_t kbegin( ( IsUpper<MT4>::value )
5516  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5517  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5518  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
5519 
5520  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5521  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5522 
5523  SIMDType xmm1, xmm2, xmm3, xmm4;
5524  size_t k( kbegin );
5525 
5526  for( ; k<kpos; k+=SIMDSIZE ) {
5527  const SIMDType b1( B.load(k,j) );
5528  xmm1 += A.load(i ,k) * b1;
5529  xmm2 += A.load(i+1UL,k) * b1;
5530  xmm3 += A.load(i+2UL,k) * b1;
5531  xmm4 += A.load(i+3UL,k) * b1;
5532  }
5533 
5534  (~C)(i ,j) = sum( xmm1 ) * scalar;
5535  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5536  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
5537  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
5538 
5539  for( ; remainder && k<kend; ++k ) {
5540  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5541  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5542  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
5543  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
5544  }
5545  }
5546  }
5547 
// Tiles of 2 rows x 2 columns. LOW stops the column range at i+2; SYM/HERM/UPP start it at i.
5548  for( ; (i+2UL) <= M; i+=2UL )
5549  {
5550  const size_t jend( LOW ? i+2UL : N );
5551  size_t j( SYM || HERM || UPP ? i : 0UL );
5552 
5553  for( ; (j+2UL) <= jend; j+=2UL )
5554  {
5555  const size_t kbegin( ( IsUpper<MT4>::value )
5556  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5557  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5558  const size_t kend( ( IsLower<MT4>::value )
5559  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5560  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5561 
5562  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5563  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5564 
5565  SIMDType xmm1, xmm2, xmm3, xmm4;
5566  size_t k( kbegin );
5567 
5568  for( ; k<kpos; k+=SIMDSIZE ) {
5569  const SIMDType a1( A.load(i ,k) );
5570  const SIMDType a2( A.load(i+1UL,k) );
5571  const SIMDType b1( B.load(k,j ) );
5572  const SIMDType b2( B.load(k,j+1UL) );
5573  xmm1 += a1 * b1;
5574  xmm2 += a1 * b2;
5575  xmm3 += a2 * b1;
5576  xmm4 += a2 * b2;
5577  }
5578 
5579  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5580  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5581  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5582  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5583 
5584  for( ; remainder && k<kend; ++k ) {
5585  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5586  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5587  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5588  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5589  }
5590  }
5591 
// Leftover single column for the current pair of rows.
5592  if( j < jend )
5593  {
5594  const size_t kbegin( ( IsUpper<MT4>::value )
5595  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5596  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5597  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5598 
5599  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5600  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5601 
5602  SIMDType xmm1, xmm2;
5603  size_t k( kbegin );
5604 
5605  for( ; k<kpos; k+=SIMDSIZE ) {
5606  const SIMDType b1( B.load(k,j) );
5607  xmm1 += A.load(i ,k) * b1;
5608  xmm2 += A.load(i+1UL,k) * b1;
5609  }
5610 
5611  (~C)(i ,j) = sum( xmm1 ) * scalar;
5612  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5613 
5614  for( ; remainder && k<kend; ++k ) {
5615  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5616  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5617  }
5618  }
5619  }
5620 
// At most one row is left after the two-row loop; it is processed two columns at a time.
5621  if( i < M )
5622  {
5623  const size_t jend( LOW ? i+1UL : N );
5624  size_t j( SYM || HERM || UPP ? i : 0UL );
5625 
5626  for( ; (j+2UL) <= jend; j+=2UL )
5627  {
5628  const size_t kbegin( ( IsUpper<MT4>::value )
5629  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5630  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5631  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5632 
5633  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5634  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5635 
5636  SIMDType xmm1, xmm2;
5637  size_t k( kbegin );
5638 
5639  for( ; k<kpos; k+=SIMDSIZE ) {
5640  const SIMDType a1( A.load(i,k) );
5641  xmm1 += a1 * B.load(k,j );
5642  xmm2 += a1 * B.load(k,j+1UL);
5643  }
5644 
5645  (~C)(i,j ) = sum( xmm1 ) * scalar;
5646  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5647 
5648  for( ; remainder && k<kend; ++k ) {
5649  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5650  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5651  }
5652  }
5653 
// Final single element of the last row; here the upper k bound is always K (no kend narrowing).
5654  if( j < jend )
5655  {
5656  const size_t kbegin( ( IsUpper<MT4>::value )
5657  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5658  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5659 
5660  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5661  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5662 
5663  SIMDType xmm1;
5664  size_t k( kbegin );
5665 
5666  for( ; k<kpos; k+=SIMDSIZE ) {
5667  xmm1 += A.load(i,k) * B.load(k,j);
5668  }
5669 
5670  (~C)(i,j) = sum( xmm1 ) * scalar;
5671 
5672  for( ; remainder && k<K; ++k ) {
5673  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5674  }
5675  }
5676  }
5677  }
5678 
// SYM/HERM: fill every element below the diagonal from its mirror element (j,i),
// conjugated in the HERM case.
5679  if( SYM || HERM ) {
5680  for( size_t j=0UL; j<N; ++j ) {
5681  for( size_t i=j+1UL; i<M; ++i ) {
5682  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5683  }
5684  }
5685  }
5686  }
5687  //**********************************************************************************************
5688 
5689  //**Default assignment to dense matrices (large matrices)***************************************
// Fallback large-matrix assignment kernel: forwards all arguments unchanged to
// selectDefaultAssignKernel().
// NOTE(review): the listing skips source line 5707 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
5703  template< typename MT3 // Type of the left-hand side target matrix
5704  , typename MT4 // Type of the left-hand side matrix operand
5705  , typename MT5 // Type of the right-hand side matrix operand
5706  , typename ST2 > // Type of the scalar value
5708  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5709  {
5710  selectDefaultAssignKernel( C, A, B, scalar );
5711  }
5712  //**********************************************************************************************
5713 
5714  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Vectorized large-matrix assignment kernel: picks one of the *mmm helpers based on the
// SYM / HERM / LOW / UPP flags, tested in that order, with mmm() as the general case.
// The helpers are defined outside this chunk (the file includes <blaze/math/dense/MMM.h>).
// The lmmm/ummm/mmm calls pass an extra ST2(0) argument — presumably the factor applied to the
// existing content of C; TODO confirm against the helper signatures.
// NOTE(review): the listing skips source line 5733 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
5729  template< typename MT3 // Type of the left-hand side target matrix
5730  , typename MT4 // Type of the left-hand side matrix operand
5731  , typename MT5 // Type of the right-hand side matrix operand
5732  , typename ST2 > // Type of the scalar value
5734  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5735  {
5736  if( SYM )
5737  smmm( C, A, B, scalar );
5738  else if( HERM )
5739  hmmm( C, A, B, scalar );
5740  else if( LOW )
5741  lmmm( C, A, B, scalar, ST2(0) );
5742  else if( UPP )
5743  ummm( C, A, B, scalar, ST2(0) );
5744  else
5745  mmm( C, A, B, scalar, ST2(0) );
5746  }
5747  //**********************************************************************************************
5748 
5749  //**BLAS-based assignment to dense matrices (default)*******************************************
// Default variant of the BLAS assignment kernel: no BLAS call is made here, the arguments are
// forwarded unchanged to selectLargeAssignKernel().
// NOTE(review): the listing skips source line 5767 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
5763  template< typename MT3 // Type of the left-hand side target matrix
5764  , typename MT4 // Type of the left-hand side matrix operand
5765  , typename MT5 // Type of the right-hand side matrix operand
5766  , typename ST2 > // Type of the scalar value
5768  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5769  {
5770  selectLargeAssignKernel( C, A, B, scalar );
5771  }
5772  //**********************************************************************************************
5773 
5774  //**BLAS-based assignment to dense matrices*****************************************************
// BLAS-backed assignment kernel; only compiled when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled.
//  - A triangular: C is first assigned B, then trmm() is called with A on the CblasLeft side.
//  - else B triangular: C is first assigned A, then trmm() is called with B on the CblasRight side.
//  - otherwise: gemm() is called with the factors ET(scalar) and ET(0).
// The scalar is converted to the element type of the target (ET) before being passed on.
// NOTE(review): the listing skips source line 5793 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
5775 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
5776 
5789  template< typename MT3 // Type of the left-hand side target matrix
5790  , typename MT4 // Type of the left-hand side matrix operand
5791  , typename MT5 // Type of the right-hand side matrix operand
5792  , typename ST2 > // Type of the scalar value
5794  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5795  {
5796  typedef ElementType_<MT3> ET;
5797 
5798  if( IsTriangular<MT4>::value ) {
5799  assign( C, B );
5800  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5801  }
5802  else if( IsTriangular<MT5>::value ) {
5803  assign( C, A );
5804  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5805  }
5806  else {
5807  gemm( C, A, B, ET(scalar), ET(0) );
5808  }
5809  }
5810 #endif
5811  //**********************************************************************************************
5812 
5813  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled product expression to a sparse matrix: the expression is evaluated
// serially into a temporary of type TmpType, which is then passed through a ForwardFunctor and
// assigned to the target.
// NOTE(review): the listing dropped the content of source lines 5829-5839; TmpType and
// ForwardFunctor are not declared in the visible text (presumably they were defined on the
// dropped lines) — confirm against the original header.
5825  template< typename MT // Type of the target sparse matrix
5826  , bool SO > // Storage order of the target sparse matrix
5827  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5828  {
5830 
5832 
5839 
5840  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5841  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5842 
5843  const ForwardFunctor fwd;
5844 
5845  const TmpType tmp( serial( rhs ) );
5846  assign( ~lhs, fwd( tmp ) );
5847  }
5848  //**********************************************************************************************
5849 
5850  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled product expression to a dense matrix.
// The two operands of the wrapped product are taken from rhs.matrix_, evaluated serially into
// A and B, and handed together with rhs.scalar_ to selectAddAssignKernel().
// MMM, LT and RT are type names declared outside this chunk.
// NOTE(review): the listing dropped the content of source line 5866 — confirm against the header.
5862  template< typename MT // Type of the target dense matrix
5863  , bool SO > // Storage order of the target dense matrix
5864  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5865  {
5867 
5868  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5869  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5870 
5871  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5872  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5873 
// Nothing to add if the target is empty or the inner dimension of the product is zero.
5874  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5875  return;
5876  }
5877 
5878  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5879  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5880 
5881  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5882  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5883  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5884  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5885  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5886  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5887 
5888  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5889  }
5890  //**********************************************************************************************
5891 
5892  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment: chooses between selectSmallAddAssignKernel()
// and selectBlasAddAssignKernel().
// The visible part of the condition selects the small kernel when the number of target elements
// (rows * columns) is below DMATTDMATMULT_THRESHOLD.
// NOTE(review): the listing dropped source line 5909, which opened the `if(` and held the first
// part of the condition — confirm the full condition against the original header.
5903  template< typename MT3 // Type of the left-hand side target matrix
5904  , typename MT4 // Type of the left-hand side matrix operand
5905  , typename MT5 // Type of the right-hand side matrix operand
5906  , typename ST2 > // Type of the scalar value
5907  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5908  {
5910  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
5911  selectSmallAddAssignKernel( C, A, B, scalar );
5912  else
5913  selectBlasAddAssignKernel( C, A, B, scalar );
5914  }
5915  //**********************************************************************************************
5916 
5917  //**Default addition assignment to dense matrices (general/general)*****************************
// Default addition assignment kernel, enabled when neither operand type is diagonal.
// The scaled product A * B * scalar is evaluated serially into a temporary of type ResultType
// (declared outside this chunk), which is then added to C via addAssign().
5931  template< typename MT3 // Type of the left-hand side target matrix
5932  , typename MT4 // Type of the left-hand side matrix operand
5933  , typename MT5 // Type of the right-hand side matrix operand
5934  , typename ST2 > // Type of the scalar value
5935  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
5936  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5937  {
5938  const ResultType tmp( serial( A * B * scalar ) );
5939  addAssign( C, tmp );
5940  }
5941  //**********************************************************************************************
5942 
5943  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
// Default addition assignment kernel for a row-major target, enabled when only the right-hand
// operand B is diagonal. Only the diagonal of B is read, so each update is
// C(i,j) += A(i,j) * B(j,j) * scalar.
5957  template< typename MT3 // Type of the left-hand side target matrix
5958  , typename MT4 // Type of the left-hand side matrix operand
5959  , typename MT5 // Type of the right-hand side matrix operand
5960  , typename ST2 > // Type of the scalar value
5961  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
5962  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5963  {
5964  const size_t M( A.rows() );
5965  const size_t N( B.columns() );
5966 
5967  for( size_t i=0UL; i<M; ++i )
5968  {
// The column range of row i is narrowed by the triangular structure of A: an upper A starts at
// the diagonal, a lower A ends at it; the strict variants exclude the diagonal element itself.
5969  const size_t jbegin( ( IsUpper<MT4>::value )
5970  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5971  :( 0UL ) );
5972  const size_t jend( ( IsLower<MT4>::value )
5973  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
5974  :( N ) );
5975  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5976 
// The loop is unrolled by two: jpos is jbegin plus the column count rounded down to an even number.
5977  const size_t jnum( jend - jbegin );
5978  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5979 
5980  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5981  (~C)(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5982  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5983  }
// Odd column count: handle the single remaining column.
5984  if( jpos < jend ) {
5985  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
5986  }
5987  }
5988  }
5989  //**********************************************************************************************
5990 
5991  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
// Default addition assignment kernel for a column-major target, enabled when only the right-hand
// operand B is diagonal. The target is traversed in square blocks of BLOCK_SIZE elements per
// side; inside a block each update is C(i,j) += A(i,j) * B(j,j) * scalar.
6005  template< typename MT3 // Type of the left-hand side target matrix
6006  , typename MT4 // Type of the left-hand side matrix operand
6007  , typename MT5 // Type of the right-hand side matrix operand
6008  , typename ST2 > // Type of the scalar value
6009  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6010  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6011  {
6012  constexpr size_t block( BLOCK_SIZE );
6013 
6014  const size_t M( A.rows() );
6015  const size_t N( B.columns() );
6016 
6017  for( size_t jj=0UL; jj<N; jj+=block ) {
6018  const size_t jend( min( N, jj+block ) );
6019  for( size_t ii=0UL; ii<M; ii+=block ) {
6020  const size_t iend( min( M, ii+block ) );
6021  for( size_t j=jj; j<jend; ++j )
6022  {
// The row range of column j is the current block clipped by the triangular structure of A:
// a lower A starts at the diagonal, an upper A ends at it (strict variants exclude the diagonal).
6023  const size_t ibegin( ( IsLower<MT4>::value )
6024  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
6025  :( ii ) );
6026  const size_t ipos( ( IsUpper<MT4>::value )
6027  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
6028  :( iend ) );
6029 
6030  for( size_t i=ibegin; i<ipos; ++i ) {
6031  (~C)(i,j) += A(i,j) * B(j,j) * scalar;
6032  }
6033  }
6034  }
6035  }
6036  }
6037  //**********************************************************************************************
6038 
6039  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
// Default addition assignment kernel for a row-major target, enabled when only the left-hand
// operand A is diagonal. The target is traversed in square blocks of BLOCK_SIZE elements per
// side; inside a block each update is C(i,j) += A(i,i) * B(i,j) * scalar.
6053  template< typename MT3 // Type of the left-hand side target matrix
6054  , typename MT4 // Type of the left-hand side matrix operand
6055  , typename MT5 // Type of the right-hand side matrix operand
6056  , typename ST2 > // Type of the scalar value
6057  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6058  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6059  {
6060  constexpr size_t block( BLOCK_SIZE );
6061 
6062  const size_t M( A.rows() );
6063  const size_t N( B.columns() );
6064 
6065  for( size_t ii=0UL; ii<M; ii+=block ) {
6066  const size_t iend( min( M, ii+block ) );
6067  for( size_t jj=0UL; jj<N; jj+=block ) {
6068  const size_t jend( min( N, jj+block ) );
6069  for( size_t i=ii; i<iend; ++i )
6070  {
// The column range of row i is the current block clipped by the triangular structure of B:
// an upper B starts at the diagonal, a lower B ends at it (strict variants exclude the diagonal).
6071  const size_t jbegin( ( IsUpper<MT5>::value )
6072  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
6073  :( jj ) );
6074  const size_t jpos( ( IsLower<MT5>::value )
6075  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
6076  :( jend ) );
6077 
6078  for( size_t j=jbegin; j<jpos; ++j ) {
6079  (~C)(i,j) += A(i,i) * B(i,j) * scalar;
6080  }
6081  }
6082  }
6083  }
6084  }
6085  //**********************************************************************************************
6086 
6087  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
// Default addition assignment kernel for a column-major target, enabled when only the left-hand
// operand A is diagonal. Only the diagonal of A is read, so each update is
// C(i,j) += A(i,i) * B(i,j) * scalar.
6101  template< typename MT3 // Type of the left-hand side target matrix
6102  , typename MT4 // Type of the left-hand side matrix operand
6103  , typename MT5 // Type of the right-hand side matrix operand
6104  , typename ST2 > // Type of the scalar value
6105  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6106  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6107  {
6108  const size_t M( A.rows() );
6109  const size_t N( B.columns() );
6110 
6111  for( size_t j=0UL; j<N; ++j )
6112  {
// The row range of column j is narrowed by the triangular structure of B: a lower B starts at
// the diagonal, an upper B ends at it; the strict variants exclude the diagonal element itself.
6113  const size_t ibegin( ( IsLower<MT5>::value )
6114  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
6115  :( 0UL ) );
6116  const size_t iend( ( IsUpper<MT5>::value )
6117  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
6118  :( M ) );
6119  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6120 
// The loop is unrolled by two: ipos is ibegin plus the row count rounded down to an even number.
6121  const size_t inum( iend - ibegin );
6122  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6123 
6124  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6125  (~C)(i ,j) += A(i ,i ) * B(i ,j) * scalar;
6126  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
6127  }
// Odd row count: handle the single remaining row.
6128  if( ipos < iend ) {
6129  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
6130  }
6131  }
6132  }
6133  //**********************************************************************************************
6134 
6135  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
// Default addition assignment kernel, enabled when both operand types are diagonal.
// Only diagonal elements are touched: C(i,i) += A(i,i) * B(i,i) * scalar for every row of A.
// Unlike the sibling kernels, C is indexed directly here, without the `~` operator.
6149  template< typename MT3 // Type of the left-hand side target matrix
6150  , typename MT4 // Type of the left-hand side matrix operand
6151  , typename MT5 // Type of the right-hand side matrix operand
6152  , typename ST2 > // Type of the scalar value
6153  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
6154  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6155  {
6156  for( size_t i=0UL; i<A.rows(); ++i ) {
6157  C(i,i) += A(i,i) * B(i,i) * scalar;
6158  }
6159  }
6160  //**********************************************************************************************
6161 
6162  //**Default addition assignment to dense matrices (small matrices)******************************
// Fallback small-matrix addition assignment kernel: forwards all arguments unchanged to
// selectDefaultAddAssignKernel().
// NOTE(review): the listing skips source line 6180 between the template header and the function
// name; presumably it held the return type / enable-if constraint — confirm against the header.
6176  template< typename MT3 // Type of the left-hand side target matrix
6177  , typename MT4 // Type of the left-hand side matrix operand
6178  , typename MT5 // Type of the right-hand side matrix operand
6179  , typename ST2 > // Type of the scalar value
6181  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6182  {
6183  selectDefaultAddAssignKernel( C, A, B, scalar );
6184  }
6185  //**********************************************************************************************
6186 
6187  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
6202  template< typename MT3 // Type of the left-hand side target matrix
6203  , typename MT4 // Type of the left-hand side matrix operand
6204  , typename MT5 // Type of the right-hand side matrix operand
6205  , typename ST2 > // Type of the scalar value
6207  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6208  {
6209  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6210 
6211  const size_t M( A.rows() );
6212  const size_t N( B.columns() );
6213  const size_t K( A.columns() );
6214 
6215  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6216 
6217  size_t i( 0UL );
6218 
6219  for( ; (i+2UL) <= M; i+=2UL )
6220  {
6221  const size_t jend( LOW ? i+2UL : N );
6222  size_t j( UPP ? i : 0UL );
6223 
6224  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
6225  {
6226  const size_t kbegin( ( IsUpper<MT4>::value )
6227  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6228  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6229  const size_t kend( ( IsLower<MT4>::value )
6230  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
6231  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
6232 
6233  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6234  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6235 
6236  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6237  size_t k( kbegin );
6238 
6239  for( ; k<kpos; k+=SIMDSIZE ) {
6240  const SIMDType a1( A.load(i ,k) );
6241  const SIMDType a2( A.load(i+1UL,k) );
6242  const SIMDType b1( B.load(k,j ) );
6243  const SIMDType b2( B.load(k,j+1UL) );
6244  const SIMDType b3( B.load(k,j+2UL) );
6245  const SIMDType b4( B.load(k,j+3UL) );
6246  xmm1 += a1 * b1;
6247  xmm2 += a1 * b2;
6248  xmm3 += a1 * b3;
6249  xmm4 += a1 * b4;
6250  xmm5 += a2 * b1;
6251  xmm6 += a2 * b2;
6252  xmm7 += a2 * b3;
6253  xmm8 += a2 * b4;
6254  }
6255 
6256  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6257  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6258  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
6259  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
6260  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
6261  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
6262  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
6263  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
6264 
6265  for( ; remainder && k<kend; ++k ) {
6266  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6267  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6268  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
6269  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
6270  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6271  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6272  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
6273  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
6274  }
6275  }
6276 
6277  for( ; (j+2UL) <= jend; j+=2UL )
6278  {
6279  const size_t kbegin( ( IsUpper<MT4>::value )
6280  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6281  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6282  const size_t kend( ( IsLower<MT4>::value )
6283  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6284  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6285 
6286  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6287  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6288 
6289  SIMDType xmm1, xmm2, xmm3, xmm4;
6290  size_t k( kbegin );
6291 
6292  for( ; k<kpos; k+=SIMDSIZE ) {
6293  const SIMDType a1( A.load(i ,k) );
6294  const SIMDType a2( A.load(i+1UL,k) );
6295  const SIMDType b1( B.load(k,j ) );
6296  const SIMDType b2( B.load(k,j+1UL) );
6297  xmm1 += a1 * b1;
6298  xmm2 += a1 * b2;
6299  xmm3 += a2 * b1;
6300  xmm4 += a2 * b2;
6301  }
6302 
6303  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6304  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6305  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6306  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6307 
6308  for( ; remainder && k<kend; ++k ) {
6309  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6310  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6311  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6312  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6313  }
6314  }
6315 
6316  if( j < jend )
6317  {
6318  const size_t kbegin( ( IsUpper<MT4>::value )
6319  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6320  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6321  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6322 
6323  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6324  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6325 
6326  SIMDType xmm1, xmm2;
6327  size_t k( kbegin );
6328 
6329  for( ; k<kpos; k+=SIMDSIZE ) {
6330  const SIMDType b1( B.load(k,j) );
6331  xmm1 += A.load(i ,k) * b1;
6332  xmm2 += A.load(i+1UL,k) * b1;
6333  }
6334 
6335  (~C)(i ,j) += sum( xmm1 ) * scalar;
6336  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6337 
6338  for( ; remainder && k<kend; ++k ) {
6339  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6340  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6341  }
6342  }
6343  }
6344 
6345  if( i < M )
6346  {
6347  const size_t jend( LOW ? i+1UL : N );
6348  size_t j( UPP ? i : 0UL );
6349 
6350  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
6351  {
6352  const size_t kbegin( ( IsUpper<MT4>::value )
6353  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6354  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6355  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
6356 
6357  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6358  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6359 
6360  SIMDType xmm1, xmm2, xmm3, xmm4;
6361  size_t k( kbegin );
6362 
6363  for( ; k<kpos; k+=SIMDSIZE ) {
6364  const SIMDType a1( A.load(i,k) );
6365  xmm1 += a1 * B.load(k,j );
6366  xmm2 += a1 * B.load(k,j+1UL);
6367  xmm3 += a1 * B.load(k,j+2UL);
6368  xmm4 += a1 * B.load(k,j+3UL);
6369  }
6370 
6371  (~C)(i,j ) += sum( xmm1 ) * scalar;
6372  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6373  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
6374  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
6375 
6376  for( ; remainder && k<kend; ++k ) {
6377  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6378  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6379  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
6380  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
6381  }
6382  }
6383 
6384  for( ; (j+2UL) <= jend; j+=2UL )
6385  {
6386  const size_t kbegin( ( IsUpper<MT4>::value )
6387  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6388  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6389  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6390 
6391  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6392  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6393 
6394  SIMDType xmm1, xmm2;
6395  size_t k( kbegin );
6396 
6397  for( ; k<kpos; k+=SIMDSIZE ) {
6398  const SIMDType a1( A.load(i,k) );
6399  xmm1 += a1 * B.load(k,j );
6400  xmm2 += a1 * B.load(k,j+1UL);
6401  }
6402 
6403  (~C)(i,j ) += sum( xmm1 ) * scalar;
6404  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6405 
6406  for( ; remainder && k<kend; ++k ) {
6407  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6408  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6409  }
6410  }
6411 
6412  if( j < jend )
6413  {
6414  const size_t kbegin( ( IsUpper<MT4>::value )
6415  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6416  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6417 
6418  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6419  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6420 
6421  SIMDType xmm1;
6422  size_t k( kbegin );
6423 
6424  for( ; k<kpos; k+=SIMDSIZE ) {
6425  xmm1 += A.load(i,k) * B.load(k,j);
6426  }
6427 
6428  (~C)(i,j) += sum( xmm1 ) * scalar;
6429 
6430  for( ; remainder && k<K; ++k ) {
6431  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6432  }
6433  }
6434  }
6435  }
6436  //**********************************************************************************************
6437 
6438  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
/*!\brief Vectorized addition assignment kernel for small products with a column-major target
//        (\f$ C+=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (taken as DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Every updated element C(i,j) is increased by \a scalar times the dot product of row i of A and
// column j of B. The dot product is accumulated in SIMD registers from A.load(i,k) and
// B.load(k,j) in steps of SIMDSIZE and reduced with sum(). If at least one operand is not padded
// the SIMD loop stops at the last full SIMDSIZE boundary and a scalar loop finishes the k range.
// Rows are processed in tiles of four (only if neither LOW nor UPP is set), then two, then one;
// columns are processed in tiles of two, then one.
//
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing (the declaration jumps from the template parameter list straight to the function
// name), so the condition under which this overload is selected cannot be stated here.
// NOTE(review): the accumulators are declared without an initializer and then updated with +=;
// this relies on SIMDType's default constructor yielding a zero vector - TODO confirm.
*/
6453  template< typename MT3 // Type of the left-hand side target matrix
6454  , typename MT4 // Type of the left-hand side matrix operand
6455  , typename MT5 // Type of the right-hand side matrix operand
6456  , typename ST2 > // Type of the scalar value
6458  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6459  {
      // A scalar clean-up loop over k is only needed if at least one operand is not padded.
6460  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6461 
6462  const size_t M( A.rows() );
6463  const size_t N( B.columns() );
6464  const size_t K( A.columns() );
6465 
6466  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6467 
6468  size_t i( 0UL );
6469 
      // Tiles of four rows; skipped entirely when LOW or UPP is set.
6470  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
6471  {
6472  size_t j( 0UL );
6473 
      // 4x2 tile: rows i..i+3, columns j..j+1.
6474  for( ; (j+2UL) <= N; j+=2UL )
6475  {
      // Start of the k range: i for an upper A, j for a lower B (the larger of both if both
      // apply), masked with size_t(-SIMDSIZE), i.e. moved down to a SIMDSIZE boundary
      // (assuming SIMDSIZE is a power of two).
6476  const size_t kbegin( ( IsUpper<MT4>::value )
6477  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6478  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // End of the k range: cut at i+4 for a lower A and at j+2 for an upper B (one past the
      // last row/column of the tile), otherwise K.
6479  const size_t kend( ( IsLower<MT4>::value )
6480  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
6481  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6482 
      // End of the SIMD loop: last multiple of SIMDSIZE not exceeding kend when a remainder
      // loop is required, kend itself otherwise.
6483  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6484  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6485 
      // One accumulator per element of the tile (row-wise: xmm1/xmm2 = row i, ...).
6486  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6487  size_t k( kbegin );
6488 
6489  for( ; k<kpos; k+=SIMDSIZE ) {
6490  const SIMDType a1( A.load(i ,k) );
6491  const SIMDType a2( A.load(i+1UL,k) );
6492  const SIMDType a3( A.load(i+2UL,k) );
6493  const SIMDType a4( A.load(i+3UL,k) );
6494  const SIMDType b1( B.load(k,j ) );
6495  const SIMDType b2( B.load(k,j+1UL) );
6496  xmm1 += a1 * b1;
6497  xmm2 += a1 * b2;
6498  xmm3 += a2 * b1;
6499  xmm4 += a2 * b2;
6500  xmm5 += a3 * b1;
6501  xmm6 += a3 * b2;
6502  xmm7 += a4 * b1;
6503  xmm8 += a4 * b2;
6504  }
6505 
      // Horizontal reduction of each accumulator, scaled and added to the target element.
6506  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6507  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6508  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6509  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6510  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
6511  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
6512  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
6513  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
6514 
      // Scalar clean-up for the k values in [kpos,kend).
6515  for( ; remainder && k<kend; ++k ) {
6516  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6517  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6518  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6519  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6520  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
6521  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
6522  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
6523  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
6524  }
6525  }
6526 
      // 4x1 tile: the single left-over column when N is odd.
6527  if( j < N )
6528  {
6529  const size_t kbegin( ( IsUpper<MT4>::value )
6530  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6531  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6532  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
6533 
6534  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6535  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6536 
6537  SIMDType xmm1, xmm2, xmm3, xmm4;
6538  size_t k( kbegin );
6539 
6540  for( ; k<kpos; k+=SIMDSIZE ) {
6541  const SIMDType b1( B.load(k,j) );
6542  xmm1 += A.load(i ,k) * b1;
6543  xmm2 += A.load(i+1UL,k) * b1;
6544  xmm3 += A.load(i+2UL,k) * b1;
6545  xmm4 += A.load(i+3UL,k) * b1;
6546  }
6547 
6548  (~C)(i ,j) += sum( xmm1 ) * scalar;
6549  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6550  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
6551  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
6552 
6553  for( ; remainder && k<kend; ++k ) {
6554  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6555  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6556  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
6557  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
6558  }
6559  }
6560  }
6561 
      // Tiles of two rows (all rows when LOW or UPP is set, otherwise the rows left over by the
      // four-row loop).
6562  for( ; (i+2UL) <= M; i+=2UL )
6563  {
      // Column range of the update: with LOW it ends at i+2, with UPP it starts at i.
6564  const size_t jend( LOW ? i+2UL : N );
6565  size_t j( UPP ? i : 0UL );
6566 
      // 2x2 tile.
6567  for( ; (j+2UL) <= jend; j+=2UL )
6568  {
6569  const size_t kbegin( ( IsUpper<MT4>::value )
6570  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6571  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6572  const size_t kend( ( IsLower<MT4>::value )
6573  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6574  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6575 
6576  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6577  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6578 
6579  SIMDType xmm1, xmm2, xmm3, xmm4;
6580  size_t k( kbegin );
6581 
6582  for( ; k<kpos; k+=SIMDSIZE ) {
6583  const SIMDType a1( A.load(i ,k) );
6584  const SIMDType a2( A.load(i+1UL,k) );
6585  const SIMDType b1( B.load(k,j ) );
6586  const SIMDType b2( B.load(k,j+1UL) );
6587  xmm1 += a1 * b1;
6588  xmm2 += a1 * b2;
6589  xmm3 += a2 * b1;
6590  xmm4 += a2 * b2;
6591  }
6592 
6593  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6594  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6595  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6596  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6597 
6598  for( ; remainder && k<kend; ++k ) {
6599  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6600  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6601  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6602  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6603  }
6604  }
6605 
      // 2x1 tile: the single left-over column of this row pair.
6606  if( j < jend )
6607  {
6608  const size_t kbegin( ( IsUpper<MT4>::value )
6609  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6610  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6611  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6612 
6613  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6614  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6615 
6616  SIMDType xmm1, xmm2;
6617  size_t k( kbegin );
6618 
6619  for( ; k<kpos; k+=SIMDSIZE ) {
6620  const SIMDType b1( B.load(k,j) );
6621  xmm1 += A.load(i ,k) * b1;
6622  xmm2 += A.load(i+1UL,k) * b1;
6623  }
6624 
6625  (~C)(i ,j) += sum( xmm1 ) * scalar;
6626  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6627 
6628  for( ; remainder && k<kend; ++k ) {
6629  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6630  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6631  }
6632  }
6633  }
6634 
      // Single left-over row when M is odd.
6635  if( i < M )
6636  {
6637  const size_t jend( LOW ? i+1UL : N );
6638  size_t j( UPP ? i : 0UL );
6639 
      // 1x2 tile.
6640  for( ; (j+2UL) <= jend; j+=2UL )
6641  {
6642  const size_t kbegin( ( IsUpper<MT4>::value )
6643  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6644  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6645  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6646 
6647  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6648  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6649 
6650  SIMDType xmm1, xmm2;
6651  size_t k( kbegin );
6652 
6653  for( ; k<kpos; k+=SIMDSIZE ) {
6654  const SIMDType a1( A.load(i,k) );
6655  xmm1 += a1 * B.load(k,j );
6656  xmm2 += a1 * B.load(k,j+1UL);
6657  }
6658 
6659  (~C)(i,j ) += sum( xmm1 ) * scalar;
6660  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6661 
6662  for( ; remainder && k<kend; ++k ) {
6663  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6664  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6665  }
6666  }
6667 
      // 1x1 tile: last element of the left-over row. Unlike the other tiles no upper bound is
      // derived from the operand structure here; the k range always runs up to K.
6668  if( j < jend )
6669  {
6670  const size_t kbegin( ( IsUpper<MT4>::value )
6671  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6672  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6673 
6674  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6675  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6676 
6677  SIMDType xmm1;
6678  size_t k( kbegin );
6679 
6680  for( ; k<kpos; k+=SIMDSIZE ) {
6681  xmm1 += A.load(i,k) * B.load(k,j);
6682  }
6683 
6684  (~C)(i,j) += sum( xmm1 ) * scalar;
6685 
6686  for( ; remainder && k<K; ++k ) {
6687  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6688  }
6689  }
6690  }
6691  }
6692  //**********************************************************************************************
6693 
6694  //**Default addition assignment to dense matrices (large matrices)******************************
/*!\brief Addition assignment kernel for large products, default (non-vectorized) variant.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This variant has no dedicated large-matrix implementation: it forwards all arguments unchanged
// to selectDefaultAddAssignKernel().
//
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing, so the condition that distinguishes this overload from the vectorized one of the
// same name cannot be stated here.
*/
6708  template< typename MT3 // Type of the left-hand side target matrix
6709  , typename MT4 // Type of the left-hand side matrix operand
6710  , typename MT5 // Type of the right-hand side matrix operand
6711  , typename ST2 > // Type of the scalar value
6713  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6714  {
6715  selectDefaultAddAssignKernel( C, A, B, scalar );
6716  }
6717  //**********************************************************************************************
6718 
6719  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
/*!\brief Addition assignment kernel for large products, vectorized variant.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Delegates to one of the multiplication routines lmmm(), ummm() or mmm(): lmmm() if LOW is
// set, otherwise ummm() if UPP is set, otherwise mmm(). In all three cases \a scalar and
// ST2(1) are passed as the trailing two arguments.
//
// NOTE(review): presumably the trailing arguments are the factors applied to the product and to
// the existing content of C (so ST2(1) keeps C and turns the call into an addition) - confirm
// against blaze/math/dense/MMM.h.
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing.
*/
6734  template< typename MT3 // Type of the left-hand side target matrix
6735  , typename MT4 // Type of the left-hand side matrix operand
6736  , typename MT5 // Type of the right-hand side matrix operand
6737  , typename ST2 > // Type of the scalar value
6739  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6740  {
6741  if( LOW )
6742  lmmm( C, A, B, scalar, ST2(1) );
6743  else if( UPP )
6744  ummm( C, A, B, scalar, ST2(1) );
6745  else
6746  mmm( C, A, B, scalar, ST2(1) );
6747  }
6748  //**********************************************************************************************
6749 
6750  //**BLAS-based addition assignment to dense matrices (default)**********************************
/*!\brief Addition assignment kernel used in place of a BLAS call (default variant).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This variant does not call any BLAS routine: it forwards all arguments unchanged to
// selectLargeAddAssignKernel().
//
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing, so the condition that distinguishes this overload from the BLAS-based one of the
// same name cannot be stated here.
*/
6764  template< typename MT3 // Type of the left-hand side target matrix
6765  , typename MT4 // Type of the left-hand side matrix operand
6766  , typename MT5 // Type of the right-hand side matrix operand
6767  , typename ST2 > // Type of the scalar value
6769  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6770  {
6771  selectLargeAddAssignKernel( C, A, B, scalar );
6772  }
6773  //**********************************************************************************************
6774 
6775  //**BLAS-based addition assignment to dense matrices********************************************
6776 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
6777 
/*!\brief BLAS-based addition assignment kernel.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Three cases are distinguished by the operand types:
//  - A is triangular: B is evaluated serially into a temporary of C's result type, trmm() is
//    called on that temporary with A as the left-side operand, and the temporary is added to C.
//  - otherwise, B is triangular: the same with the roles swapped (temporary built from A, B
//    passed as the right-side operand).
//  - otherwise: gemm() is called directly on C with the factors ET(scalar) and ET(1).
// The scaling factor is converted to the element type of C before it is handed to the BLAS
// wrappers. This function only exists if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled (see the surrounding #if).
//
// NOTE(review): presumably trmm() overwrites the temporary with the scaled triangular product
// and gemm() computes C = ET(scalar)*A*B + ET(1)*C - confirm against blaze/math/blas/trmm.h and
// blaze/math/blas/gemm.h.
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing.
*/
6790  template< typename MT3 // Type of the left-hand side target matrix
6791  , typename MT4 // Type of the left-hand side matrix operand
6792  , typename MT5 // Type of the right-hand side matrix operand
6793  , typename ST2 > // Type of the scalar value
6795  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6796  {
6797  typedef ElementType_<MT3> ET;
6798 
6799  if( IsTriangular<MT4>::value ) {
      // Triangular left operand: work on a copy of B, then add the result to C.
6800  ResultType_<MT3> tmp( serial( B ) );
6801  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6802  addAssign( C, tmp );
6803  }
6804  else if( IsTriangular<MT5>::value ) {
      // Triangular right operand: work on a copy of A, then add the result to C.
6805  ResultType_<MT3> tmp( serial( A ) );
6806  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6807  addAssign( C, tmp );
6808  }
6809  else {
6810  gemm( C, A, B, ET(scalar), ET(1) );
6811  }
6812  }
6813 #endif
6814  //**********************************************************************************************
6815 
6816  //**Addition assignment to sparse matrices******************************************************
6817  // No special implementation for the addition assignment to sparse matrices.
6818  //**********************************************************************************************
6819 
6820  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a scaled matrix product to a dense matrix
//        (\f$ C-=s*A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
// \return void
//
// The function extracts the two operands of the wrapped product (rhs.matrix_), returns early
// without touching \a lhs if the target has no rows or no columns or if the inner dimension
// (left.columns()) is zero, evaluates both operands serially into the operand types LT and RT,
// and hands them together with rhs.scalar_ to selectSubAssignKernel().
//
// NOTE(review): one line of the original body (directly after the opening brace) is missing
// from this listing.
*/
6832  template< typename MT // Type of the target dense matrix
6833  , bool SO > // Storage order of the target dense matrix
6834  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6835  {
6837 
6838  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6839  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6840 
6841  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6842  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6843 
      // Nothing to subtract for an empty target or an empty inner dimension.
6844  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6845  return;
6846  }
6847 
6848  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6849  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6850 
6851  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6852  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6853  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6854  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6855  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6856  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6857 
6858  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6859  }
6860  //**********************************************************************************************
6861 
6862  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Selection of the kernel for a subtraction assignment of a scaled matrix product
//        (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Chooses between selectSmallSubAssignKernel() and selectBlasSubAssignKernel(). The visible part
// of the condition selects the small kernel when the number of elements of the target,
// C.rows() * C.columns(), is below DMATTDMATMULT_THRESHOLD.
//
// NOTE(review): the line that opens the `if` statement (and holds the first part of its
// condition) is missing from this listing; only the trailing part of the condition is visible.
// It must be restored from the upstream header before this text can be compiled.
*/
6873  template< typename MT3 // Type of the left-hand side target matrix
6874  , typename MT4 // Type of the left-hand side matrix operand
6875  , typename MT5 // Type of the right-hand side matrix operand
6876  , typename ST2 > // Type of the scalar value
6877  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6878  {
6880  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
6881  selectSmallSubAssignKernel( C, A, B, scalar );
6882  else
6883  selectBlasSubAssignKernel( C, A, B, scalar );
6884  }
6885  //**********************************************************************************************
6886 
6887  //**Default subtraction assignment to dense matrices (general/general)**************************
/*!\brief Default subtraction assignment kernel for two non-diagonal operands
//        (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Enabled only if neither MT4 nor MT5 is diagonal. The scaled product A * B * scalar is first
// evaluated serially into a temporary of type ResultType, which is then subtracted from \a C
// via subAssign().
*/
6901  template< typename MT3 // Type of the left-hand side target matrix
6902  , typename MT4 // Type of the left-hand side matrix operand
6903  , typename MT5 // Type of the right-hand side matrix operand
6904  , typename ST2 > // Type of the scalar value
6905  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
6906  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6907  {
6908  const ResultType tmp( serial( A * B * scalar ) );
6909  subAssign( C, tmp );
6910  }
6911  //**********************************************************************************************
6912 
6913  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
/*!\brief Default subtraction assignment kernel for a row-major target, a non-diagonal left
//        operand and a diagonal right operand (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (taken as DenseMatrix<MT3,false>).
// \param A The left-hand side multiplication operand (not diagonal).
// \param B The right-hand side multiplication operand (diagonal).
// \param scalar The scaling factor.
// \return void
//
// Since only the diagonal of B is read, each target element is updated with a single product:
// C(i,j) -= A(i,j) * B(j,j) * scalar. The target is traversed row by row; within a row the
// column range is narrowed according to the triangular structure of A, and the column loop is
// unrolled by a factor of two with one trailing element handled separately.
*/
6927  template< typename MT3 // Type of the left-hand side target matrix
6928  , typename MT4 // Type of the left-hand side matrix operand
6929  , typename MT5 // Type of the right-hand side matrix operand
6930  , typename ST2 > // Type of the scalar value
6931  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6932  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6933  {
6934  const size_t M( A.rows() );
6935  const size_t N( B.columns() );
6936 
6937  for( size_t i=0UL; i<M; ++i )
6938  {
      // Column range of row i: starts at the diagonal for an upper A (one past it if strictly
      // upper) and ends at the diagonal for a lower A (excluding it if strictly lower).
6939  const size_t jbegin( ( IsUpper<MT4>::value )
6940  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6941  :( 0UL ) );
6942  const size_t jend( ( IsLower<MT4>::value )
6943  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6944  :( N ) );
6945  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6946 
      // jpos marks the end of the part of the range that can be processed two columns at a time
      // (jnum with its lowest bit cleared).
6947  const size_t jnum( jend - jbegin );
6948  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6949 
6950  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6951  (~C)(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6952  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6953  }
      // Odd number of columns: one element is left.
6954  if( jpos < jend ) {
6955  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
6956  }
6957  }
6958  }
6959  //**********************************************************************************************
6960 
6961  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
/*!\brief Default subtraction assignment kernel for a column-major target, a non-diagonal left
//        operand and a diagonal right operand (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (taken as DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand (not diagonal).
// \param B The right-hand side multiplication operand (diagonal).
// \param scalar The scaling factor.
// \return void
//
// Since only the diagonal of B is read, each target element is updated with a single product:
// C(i,j) -= A(i,j) * B(j,j) * scalar. The index space is traversed in square blocks of
// BLOCK_SIZE columns by BLOCK_SIZE rows; inside a block the row range of every column is
// additionally narrowed according to the triangular structure of A.
*/
6975  template< typename MT3 // Type of the left-hand side target matrix
6976  , typename MT4 // Type of the left-hand side matrix operand
6977  , typename MT5 // Type of the right-hand side matrix operand
6978  , typename ST2 > // Type of the scalar value
6979  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6980  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6981  {
6982  constexpr size_t block( BLOCK_SIZE );
6983 
6984  const size_t M( A.rows() );
6985  const size_t N( B.columns() );
6986 
      // Outer two loops: block of columns [jj,jend) times block of rows [ii,iend).
6987  for( size_t jj=0UL; jj<N; jj+=block ) {
6988  const size_t jend( min( N, jj+block ) );
6989  for( size_t ii=0UL; ii<M; ii+=block ) {
6990  const size_t iend( min( M, ii+block ) );
6991  for( size_t j=jj; j<jend; ++j )
6992  {
      // Row range of column j inside the current block: for a lower A it starts at the diagonal
      // (one past it if strictly lower); for an upper A it ends at the diagonal (excluding it
      // if strictly upper). Both bounds are clamped to the block.
6993  const size_t ibegin( ( IsLower<MT4>::value )
6994  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
6995  :( ii ) );
6996  const size_t ipos( ( IsUpper<MT4>::value )
6997  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
6998  :( iend ) );
6999 
7000  for( size_t i=ibegin; i<ipos; ++i ) {
7001  (~C)(i,j) -= A(i,j) * B(j,j) * scalar;
7002  }
7003  }
7004  }
7005  }
7006  }
7007  //**********************************************************************************************
7008 
7009  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
/*!\brief Default subtraction assignment kernel for a row-major target, a diagonal left operand
//        and a non-diagonal right operand (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (taken as DenseMatrix<MT3,false>).
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
// \param scalar The scaling factor.
// \return void
//
// Since only the diagonal of A is read, each target element is updated with a single product:
// C(i,j) -= A(i,i) * B(i,j) * scalar. The index space is traversed in square blocks of
// BLOCK_SIZE rows by BLOCK_SIZE columns; inside a block the column range of every row is
// additionally narrowed according to the triangular structure of B.
*/
7024  template< typename MT3 // Type of the left-hand side target matrix
7025  , typename MT4 // Type of the left-hand side matrix operand
7026  , typename MT5 // Type of the right-hand side matrix operand
7027  , typename ST2 > // Type of the scalar value
7028  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
7029  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7030  {
7031  constexpr size_t block( BLOCK_SIZE );
7032 
7033  const size_t M( A.rows() );
7034  const size_t N( B.columns() );
7035 
      // Outer two loops: block of rows [ii,iend) times block of columns [jj,jend).
7036  for( size_t ii=0UL; ii<M; ii+=block ) {
7037  const size_t iend( min( M, ii+block ) );
7038  for( size_t jj=0UL; jj<N; jj+=block ) {
7039  const size_t jend( min( N, jj+block ) );
7040  for( size_t i=ii; i<iend; ++i )
7041  {
      // Column range of row i inside the current block: for an upper B it starts at the
      // diagonal (one past it if strictly upper); for a lower B it ends at the diagonal
      // (excluding it if strictly lower). Both bounds are clamped to the block.
7042  const size_t jbegin( ( IsUpper<MT5>::value )
7043  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
7044  :( jj ) );
7045  const size_t jpos( ( IsLower<MT5>::value )
7046  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
7047  :( jend ) );
7048 
7049  for( size_t j=jbegin; j<jpos; ++j ) {
7050  (~C)(i,j) -= A(i,i) * B(i,j) * scalar;
7051  }
7052  }
7053  }
7054  }
7055  }
7056  //**********************************************************************************************
7057 
7058  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
/*!\brief Default subtraction assignment kernel for a column-major target, a diagonal left
//        operand and a non-diagonal right operand (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (taken as DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
// \param scalar The scaling factor.
// \return void
//
// Since only the diagonal of A is read, each target element is updated with a single product:
// C(i,j) -= A(i,i) * B(i,j) * scalar. The target is traversed column by column; within a column
// the row range is narrowed according to the triangular structure of B, and the row loop is
// unrolled by a factor of two with one trailing element handled separately.
*/
7073  template< typename MT3 // Type of the left-hand side target matrix
7074  , typename MT4 // Type of the left-hand side matrix operand
7075  , typename MT5 // Type of the right-hand side matrix operand
7076  , typename ST2 > // Type of the scalar value
7077  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
7078  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7079  {
7080  const size_t M( A.rows() );
7081  const size_t N( B.columns() );
7082 
7083  for( size_t j=0UL; j<N; ++j )
7084  {
      // Row range of column j: starts at the diagonal for a lower B (one past it if strictly
      // lower) and ends at the diagonal for an upper B (excluding it if strictly upper).
7085  const size_t ibegin( ( IsLower<MT5>::value )
7086  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
7087  :( 0UL ) );
7088  const size_t iend( ( IsUpper<MT5>::value )
7089  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
7090  :( M ) );
7091  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
7092 
      // ipos marks the end of the part of the range that can be processed two rows at a time
      // (inum with its lowest bit cleared).
7093  const size_t inum( iend - ibegin );
7094  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
7095 
7096  for( size_t i=ibegin; i<ipos; i+=2UL ) {
7097  (~C)(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
7098  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
7099  }
      // Odd number of rows: one element is left.
7100  if( ipos < iend ) {
7101  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
7102  }
7103  }
7104  }
7105  //**********************************************************************************************
7106 
7107  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
/*!\brief Default subtraction assignment kernel for two diagonal operands (\f$ C-=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (diagonal).
// \param scalar The scaling factor.
// \return void
//
// Enabled only if both MT4 and MT5 are diagonal. Only the diagonal of the target is updated:
// C(i,i) -= A(i,i) * B(i,i) * scalar for every row index i of A. All other elements of \a C are
// left untouched.
*/
7121  template< typename MT3 // Type of the left-hand side target matrix
7122  , typename MT4 // Type of the left-hand side matrix operand
7123  , typename MT5 // Type of the right-hand side matrix operand
7124  , typename ST2 > // Type of the scalar value
7125  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
7126  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7127  {
7128  for( size_t i=0UL; i<A.rows(); ++i ) {
7129  C(i,i) -= A(i,i) * B(i,i) * scalar;
7130  }
7131  }
7132  //**********************************************************************************************
7133 
7134  //**Default subtraction assignment to dense matrices (small matrices)***************************
/*!\brief Subtraction assignment kernel for small products, default (non-vectorized) variant.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This variant has no dedicated small-matrix implementation: it forwards all arguments unchanged
// to selectDefaultSubAssignKernel().
//
// NOTE(review): the line carrying the return type / enabling condition is missing from this
// listing, so the condition that distinguishes this overload from the vectorized ones of the
// same name cannot be stated here.
*/
7148  template< typename MT3 // Type of the left-hand side target matrix
7149  , typename MT4 // Type of the left-hand side matrix operand
7150  , typename MT5 // Type of the right-hand side matrix operand
7151  , typename ST2 > // Type of the scalar value
7153  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7154  {
7155  selectDefaultSubAssignKernel( C, A, B, scalar );
7156  }
7157  //**********************************************************************************************
7158 
7159  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
7174  template< typename MT3 // Type of the left-hand side target matrix
7175  , typename MT4 // Type of the left-hand side matrix operand
7176  , typename MT5 // Type of the right-hand side matrix operand
7177  , typename ST2 > // Type of the scalar value
7179  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7180  {
7181  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
7182 
7183  const size_t M( A.rows() );
7184  const size_t N( B.columns() );
7185  const size_t K( A.columns() );
7186 
7187  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
7188 
7189  size_t i( 0UL );
7190 
7191  for( ; (i+2UL) <= M; i+=2UL )
7192  {
7193  const size_t jend( LOW ? i+2UL : N );
7194  size_t j( UPP ? i : 0UL );
7195 
7196  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
7197  {
7198  const size_t kbegin( ( IsUpper<MT4>::value )
7199  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7200  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7201  const size_t kend( ( IsLower<MT4>::value )
7202  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
7203  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
7204 
7205  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7206  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7207 
7208  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7209  size_t k( kbegin );
7210 
7211  for( ; k<kpos; k+=SIMDSIZE ) {
7212  const SIMDType a1( A.load(i ,k) );
7213  const SIMDType a2( A.load(i+1UL,k) );
7214  const SIMDType b1( B.load(k,j ) );
7215  const SIMDType b2( B.load(k,j+1UL) );
7216  const SIMDType b3( B.load(k,j+2UL) );
7217  const SIMDType b4( B.load(k,j+3UL) );
7218  xmm1 += a1 * b1;
7219  xmm2 += a1 * b2;
7220  xmm3 += a1 * b3;
7221  xmm4 += a1 * b4;
7222  xmm5 += a2 * b1;
7223  xmm6 += a2 * b2;
7224  xmm7 += a2 * b3;
7225  xmm8 += a2 * b4;
7226  }
7227 
7228  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7229  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7230  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
7231  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
7232  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
7233  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
7234  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
7235  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
7236 
7237  for( ; remainder && k<kend; ++k ) {
7238  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7239  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7240  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL) * scalar;
7241  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL) * scalar;
7242  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7243  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7244  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL) * scalar;
7245  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL) * scalar;
7246  }
7247  }
7248 
7249  for( ; (j+2UL) <= jend; j+=2UL )
7250  {
7251  const size_t kbegin( ( IsUpper<MT4>::value )
7252  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7253  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7254  const size_t kend( ( IsLower<MT4>::value )
7255  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7256  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7257 
7258  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7259  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7260 
7261  SIMDType xmm1, xmm2, xmm3, xmm4;
7262  size_t k( kbegin );
7263 
7264  for( ; k<kpos; k+=SIMDSIZE ) {
7265  const SIMDType a1( A.load(i ,k) );
7266  const SIMDType a2( A.load(i+1UL,k) );
7267  const SIMDType b1( B.load(k,j ) );
7268  const SIMDType b2( B.load(k,j+1UL) );
7269  xmm1 += a1 * b1;
7270  xmm2 += a1 * b2;
7271  xmm3 += a2 * b1;
7272  xmm4 += a2 * b2;
7273  }
7274 
7275  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7276  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7277  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7278  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7279 
7280  for( ; remainder && k<kend; ++k ) {
7281  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7282  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7283  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7284  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7285  }
7286  }
7287 
7288  if( j < jend )
7289  {
7290  const size_t kbegin( ( IsUpper<MT4>::value )
7291  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7292  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7293  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7294 
7295  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7296  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7297 
7298  SIMDType xmm1, xmm2;
7299  size_t k( kbegin );
7300 
7301  for( ; k<kpos; k+=SIMDSIZE ) {
7302  const SIMDType b1( B.load(k,j) );
7303  xmm1 += A.load(i ,k) * b1;
7304  xmm2 += A.load(i+1UL,k) * b1;
7305  }
7306 
7307  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7308  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7309 
7310  for( ; remainder && k<kend; ++k ) {
7311  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7312  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7313  }
7314  }
7315  }
7316 
7317  if( i < M )
7318  {
7319  const size_t jend( LOW ? i+1UL : N );
7320  size_t j( UPP ? i : 0UL );
7321 
7322  for( ; !( LOW && UPP ) && (j+4UL) <= jend; j+=4UL )
7323  {
7324  const size_t kbegin( ( IsUpper<MT4>::value )
7325  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7326  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7327  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
7328 
7329  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7330  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7331 
7332  SIMDType xmm1, xmm2, xmm3, xmm4;
7333  size_t k( kbegin );
7334 
7335  for( ; k<kpos; k+=SIMDSIZE ) {
7336  const SIMDType a1( A.load(i,k) );
7337  xmm1 += a1 * B.load(k,j );
7338  xmm2 += a1 * B.load(k,j+1UL);
7339  xmm3 += a1 * B.load(k,j+2UL);
7340  xmm4 += a1 * B.load(k,j+3UL);
7341  }
7342 
7343  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7344  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7345  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
7346  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
7347 
7348  for( ; remainder && k<kend; ++k ) {
7349  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7350  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7351  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL) * scalar;
7352  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL) * scalar;
7353  }
7354  }
7355 
7356  for( ; (j+2UL) <= jend; j+=2UL )
7357  {
7358  const size_t kbegin( ( IsUpper<MT4>::value )
7359  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7360  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7361  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7362 
7363  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7364  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7365 
7366  SIMDType xmm1, xmm2;
7367  size_t k( kbegin );
7368 
7369  for( ; k<kpos; k+=SIMDSIZE ) {
7370  const SIMDType a1( A.load(i,k) );
7371  xmm1 += a1 * B.load(k,j );
7372  xmm2 += a1 * B.load(k,j+1UL);
7373  }
7374 
7375  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7376  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7377 
7378  for( ; remainder && k<kend; ++k ) {
7379  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7380  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7381  }
7382  }
7383 
7384  if( j < jend )
7385  {
7386  const size_t kbegin( ( IsUpper<MT4>::value )
7387  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7388  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7389 
7390  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7391  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7392 
7393  SIMDType xmm1;
7394  size_t k( kbegin );
7395 
7396  for( ; k<kpos; k+=SIMDSIZE ) {
7397  xmm1 += A.load(i,k) * B.load(k,j);
7398  }
7399 
7400  (~C)(i,j) -= sum( xmm1 ) * scalar;
7401 
7402  for( ; remainder && k<K; ++k ) {
7403  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7404  }
7405  }
7406  }
7407  }
7408  //**********************************************************************************************
7409 
7410  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Vectorized small-matrix kernel for the scaled subtraction assignment to a column-major
// dense target (DenseMatrix<MT3,true>): every visited element is updated as
//    C(i,j) -= scalar * sum_k A(i,k) * B(k,j)
//
// NOTE(review): the return-type/SFINAE line of this overload is not part of this listing.
//
// Parameters:
//    C      - target dense matrix, modified in place through (~C)(i,j)
//    A      - left-hand side operand; provides M = A.rows() and K = A.columns()
//    B      - right-hand side operand; provides N = B.columns()
//    scalar - factor applied to every accumulated dot product before it is subtracted
//
// Structure of the kernel:
//    * rows are processed in tiles of 4 (only if neither LOW nor UPP is set), then 2, then 1;
//      columns are processed in tiles of 2, then 1.
//    * LOW limits the column range of a row tile to its end row (jend), UPP starts the column
//      range at the first row of the tile (j = i).
//    * for each tile the k-range [kbegin,kend) is narrowed according to the IsUpper/IsLower
//      properties of the operand types; kbegin is rounded down with "& size_t(-SIMDSIZE)"
//      (assumes SIMDSIZE is a power of two - consistent with the kpos assertion below).
//    * the SIMD loop runs up to kpos; if 'remainder' is set the elements in [kpos,kend) are
//      handled by a scalar tail loop.
// NOTE(review): the xmm accumulators are only default-constructed before "+=" is applied;
// presumably SIMDType's default constructor zero-initializes - confirm.
7425  template< typename MT3 // Type of the left-hand side target matrix
7426  , typename MT4 // Type of the left-hand side matrix operand
7427  , typename MT5 // Type of the right-hand side matrix operand
7428  , typename ST2 > // Type of the scalar value
7430  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7431  {
      // A scalar tail loop is required unless both operand types are padded.
7432  constexpr bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
7433 
7434  const size_t M( A.rows() );
7435  const size_t N( B.columns() );
7436  const size_t K( A.columns() );
7437 
      // The LOW/UPP restrictions below index rows and columns interchangeably, hence M == N.
7438  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
7439 
7440  size_t i( 0UL );
7441 
      // Tiles of 4 rows; skipped entirely when LOW or UPP is set.
7442  for( ; !LOW && !UPP && (i+4UL) <= M; i+=4UL )
7443  {
7444  size_t j( 0UL );
7445 
      // 4x2 tile: eight accumulators, one per target element.
7446  for( ; (j+2UL) <= N; j+=2UL )
7447  {
7448  const size_t kbegin( ( IsUpper<MT4>::value )
7449  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7450  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7451  const size_t kend( ( IsLower<MT4>::value )
7452  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
7453  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7454 
      // With a tail loop the SIMD loop stops at the last multiple of SIMDSIZE below kend.
7455  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7456  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7457 
7458  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7459  size_t k( kbegin );
7460 
7461  for( ; k<kpos; k+=SIMDSIZE )
7462  {
7463  const SIMDType a1( A.load(i ,k) );
7464  const SIMDType a2( A.load(i+1UL,k) );
7465  const SIMDType a3( A.load(i+2UL,k) );
7466  const SIMDType a4( A.load(i+3UL,k) );
7467  const SIMDType b1( B.load(k,j ) );
7468  const SIMDType b2( B.load(k,j+1UL) );
7469  xmm1 += a1 * b1;
7470  xmm2 += a1 * b2;
7471  xmm3 += a2 * b1;
7472  xmm4 += a2 * b2;
7473  xmm5 += a3 * b1;
7474  xmm6 += a3 * b2;
7475  xmm7 += a4 * b1;
7476  xmm8 += a4 * b2;
7477  }
7478 
      // Reduce each accumulator, scale it and subtract it from the target element.
7479  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7480  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7481  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7482  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7483  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
7484  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
7485  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
7486  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
7487 
      // Scalar tail for the k-values in [kpos,kend).
7488  for( ; remainder && k<kend; ++k ) {
7489  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7490  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7491  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7492  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7493  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j ) * scalar;
7494  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL) * scalar;
7495  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j ) * scalar;
7496  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL) * scalar;
7497  }
7498  }
7499 
      // 4x1 tile for the last column when N is odd.
7500  if( j < N )
7501  {
7502  const size_t kbegin( ( IsUpper<MT4>::value )
7503  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7504  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7505  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
7506 
7507  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7508  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7509 
7510  SIMDType xmm1, xmm2, xmm3, xmm4;
7511  size_t k( kbegin );
7512 
7513  for( ; k<kpos; k+=SIMDSIZE ) {
7514  const SIMDType b1( B.load(k,j) );
7515  xmm1 += A.load(i ,k) * b1;
7516  xmm2 += A.load(i+1UL,k) * b1;
7517  xmm3 += A.load(i+2UL,k) * b1;
7518  xmm4 += A.load(i+3UL,k) * b1;
7519  }
7520 
7521  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7522  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7523  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
7524  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
7525 
7526  for( ; remainder && k<kend; ++k ) {
7527  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7528  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7529  (~C)(i+2UL,j) -= A(i+2UL,k) * B(k,j) * scalar;
7530  (~C)(i+3UL,j) -= A(i+3UL,k) * B(k,j) * scalar;
7531  }
7532  }
7533  }
7534 
      // Tiles of 2 rows; this is the first row loop that runs when LOW or UPP is set.
7535  for( ; (i+2UL) <= M; i+=2UL )
7536  {
      // LOW: stop at the end row of the tile; UPP: start at the first row of the tile.
7537  const size_t jend( LOW ? i+2UL : N );
7538  size_t j( UPP ? i : 0UL );
7539 
      // 2x2 tile.
7540  for( ; (j+2UL) <= jend; j+=2UL )
7541  {
7542  const size_t kbegin( ( IsUpper<MT4>::value )
7543  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7544  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7545  const size_t kend( ( IsLower<MT4>::value )
7546  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7547  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7548 
7549  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7550  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7551 
7552  SIMDType xmm1, xmm2, xmm3, xmm4;
7553  size_t k( kbegin );
7554 
7555  for( ; k<kpos; k+=SIMDSIZE ) {
7556  const SIMDType a1( A.load(i ,k) );
7557  const SIMDType a2( A.load(i+1UL,k) );
7558  const SIMDType b1( B.load(k,j ) );
7559  const SIMDType b2( B.load(k,j+1UL) );
7560  xmm1 += a1 * b1;
7561  xmm2 += a1 * b2;
7562  xmm3 += a2 * b1;
7563  xmm4 += a2 * b2;
7564  }
7565 
7566  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7567  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7568  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7569  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7570 
7571  for( ; remainder && k<kend; ++k ) {
7572  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7573  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7574  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7575  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7576  }
7577  }
7578 
      // 2x1 tile for a single leftover column.
7579  if( j < jend )
7580  {
7581  const size_t kbegin( ( IsUpper<MT4>::value )
7582  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7583  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7584  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7585 
7586  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7587  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7588 
7589  SIMDType xmm1, xmm2;
7590  size_t k( kbegin );
7591 
7592  for( ; k<kpos; k+=SIMDSIZE ) {
7593  const SIMDType b1( B.load(k,j) );
7594  xmm1 += A.load(i ,k) * b1;
7595  xmm2 += A.load(i+1UL,k) * b1;
7596  }
7597 
7598  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7599  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7600 
7601  for( ; remainder && k<kend; ++k ) {
7602  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7603  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7604  }
7605  }
7606  }
7607 
      // Single leftover row when M is odd.
7608  if( i < M )
7609  {
7610  const size_t jend( LOW ? i+1UL : N );
7611  size_t j( UPP ? i : 0UL );
7612 
      // 1x2 tile.
7613  for( ; (j+2UL) <= jend; j+=2UL )
7614  {
7615  const size_t kbegin( ( IsUpper<MT4>::value )
7616  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7617  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7618  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7619 
7620  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7621  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7622 
7623  SIMDType xmm1, xmm2;
7624  size_t k( kbegin );
7625 
7626  for( ; k<kpos; k+=SIMDSIZE ) {
7627  const SIMDType a1( A.load(i,k) );
7628  xmm1 += a1 * B.load(k,j );
7629  xmm2 += a1 * B.load(k,j+1UL);
7630  }
7631 
7632  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7633  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7634 
7635  for( ; remainder && k<kend; ++k ) {
7636  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7637  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7638  }
7639  }
7640 
      // Final 1x1 element; here the upper k-bound is always K (no triangular narrowing).
7641  if( j < jend )
7642  {
7643  const size_t kbegin( ( IsUpper<MT4>::value )
7644  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7645  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7646 
7647  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7648  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7649 
7650  SIMDType xmm1;
7651  size_t k( kbegin );
7652 
7653  for( ; k<kpos; k+=SIMDSIZE ) {
7654  xmm1 += A.load(i,k) * B.load(k,j);
7655  }
7656 
7657  (~C)(i,j) -= sum( xmm1 ) * scalar;
7658 
7659  for( ; remainder && k<K; ++k ) {
7660  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7661  }
7662  }
7663  }
7664  }
7665  //**********************************************************************************************
7666 
7667  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Large-matrix subtraction assignment, non-vectorized variant: there is no dedicated large
// kernel in this overload, the call is forwarded unchanged to selectDefaultSubAssignKernel().
// NOTE(review): the return-type/SFINAE line that selects this overload is not part of this
// listing.
//
// Parameters:
//    C      - target matrix of the subtraction assignment
//    A      - left-hand side matrix operand
//    B      - right-hand side matrix operand
//    scalar - scaling factor, passed through as-is
7681  template< typename MT3 // Type of the left-hand side target matrix
7682  , typename MT4 // Type of the left-hand side matrix operand
7683  , typename MT5 // Type of the right-hand side matrix operand
7684  , typename ST2 > // Type of the scalar value
7686  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7687  {
7688  selectDefaultSubAssignKernel( C, A, B, scalar );
7689  }
7690  //**********************************************************************************************
7691 
7692  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Large-matrix subtraction assignment, vectorized variant: dispatches to one of the matrix
// multiplication kernels lmmm() / ummm() / mmm() depending on the LOW and UPP flags.
// NOTE(review): the return-type/SFINAE line that selects this overload is not part of this
// listing.
//
// Parameters:
//    C      - target matrix of the subtraction assignment
//    A      - left-hand side matrix operand
//    B      - right-hand side matrix operand
//    scalar - scaling factor; it is negated before being handed to the kernel so that the
//             product is subtracted from C
// NOTE(review): the trailing ST2(1) is presumably the factor applied to the existing content
// of C (i.e. C = -scalar*A*B + 1*C) - confirm against the kernels in blaze/math/dense/MMM.h.
7707  template< typename MT3 // Type of the left-hand side target matrix
7708  , typename MT4 // Type of the left-hand side matrix operand
7709  , typename MT5 // Type of the right-hand side matrix operand
7710  , typename ST2 > // Type of the scalar value
7712  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7713  {
      // LOW takes precedence over UPP when both flags are set.
7714  if( LOW )
7715  lmmm( C, A, B, -scalar, ST2(1) );
7716  else if( UPP )
7717  ummm( C, A, B, -scalar, ST2(1) );
7718  else
7719  mmm( C, A, B, -scalar, ST2(1) );
7720  }
7721  //**********************************************************************************************
7722 
7723  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Default variant of the BLAS-based subtraction assignment: no BLAS routine is called here,
// the call is forwarded unchanged to selectLargeSubAssignKernel().
// NOTE(review): the return-type/SFINAE line that selects this overload is not part of this
// listing.
//
// Parameters:
//    C      - target matrix of the subtraction assignment
//    A      - left-hand side matrix operand
//    B      - right-hand side matrix operand
//    scalar - scaling factor, passed through as-is
7737  template< typename MT3 // Type of the left-hand side target matrix
7738  , typename MT4 // Type of the left-hand side matrix operand
7739  , typename MT5 // Type of the right-hand side matrix operand
7740  , typename ST2 > // Type of the scalar value
7742  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7743  {
7744  selectLargeSubAssignKernel( C, A, B, scalar );
7745  }
7746  //**********************************************************************************************
7747 
7748  //**BLAS-based subtraction assignment to dense matrices*****************************************
7749 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
7750 
// BLAS-based subtraction assignment (only compiled if BLAS mode and BLAS matrix/matrix
// multiplication are enabled, see the surrounding #if).
// NOTE(review): the return-type/SFINAE line that selects this overload is not part of this
// listing.
//
// Parameters:
//    C      - target matrix of the subtraction assignment
//    A      - left-hand side matrix operand
//    B      - right-hand side matrix operand
//    scalar - scaling factor, converted to the element type ET of the target
//
// Three cases are distinguished:
//    * A is triangular: a temporary is created from B, trmm() is called with A on the left
//      side and the (positive) scalar, and the temporary is subtracted from C.
//    * B is triangular: a temporary is created from A, trmm() is called with B on the right
//      side and the (positive) scalar, and the temporary is subtracted from C.
//    * otherwise: a single gemm() call with the negated scalar and ET(1).
// NOTE(review): trmm() is presumed to overwrite 'tmp' with the scaled product, and ET(1) in
// the gemm() call is presumed to be the factor for the existing content of C - confirm
// against blaze/math/blas/trmm.h and blaze/math/blas/gemm.h.
7763  template< typename MT3 // Type of the left-hand side target matrix
7764  , typename MT4 // Type of the left-hand side matrix operand
7765  , typename MT5 // Type of the right-hand side matrix operand
7766  , typename ST2 > // Type of the scalar value
7768  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7769  {
7770  typedef ElementType_<MT3> ET;
7771 
7772  if( IsTriangular<MT4>::value ) {
7773  ResultType_<MT3> tmp( serial( B ) );
7774  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7775  subAssign( C, tmp );
7776  }
7777  else if( IsTriangular<MT5>::value ) {
7778  ResultType_<MT3> tmp( serial( A ) );
7779  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7780  subAssign( C, tmp );
7781  }
7782  else {
7783  gemm( C, A, B, ET(-scalar), ET(1) );
7784  }
7785  }
7786 #endif
7787  //**********************************************************************************************
7788 
7789  //**Subtraction assignment to sparse matrices***************************************************
7790  // No special implementation for the subtraction assignment to sparse matrices.
7791  //**********************************************************************************************
7792 
7793  //**Multiplication assignment to dense matrices*************************************************
7794  // No special implementation for the multiplication assignment to dense matrices.
7795  //**********************************************************************************************
7796 
7797  //**Multiplication assignment to sparse matrices************************************************
7798  // No special implementation for the multiplication assignment to sparse matrices.
7799  //**********************************************************************************************
7800 
7801  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled matrix product (DMatScalarMultExpr wrapping the product) to a
// dense matrix.
// NOTE(review): the return-type/SFINAE line of this function is not part of this listing.
//
// Parameters:
//    lhs - target dense matrix
//    rhs - scaled product expression; its operands are read via rhs.matrix_ and its scaling
//          factor via rhs.scalar_
//
// Behavior visible below:
//    * an empty target (zero rows or zero columns) returns immediately;
//    * an inner dimension of zero (left.columns() == 0) resets the target and returns;
//    * otherwise both operands are evaluated into LT/RT objects and the expression
//      A * B * rhs.scalar_ is handed to smpAssign().
7816  template< typename MT // Type of the target dense matrix
7817  , bool SO > // Storage order of the target dense matrix
7819  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7820  {
7822 
7823  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7824  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7825 
7826  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7827  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7828 
7829  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7830  return;
7831  }
7832  else if( left.columns() == 0UL ) {
      // Product over an empty inner dimension: the target is reset instead of computed.
7833  reset( ~lhs );
7834  return;
7835  }
7836 
7837  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7838  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7839 
7840  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7841  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7842  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7843  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7844  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7845  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7846 
      // Re-dispatch with the evaluated operands.
7847  smpAssign( ~lhs, A * B * rhs.scalar_ );
7848  }
7849  //**********************************************************************************************
7850 
7851  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment to a sparse matrix target.
// NOTE(review): the signature line and the local type definitions (TmpType, ForwardFunctor)
// are not part of this listing; 'lhs' and 'rhs' are presumably the target matrix and the
// scaled product expression - confirm against the original header.
//
// Behavior visible below: the expression 'rhs' is evaluated into a temporary of type TmpType,
// the temporary is passed through a ForwardFunctor object and the result is assigned to the
// target via smpAssign().
7866  template< typename MT // Type of the target sparse matrix
7867  , bool SO > // Storage order of the target sparse matrix
7870  {
7872 
7874 
7881 
7882  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7883  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7884 
7885  const ForwardFunctor fwd;
7886 
7887  const TmpType tmp( rhs );
7888  smpAssign( ~lhs, fwd( tmp ) );
7889  }
7890  //**********************************************************************************************
7891 
7892  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled matrix product to a dense matrix.
// NOTE(review): the signature line is not part of this listing; 'lhs' and 'rhs' are
// presumably the dense target and the scaled product expression, and the function name is
// inferred from the smpAddAssign() call at the end - confirm against the original header.
//
// Behavior visible below:
//    * nothing is done if the target is empty or the inner dimension (left.columns()) is zero;
//    * otherwise both operands are evaluated into LT/RT objects and the expression
//      A * B * rhs.scalar_ is handed to smpAddAssign().
7907  template< typename MT // Type of the target dense matrix
7908  , bool SO > // Storage order of the target dense matrix
7911  {
7913 
7914  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7915  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7916 
7917  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7918  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7919 
      // An empty product contributes nothing, so the target is left untouched.
7920  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7921  return;
7922  }
7923 
7924  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7925  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7926 
7927  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7928  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7929  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7930  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7931  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7932  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7933 
7934  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
7935  }
7936  //**********************************************************************************************
7937 
7938  //**SMP addition assignment to sparse matrices**************************************************
7939  // No special implementation for the SMP addition assignment to sparse matrices.
7940  //**********************************************************************************************
7941 
7942  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled matrix product to a dense matrix.
// NOTE(review): the signature line is not part of this listing; 'lhs' and 'rhs' are
// presumably the dense target and the scaled product expression, and the function name is
// inferred from the smpSubAssign() call at the end - confirm against the original header.
//
// Behavior visible below:
//    * nothing is done if the target is empty or the inner dimension (left.columns()) is zero;
//    * otherwise both operands are evaluated into LT/RT objects and the expression
//      A * B * rhs.scalar_ is handed to smpSubAssign().
7957  template< typename MT // Type of the target dense matrix
7958  , bool SO > // Storage order of the target dense matrix
7961  {
7963 
7964  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7965  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7966 
7967  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7968  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7969 
      // An empty product contributes nothing, so the target is left untouched.
7970  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7971  return;
7972  }
7973 
7974  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7975  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7976 
7977  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7978  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7979  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7980  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7981  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7982  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7983 
7984  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
7985  }
7986  //**********************************************************************************************
7987 
7988  //**SMP subtraction assignment to sparse matrices***********************************************
7989  // No special implementation for the SMP subtraction assignment to sparse matrices.
7990  //**********************************************************************************************
7991 
7992  //**SMP multiplication assignment to dense matrices*********************************************
7993  // No special implementation for the SMP multiplication assignment to dense matrices.
7994  //**********************************************************************************************
7995 
7996  //**SMP multiplication assignment to sparse matrices********************************************
7997  // No special implementation for the SMP multiplication assignment to sparse matrices.
7998  //**********************************************************************************************
7999 
8000  //**Compile time checks*************************************************************************
8008  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
8009  //**********************************************************************************************
8010 };
8012 //*************************************************************************************************
8013 
8014 
8015 
8016 
8017 //=================================================================================================
8018 //
8019 // GLOBAL BINARY ARITHMETIC OPERATORS
8020 //
8021 //=================================================================================================
8022 
8023 //*************************************************************************************************
// Global binary arithmetic operator for two dense matrices.
// NOTE(review): the signature and the return statement are not part of this listing; this is
// presumably operator*() for a row-major left operand 'lhs' and a column-major right operand
// 'rhs' returning a DMatTDMatMultExpr - confirm against the original header.
//
// Behavior visible below: if the number of columns of 'lhs' differs from the number of rows
// of 'rhs', an invalid-argument error with the message "Matrix sizes do not match" is raised
// via BLAZE_THROW_INVALID_ARGUMENT.
8053 template< typename T1 // Type of the left-hand side dense matrix
8054  , typename T2 > // Type of the right-hand side dense matrix
8057 {
8059 
8060  if( (~lhs).columns() != (~rhs).rows() ) {
8061  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
8062  }
8063 
8065 }
8066 //*************************************************************************************************
8067 
8068 
8069 
8070 
8071 //=================================================================================================
8072 //
8073 // GLOBAL FUNCTIONS
8074 //
8075 //=================================================================================================
8076 
8077 //*************************************************************************************************
// Declares a matrix product expression as symmetric.
// NOTE(review): the signature and the return statement are not part of this listing; judging
// by the error message this is presumably the declsym() overload for DMatTDMatMultExpr, with
// 'dm' being the product expression - confirm against the original header.
//
// Behavior visible below: a non-square 'dm' raises an invalid-argument error with the
// message "Invalid symmetric matrix specification" via BLAZE_THROW_INVALID_ARGUMENT.
8102 template< typename MT1 // Type of the left-hand side dense matrix
8103  , typename MT2 // Type of the right-hand side dense matrix
8104  , bool SF // Symmetry flag
8105  , bool HF // Hermitian flag
8106  , bool LF // Lower flag
8107  , bool UF > // Upper flag
8110 {
8112 
8113  if( !isSquare( dm ) ) {
8114  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
8115  }
8116 
8118 }
8120 //*************************************************************************************************
8121 
8122 
8123 //*************************************************************************************************
// Declares a matrix product expression as Hermitian.
// NOTE(review): the signature and the return statement are not part of this listing; judging
// by the error message this is presumably the declherm() overload for DMatTDMatMultExpr, with
// 'dm' being the product expression - confirm against the original header.
//
// Behavior visible below: a non-square 'dm' raises an invalid-argument error with the
// message "Invalid Hermitian matrix specification" via BLAZE_THROW_INVALID_ARGUMENT.
8148 template< typename MT1 // Type of the left-hand side dense matrix
8149  , typename MT2 // Type of the right-hand side dense matrix
8150  , bool SF // Symmetry flag
8151  , bool HF // Hermitian flag
8152  , bool LF // Lower flag
8153  , bool UF > // Upper flag
8156 {
8158 
8159  if( !isSquare( dm ) ) {
8160  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
8161  }
8162 
8164 }
8166 //*************************************************************************************************
8167 
8168 
8169 //*************************************************************************************************
// Declares a matrix product expression as lower triangular.
// NOTE(review): the signature and the return statement are not part of this listing; judging
// by the error message this is presumably the decllow() overload for DMatTDMatMultExpr, with
// 'dm' being the product expression - confirm against the original header.
//
// Behavior visible below: a non-square 'dm' raises an invalid-argument error with the
// message "Invalid lower matrix specification" via BLAZE_THROW_INVALID_ARGUMENT.
8194 template< typename MT1 // Type of the left-hand side dense matrix
8195  , typename MT2 // Type of the right-hand side dense matrix
8196  , bool SF // Symmetry flag
8197  , bool HF // Hermitian flag
8198  , bool LF // Lower flag
8199  , bool UF > // Upper flag
8202 {
8204 
8205  if( !isSquare( dm ) ) {
8206  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
8207  }
8208 
8210 }
8212 //*************************************************************************************************
8213 
8214 
8215 //*************************************************************************************************
// Declares a matrix product expression as upper triangular.
// NOTE(review): the signature and the return statement are not part of this listing; judging
// by the error message this is presumably the declupp() overload for DMatTDMatMultExpr, with
// 'dm' being the product expression - confirm against the original header.
//
// Behavior visible below: a non-square 'dm' raises an invalid-argument error with the
// message "Invalid upper matrix specification" via BLAZE_THROW_INVALID_ARGUMENT.
8240 template< typename MT1 // Type of the left-hand side dense matrix
8241  , typename MT2 // Type of the right-hand side dense matrix
8242  , bool SF // Symmetry flag
8243  , bool HF // Hermitian flag
8244  , bool LF // Lower flag
8245  , bool UF > // Upper flag
8248 {
8250 
8251  if( !isSquare( dm ) ) {
8252  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
8253  }
8254 
8256 }
8258 //*************************************************************************************************
8259 
8260 
8261 //*************************************************************************************************
// Declares a matrix product expression as diagonal.
// NOTE(review): the signature and the return statement are not part of this listing; judging
// by the error message this is presumably the decldiag() overload for DMatTDMatMultExpr, with
// 'dm' being the product expression - confirm against the original header.
//
// Behavior visible below: a non-square 'dm' raises an invalid-argument error with the
// message "Invalid diagonal matrix specification" via BLAZE_THROW_INVALID_ARGUMENT.
8286 template< typename MT1 // Type of the left-hand side dense matrix
8287  , typename MT2 // Type of the right-hand side dense matrix
8288  , bool SF // Symmetry flag
8289  , bool HF // Hermitian flag
8290  , bool LF // Lower flag
8291  , bool UF > // Upper flag
8294 {
8296 
8297  if( !isSquare( dm ) ) {
8298  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
8299  }
8300 
8302 }
8304 //*************************************************************************************************
8305 
8306 
8307 
8308 
8309 //=================================================================================================
8310 //
8311 // ROWS SPECIALIZATIONS
8312 //
8313 //=================================================================================================
8314 
8315 //*************************************************************************************************
// Rows trait of the product expression: derives from Rows<MT1>, i.e. the trait of the
// left-hand side operand type is reused for the product, independent of the four flags.
8317 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8318 struct Rows< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Rows<MT1>
8319 {};
8321 //*************************************************************************************************
8322 
8323 
8324 
8325 
8326 //=================================================================================================
8327 //
8328 // COLUMNS SPECIALIZATIONS
8329 //
8330 //=================================================================================================
8331 
8332 //*************************************************************************************************
// Columns trait of the product expression: derives from Columns<MT2>, i.e. the trait of the
// right-hand side operand type is reused for the product, independent of the four flags.
8334 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8335 struct Columns< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Columns<MT2>
8336 {};
8338 //*************************************************************************************************
8339 
8340 
8341 
8342 
8343 //=================================================================================================
8344 //
8345 // ISALIGNED SPECIALIZATIONS
8346 //
8347 //=================================================================================================
8348 
8349 //*************************************************************************************************
// IsAligned trait of the product expression: true only if both operand types MT1 and MT2
// are reported as aligned.
8351 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8352 struct IsAligned< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8353  : public BoolConstant< And< IsAligned<MT1>, IsAligned<MT2> >::value >
8354 {};
8356 //*************************************************************************************************
8357 
8358 
8359 
8360 
8361 //=================================================================================================
8362 //
8363 // ISSYMMETRIC SPECIALIZATIONS
8364 //
8365 //=================================================================================================
8366 
8367 //*************************************************************************************************
// IsSymmetric trait of the product expression. The expression is reported as symmetric if
//    * the symmetry flag SF is set, or
//    * the Hermitian flag HF is set and the element type of the expression is a builtin type
//      (presumably because Hermitian and symmetric coincide for non-complex elements), or
//    * both the lower flag LF and the upper flag UF are set (i.e. the expression is diagonal).
// The element type is queried from the instantiation with fixed flags <false,true,false,false>.
8369 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8370 struct IsSymmetric< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8371  : public BoolConstant< Or< Bool<SF>
8372  , And< Bool<HF>
8373  , IsBuiltin< ElementType_< DMatTDMatMultExpr<MT1,MT2,false,true,false,false> > > >
8374  , And< Bool<LF>, Bool<UF> > >::value >
8375 {};
8377 //*************************************************************************************************
8378 
8379 
8380 
8381 
8382 //=================================================================================================
8383 //
8384 // ISHERMITIAN SPECIALIZATIONS
8385 //
8386 //=================================================================================================
8387 
8388 //*************************************************************************************************
// IsHermitian trait of the product expression: partial specialization for expressions whose
// Hermitian flag (fourth flag argument) is 'true'; these are always reported as Hermitian.
// Expressions with HF == false are not covered by this specialization.
8390 template< typename MT1, typename MT2, bool SF, bool LF, bool UF >
8391 struct IsHermitian< DMatTDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
8392  : public TrueType
8393 {};
8395 //*************************************************************************************************
8396 
8397 
8398 
8399 
8400 //=================================================================================================
8401 //
8402 // ISLOWER SPECIALIZATIONS
8403 //
8404 //=================================================================================================
8405 
8406 //*************************************************************************************************
// IsLower trait of the product expression. The expression is reported as lower if
//    * the lower flag LF is set, or
//    * both operand types are lower, or
//    * the expression is flagged symmetric or Hermitian (SF or HF) and both operand types are
//      upper (an upper product that is also symmetric/Hermitian is diagonal, hence lower).
8408 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8409 struct IsLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8410  : public BoolConstant< Or< Bool<LF>
8411  , And< IsLower<MT1>, IsLower<MT2> >
8412  , And< Or< Bool<SF>, Bool<HF> >
8413  , IsUpper<MT1>, IsUpper<MT2> > >::value >
8414 {};
8416 //*************************************************************************************************
8417 
8418 
8419 
8420 
8421 //=================================================================================================
8422 //
8423 // ISUNILOWER SPECIALIZATIONS
8424 //
8425 //=================================================================================================
8426 
8427 //*************************************************************************************************
// IsUniLower trait of the product expression. The expression is reported as unilower if
//    * both operand types are unilower, or
//    * the expression is flagged symmetric or Hermitian (SF or HF) and both operand types are
//      uniupper.
// The LF flag alone does not make the expression unilower.
8429 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8430 struct IsUniLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8431  : public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
8432  , And< Or< Bool<SF>, Bool<HF> >
8433  , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
8434 {};
8436 //*************************************************************************************************
8437 
8438 
8439 
8440 
8441 //=================================================================================================
8442 //
8443 // ISSTRICTLYLOWER SPECIALIZATIONS
8444 //
8445 //=================================================================================================
8446 
8447 //*************************************************************************************************
// IsStrictlyLower specialization for DMatTDMatMultExpr. The trait is true if one of the
// following compile time conditions holds:
//   - MT1 satisfies IsStrictlyLower and MT2 satisfies IsLower;
//   - MT2 satisfies IsStrictlyLower and MT1 satisfies IsLower;
//   - SF or HF is set and the mirrored condition holds for the upper traits, i.e. one operand
//     satisfies IsStrictlyUpper while the other satisfies IsUpper.
// The LF and UF flags are not consulted by this specialization.
8449 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8450 struct IsStrictlyLower< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8451  : public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8452  , And< IsStrictlyLower<MT2>, IsLower<MT1> >
8453  , And< Or< Bool<SF>, Bool<HF> >
8454  , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8455  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
8456 {};
8458 //*************************************************************************************************
8459 
8460 
8461 
8462 
8463 //=================================================================================================
8464 //
8465 // ISUPPER SPECIALIZATIONS
8466 //
8467 //=================================================================================================
8468 
8469 //*************************************************************************************************
// IsUpper specialization for DMatTDMatMultExpr. The trait is true if at least one of the
// following compile time conditions holds:
//   - the UF flag (flag for upper matrices) is set;
//   - both operand types MT1 and MT2 satisfy IsUpper;
//   - SF or HF is set and both operand types satisfy IsLower.
// Rationale for the last case, as far as it can be inferred: a product of two lower matrices
// is lower, and a lower matrix that is also symmetric/Hermitian can only be diagonal, hence
// upper as well.
8471 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8472 struct IsUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8473  : public BoolConstant< Or< Bool<UF>
8474  , And< IsUpper<MT1>, IsUpper<MT2> >
8475  , And< Or< Bool<SF>, Bool<HF> >
8476  , IsLower<MT1>, IsLower<MT2> > >::value >
8477 {};
8479 //*************************************************************************************************
8480 
8481 
8482 
8483 
8484 //=================================================================================================
8485 //
8486 // ISUNIUPPER SPECIALIZATIONS
8487 //
8488 //=================================================================================================
8489 
8490 //*************************************************************************************************
// IsUniUpper specialization for DMatTDMatMultExpr. The trait is true if either
//   - both operand types MT1 and MT2 satisfy IsUniUpper, or
//   - SF or HF is set and both operand types satisfy IsUniLower.
// The LF and UF flags are not consulted by this specialization.
8492 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8493 struct IsUniUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8494  : public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
8495  , And< Or< Bool<SF>, Bool<HF> >
8496  , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
8497 {};
8499 //*************************************************************************************************
8500 
8501 
8502 
8503 
8504 //=================================================================================================
8505 //
8506 // ISSTRICTLYUPPER SPECIALIZATIONS
8507 //
8508 //=================================================================================================
8509 
8510 //*************************************************************************************************
// IsStrictlyUpper specialization for DMatTDMatMultExpr. The trait is true if one of the
// following compile time conditions holds:
//   - MT1 satisfies IsStrictlyUpper and MT2 satisfies IsUpper;
//   - MT2 satisfies IsStrictlyUpper and MT1 satisfies IsUpper;
//   - SF or HF is set and the mirrored condition holds for the lower traits, i.e. one operand
//     satisfies IsStrictlyLower while the other satisfies IsLower.
// The LF and UF flags are not consulted by this specialization.
8512 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8513 struct IsStrictlyUpper< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8514  : public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8515  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
8516  , And< Or< Bool<SF>, Bool<HF> >
8517  , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8518  , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
8519 {};
8521 //*************************************************************************************************
8522 
8523 
8524 
8525 
8526 //=================================================================================================
8527 //
8528 // EXPRESSION TRAIT SPECIALIZATIONS
8529 //
8530 //=================================================================================================
8531 
8532 //*************************************************************************************************
// DMatDVecMultExprTrait specialization with a DMatTDMatMultExpr as first argument and VT as
// second argument (VT is presumably the vector operand type - confirm).
// NOTE(review): the embedded listing numbers jump from 8538 to 8543, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8534 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
8535 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
8536 {
8537  public:
8538  //**********************************************************************************************
8543  , INVALID_TYPE >;
8544  //**********************************************************************************************
8545 };
8547 //*************************************************************************************************
8548 
8549 
8550 //*************************************************************************************************
// DMatSVecMultExprTrait specialization with a DMatTDMatMultExpr as first argument and VT as
// second argument (VT is presumably the vector operand type - confirm).
// NOTE(review): the embedded listing numbers jump from 8556 to 8561, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8552 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
8553 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
8554 {
8555  public:
8556  //**********************************************************************************************
8561  , INVALID_TYPE >;
8562  //**********************************************************************************************
8563 };
8565 //*************************************************************************************************
8566 
8567 
8568 //*************************************************************************************************
// TDVecDMatMultExprTrait specialization with VT as first argument and a DMatTDMatMultExpr as
// second argument (VT is presumably the vector operand type - confirm).
// NOTE(review): the embedded listing numbers jump from 8574 to 8579, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8570 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8571 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8572 {
8573  public:
8574  //**********************************************************************************************
8579  , INVALID_TYPE >;
8580  //**********************************************************************************************
8581 };
8583 //*************************************************************************************************
8584 
8585 
8586 //*************************************************************************************************
// TSVecDMatMultExprTrait specialization with VT as first argument and a DMatTDMatMultExpr as
// second argument (VT is presumably the vector operand type - confirm).
// NOTE(review): the embedded listing numbers jump from 8592 to 8597, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8588 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8589 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8590 {
8591  public:
8592  //**********************************************************************************************
8597  , INVALID_TYPE >;
8598  //**********************************************************************************************
8599 };
8601 //*************************************************************************************************
8602 
8603 
8604 //*************************************************************************************************
// DMatDeclSymExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8610 to 8614, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8606 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8607 struct DMatDeclSymExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8608 {
8609  public:
8610  //**********************************************************************************************
8614  , INVALID_TYPE >;
8615  //**********************************************************************************************
8616 };
8618 //*************************************************************************************************
8619 
8620 
8621 //*************************************************************************************************
// DMatDeclHermExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8627 to 8631, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8623 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8624 struct DMatDeclHermExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8625 {
8626  public:
8627  //**********************************************************************************************
8631  , INVALID_TYPE >;
8632  //**********************************************************************************************
8633 };
8635 //*************************************************************************************************
8636 
8637 
8638 //*************************************************************************************************
// DMatDeclLowExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8644 to 8648, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8640 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8641 struct DMatDeclLowExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8642 {
8643  public:
8644  //**********************************************************************************************
8648  , INVALID_TYPE >;
8649  //**********************************************************************************************
8650 };
8652 //*************************************************************************************************
8653 
8654 
8655 //*************************************************************************************************
// DMatDeclUppExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8661 to 8665, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8657 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8658 struct DMatDeclUppExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8659 {
8660  public:
8661  //**********************************************************************************************
8665  , INVALID_TYPE >;
8666  //**********************************************************************************************
8667 };
8669 //*************************************************************************************************
8670 
8671 
8672 //*************************************************************************************************
// DMatDeclDiagExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8678 to 8682, so the beginning of the
// member declaration is not part of this listing. Only its final argument, INVALID_TYPE, is
// visible; the type selected by this trait cannot be documented from here.
8674 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8675 struct DMatDeclDiagExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8676 {
8677  public:
8678  //**********************************************************************************************
8682  , INVALID_TYPE >;
8683  //**********************************************************************************************
8684 };
8686 //*************************************************************************************************
8687 
8688 
8689 //*************************************************************************************************
// SubmatrixExprTrait specialization for DMatTDMatMultExpr with the additional compile time
// flag AF (presumably the alignment flag of the submatrix - confirm).
// NOTE(review): the embedded listing numbers jump from 8695 to 8698, so the member
// declaration between the two separator lines is not part of this listing and cannot be
// documented from here.
8691 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, bool AF >
8692 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, AF >
8693 {
8694  public:
8695  //**********************************************************************************************
8698  //**********************************************************************************************
8699 };
8701 //*************************************************************************************************
8702 
8703 
8704 //*************************************************************************************************
// RowExprTrait specialization for DMatTDMatMultExpr. The nested Type is obtained by applying
// MultExprTrait_ to the row trait of the (const) left-hand side operand type MT1 and the
// right-hand side operand type MT2, i.e. the row of the product is expressed as the row of
// the left operand multiplied by the right operand. The flags SF, HF, LF and UF do not
// influence the resulting type.
8706 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8707 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8708 {
8709  public:
8710  //**********************************************************************************************
8711  using Type = MultExprTrait_< RowExprTrait_<const MT1>, MT2 >;
8712  //**********************************************************************************************
8713 };
8715 //*************************************************************************************************
8716 
8717 
8718 //*************************************************************************************************
// ColumnExprTrait specialization for DMatTDMatMultExpr.
// NOTE(review): the embedded listing numbers jump from 8724 to 8726, so the member
// declaration between the two separator lines is not part of this listing and cannot be
// documented from here.
8720 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8721 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8722 {
8723  public:
8724  //**********************************************************************************************
8726  //**********************************************************************************************
8727 };
8729 //*************************************************************************************************
8730 
8731 } // namespace blaze
8732 
8733 #endif
typename SubmatrixExprTrait< MT, AF >::Type SubmatrixExprTrait_
Auxiliary alias declaration for the SubmatrixExprTrait type trait.The SubmatrixExprTrait_ alias decla...
Definition: SubmatrixExprTrait.h:134
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:288
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
Evaluation of the expression type of a dense matrix declupp operation.Via this type trait it is possi...
Definition: DMatDeclUppExprTrait.h:75
Compile time check for row vector types.This type trait tests whether or not the given template argum...
Definition: IsRowVector.h:80
const DMatForEachExpr< MT, Conj, SO > conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatForEachExpr.h:1214
Header file for auxiliary alias declarations.
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:72
Constraint on the data type.
Header file for kernel specific block sizes.
Header file for mathematical functions.
constexpr bool useOptimizedKernels
Configuration switch for optimized kernels.This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Compile time check for low-level access to constant data.This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE const complex< int8_t > sum(const SIMDcint8 &a) noexcept
Returns the sum of all elements in the 8-bit integral complex SIMD vector.
Definition: Reduction.h:63
Header file for the DMatDeclDiagExprTrait class template.
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the serial shim.
Header file for the IsDiagonal type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatScalarMultExpr.h:560
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: DMatTDMatMultExpr.h:386
Generic wrapper for a compile time constant integral value.The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the ColumnExprTrait class template.
Header file for the DeclUpp functor.
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:194
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:162
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:315
Header file for the IsColumnMajorMatrix type trait.
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DMatScalarMultExpr.h:633
Header file for the dense matrix multiplication kernels.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:533
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatScalarMultExpr.h:550
Header file for the IsRowVector type trait.
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template.The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:223
Base class for all matrix/scalar multiplication expression templates.The MatScalarMultExpr class serv...
Definition: MatScalarMultExpr.h:66
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1755
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:163
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:177
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:245
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:138
DisableIf_< IsSymmetric< MT >, const DMatDeclSymExpr< MT, SO > > declsym(const DenseMatrix< MT, SO > &dm)
Declares the given non-symmetric dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:841
Evaluation of the expression type of a dense matrix/dense vector multiplication.Via this type trait i...
Definition: DMatDVecMultExprTrait.h:78
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:323
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1802
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:71
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:119
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:486
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:402
Constraint on the data type.
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:323
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:284
Evaluation of the expression type of a dense matrix declsym operation.Via this type trait it is possi...
Definition: DMatDeclSymExprTrait.h:75
Constraint on the data type.
typename MultExprTrait< T1, T2 >::Type MultExprTrait_
Auxiliary alias declaration for the MultExprTrait class template.The MultExprTrait_ alias declaration...
Definition: MultExprTrait.h:344
Header file for the MultExprTrait class template.
DisableIf_< IsHermitian< MT >, const DMatDeclHermExpr< MT, SO > > declherm(const DenseMatrix< MT, SO > &dm)
Declares the given non-Hermitian dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:841
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
SubvectorExprTrait_< VT, unaligned > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:152
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for upper unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:176
Header file for the DisableIf class template.
Compile time check for dense vector types.This type trait tests whether or not the given template par...
Definition: IsDenseVector.h:78
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Flag for upper matrices.
Definition: DMatTDMatMultExpr.h:196
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
Header file for the IsDouble type trait.
Header file for the If class template.
Compile time check for row-major matrix types.This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:83
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:297
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:294
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Flag for symmetric matrices.
Definition: DMatTDMatMultExpr.h:193
Header file for the Or class template.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:456
Expression object for dense matrix-scalar multiplications.The DMatScalarMultExpr class represents the...
Definition: DMatScalarMultExpr.h:123
Header file for the TDMatSVecMultExprTrait class template.
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:174
Header file for the DenseMatrix base class.
Header file for the DMatDeclLowExprTrait class template.
Header file for the Columns type trait.
Header file for the Not class template.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Evaluation of the expression type of a dense matrix/sparse vector multiplication.Via this type trait ...
Definition: DMatSVecMultExprTrait.h:80
Compile time check for sparse vector types.This type trait tests whether or not the given template pa...
Definition: IsSparseVector.h:78
Evaluation of the expression type of a submatrix operation. Via this type trait it is possible to...
Definition: SubmatrixExprTrait.h:80
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:444
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:80
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:128
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:60
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
Header file for the DMatDeclUppExprTrait class template.
Header file for the DMatDeclSymExprTrait class template.
Compile time check for column vector types.This type trait tests whether or not the given template ar...
Definition: IsColumnVector.h:80
Flag for lower matrices.
Definition: DMatTDMatMultExpr.h:195
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
DisableIf_< IsLower< MT >, const DMatDeclLowExpr< MT, SO > > decllow(const DenseMatrix< MT, SO > &dm)
Declares the given non-lower dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:842
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:487
Evaluation of the expression type of a row operation. Via this type trait it is possible to evalu...
Definition: RowExprTrait.h:79
LeftOperand matrix_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatScalarMultExpr.h:632
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:260
Header file for the DeclDiag functor.
Evaluation of the expression type of a dense matrix declherm operation.Via this type trait it is poss...
Definition: DMatDeclHermExprTrait.h:75
Compile time check for dense matrix types.This type trait tests whether or not the given template par...
Definition: IsDenseMatrix.h:78
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:128
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:281
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemm). This compilation switch e...
Definition: BLAS.h:93
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:109
Compile time check for lower unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:279
Header file for the conjugate shim.
Header file for the IsNumeric type trait.
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions.The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
System settings for the BLAS mode.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:466
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSIMDCombinable type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for the MatScalarMultExpr base class.
typename TDVecTDMatMultExprTrait< VT, MT >::Type TDVecTDMatMultExprTrait_
Auxiliary alias declaration for the TDVecTDMatMultExprTrait class template.The TDVecTDMatMultExprTrai...
Definition: TDVecTDMatMultExprTrait.h:120
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:173
Header file for run time assertion macros.
Compile time check for column-major matrix types.This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:83
Utility type for generic codes.
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: DMatTDMatMultExpr.h:283
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:160
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:175
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:93
Compile time type negation. The Not class template negates the given compile time condition. In case the given condition would evaluate to true, the nested member enumeration is set to false and vice versa:
Definition: Not.h:70
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:432
Header file for the DMatDeclHermExprTrait class template.
Compile time check for Hermitian matrices.This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Compile time check for built-in data types.This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
Header file for the HasMutableDataAccess type trait.
Header file for the Noop functor.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:113
Header file for BLAS triangular matrix/matrix multiplication functions (trmm)
typename DMatDVecMultExprTrait< MT, VT >::Type DMatDVecMultExprTrait_
Auxiliary alias declaration for the DMatDVecMultExprTrait class template. The DMatDVecMultExprTrait_ a...
Definition: DMatDVecMultExprTrait.h:119
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:223
ElementType_< ResultType > ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:282
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:172
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:243
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Header file for the IsDenseVector type trait.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:285
LeftOperand leftOperand() const noexcept
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:422
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
Evaluation of the expression type of a dense vector/dense matrix multiplication. Via this type trait i...
Definition: TDVecDMatMultExprTrait.h:78
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:338
Evaluation of the expression type of a sparse vector/dense matrix multiplication. Via this type trait ...
Definition: TSVecDMatMultExprTrait.h:78
Compile time check for complex types. This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
Header file for the IsRowMajorMatrix type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:55
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:291
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
DMatTDMatMultExpr< MT1, MT2, SF, HF, LF, UF > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:277
Header file for the TDMatDVecMultExprTrait class template.
Header file for BLAS general matrix/matrix multiplication functions (gemm)
Header file for the IsComplexFloat type trait.
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix. The Columns type trait evaluates the num...
Definition: Columns.h:76
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:280
Evaluation of the expression type of a dense matrix decllow operation. Via this type trait it is possi...
Definition: DMatDeclLowExprTrait.h:75
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
Evaluation of the expression type of a dense matrix decldiag operation. Via this type trait it is poss...
Definition: DMatDeclDiagExprTrait.h:75
Compile time evaluation of the number of rows of a matrix. The Rows type trait evaluates the number of...
Definition: Rows.h:76
Header file for the IsComplex type trait.
Header file for the DeclHerm functor.
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:166
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions. The RightOperand_ alias declaration provid...
Definition: Aliases.h:363
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:403
Header file for the IsUpper type trait.
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
Flag for Hermitian matrices.
Definition: DMatTDMatMultExpr.h:194
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:412
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:677
const DMatDMatMultExpr< T1, T2, false, false, false, false > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:7505
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
DisableIf_< IsDiagonal< MT >, const DMatDeclDiagExpr< MT, SO > > decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given non-diagonal dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:841
DisableIf_< IsUpper< MT >, const DMatDeclUppExpr< MT, SO > > declupp(const DenseMatrix< MT, SO > &dm)
Declares the given non-upper dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:842
Evaluation of the expression type of a column operation. Via this type trait it is possible to ev...
Definition: ColumnExprTrait.h:78
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the TDVecTDMatMultExprTrait class template.
Header file for the DeclSym functor.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:476
Header file for the TrueType type/value trait base class.
Header file for the IsExpression type trait class.
Header file for the function trace functionality.