TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemm.h>
44 #include <blaze/math/blas/trmm.h>
45 #include <blaze/math/Aliases.h>
52 #include <blaze/math/dense/MMM.h>
53 #include <blaze/math/Exception.h>
59 #include <blaze/math/Functions.h>
67 #include <blaze/math/shims/Reset.h>
69 #include <blaze/math/SIMD.h>
114 #include <blaze/system/BLAS.h>
115 #include <blaze/system/Blocking.h>
116 #include <blaze/system/Debugging.h>
118 #include <blaze/system/Thresholds.h>
119 #include <blaze/util/Assert.h>
120 #include <blaze/util/Complex.h>
124 #include <blaze/util/DisableIf.h>
125 #include <blaze/util/EnableIf.h>
128 #include <blaze/util/InvalidType.h>
129 #include <blaze/util/mpl/And.h>
130 #include <blaze/util/mpl/Bool.h>
131 #include <blaze/util/mpl/If.h>
132 #include <blaze/util/mpl/Not.h>
133 #include <blaze/util/mpl/Or.h>
134 #include <blaze/util/TrueType.h>
135 #include <blaze/util/Types.h>
145 
146 
147 namespace blaze {
148 
149 //=================================================================================================
150 //
151 // CLASS TDMATTDMATMULTEXPR
152 //
153 //=================================================================================================
154 
155 //*************************************************************************************************
162 template< typename MT1 // Type of the left-hand side dense matrix
163  , typename MT2 // Type of the right-hand side dense matrix
164  , bool SF // Symmetry flag
165  , bool HF // Hermitian flag
166  , bool LF // Lower flag
167  , bool UF > // Upper flag
168 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, true >
169  , private MatMatMultExpr
170  , private Computation
171 {
172  private:
173  //**Type definitions****************************************************************************
180  //**********************************************************************************************
181 
182  //**********************************************************************************************
184  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
185  //**********************************************************************************************
186 
187  //**********************************************************************************************
189  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
190  //**********************************************************************************************
191 
192  //**********************************************************************************************
194  enum : bool {
195  SYM = ( SF && !( HF || LF || UF ) ),
196  HERM = ( HF && !( LF || UF ) ),
197  LOW = ( LF || ( ( SF || HF ) && UF ) ),
198  UPP = ( UF || ( ( SF || HF ) && LF ) )
199  };
200  //**********************************************************************************************
201 
202  //**********************************************************************************************
204 
// Helper trait over three types T1, T2 and T3 exposing a nested compile-time boolean `value`.
// The visible part of the condition requires T1 to be a row-major matrix type.
// NOTE(review): listing line 213 (the remainder of the condition after the `&&`) is absent
// from this extract, so the full predicate cannot be stated here - confirm against the
// original header.
210  template< typename T1, typename T2, typename T3 >
211  struct CanExploitSymmetry {
212  enum : bool { value = IsRowMajorMatrix<T1>::value &&
214  };
216  //**********************************************************************************************
217 
218  //**********************************************************************************************
220 
224  template< typename T1, typename T2, typename T3 >
225  struct IsEvaluationRequired {
226  enum : bool { value = ( evaluateLeft || evaluateRight ) &&
227  CanExploitSymmetry<T1,T2,T3>::value };
228  };
230  //**********************************************************************************************
231 
232  //**********************************************************************************************
234 
// Helper trait over three types T1, T2 and T3 with a nested compile-time boolean `value`.
// Conjuncts visible in this extract: none of the SYM/HERM/LOW/UPP structure flags is set,
// all three types report simdEnabled, and T1 and T3 have the same element type.
// NOTE(review): listing lines 239, 241-244 and 246-249 are absent from this extract (including
// the opening `enum : bool { value = ...`), so further conjuncts exist that are not shown here -
// confirm against the original header.
237  template< typename T1, typename T2, typename T3 >
238  struct UseBlasKernel {
240  !SYM && !HERM && !LOW && !UPP &&
245  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
250  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
251  };
253  //**********************************************************************************************
254 
255  //**********************************************************************************************
257 
// Helper trait over three types T1, T2 and T3 with a nested compile-time boolean `value`.
// Conjuncts visible in this extract: useOptimizedKernels is set and all three types report
// simdEnabled; a further conjunct involves ElementType_<T3>.
// NOTE(review): listing lines 263, 265-266 and 268-269 are absent from this extract, so the
// complete condition (and its closing brace) is not shown - confirm against the original header.
260  template< typename T1, typename T2, typename T3 >
261  struct UseVectorizedDefaultKernel {
262  enum : bool { value = useOptimizedKernels &&
264  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
267  , ElementType_<T3> >::value &&
270  };
272  //**********************************************************************************************
273 
274  //**********************************************************************************************
276 
279  typedef IfTrue_< HERM
280  , DeclHerm
281  , IfTrue_< SYM
282  , DeclSym
283  , IfTrue_< LOW
284  , IfTrue_< UPP
285  , DeclDiag
286  , DeclLow >
287  , IfTrue_< UPP
288  , DeclUpp
289  , Noop > > > > ForwardFunctor;
291  //**********************************************************************************************
292 
293  public:
294  //**Type definitions****************************************************************************
297 
303  typedef const ElementType ReturnType;
304  typedef const ResultType CompositeType;
305 
307  typedef If_< IsExpression<MT1>, const MT1, const MT1& > LeftOperand;
308 
310  typedef If_< IsExpression<MT2>, const MT2, const MT2& > RightOperand;
311 
314 
317  //**********************************************************************************************
318 
319  //**Compilation flags***************************************************************************
// Compilation flag `simdEnabled` of the expression. Conjuncts visible in this extract: the
// left-hand side operand type is not diagonal and both operand types report simdEnabled.
// NOTE(review): the expression continues after the trailing `&&`; listing lines 323-324 are
// absent from this extract - confirm the remaining condition against the original header.
321  enum : bool { simdEnabled = !IsDiagonal<MT1>::value &&
322  MT1::simdEnabled && MT2::simdEnabled &&
325 
327  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
328  !evaluateRight && MT2::smpAssignable };
329  //**********************************************************************************************
330 
331  //**SIMD properties*****************************************************************************
// SIMD width constant taken from SIMDTrait<ElementType>::size (presumably the number of
// ElementType values held by one SIMD vector - confirm against SIMDTrait). The kernels use
// it as the step width of their vectorized loops and in the small-matrix size heuristic.
333  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
334  //**********************************************************************************************
335 
336  //**Constructor*********************************************************************************
342  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
343  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
344  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
345  {
346  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
347  }
348  //**********************************************************************************************
349 
350  //**Access operator*****************************************************************************
// 2D-access to the elements of the matrix product, computed on the fly.
//   i  Row index; an internal assertion requires i < lhs_.rows().
//   j  Column index; an internal assertion requires j < rhs_.columns().
// Returns element (i,j) of the product:
//   - diagonal left-hand side operand:  lhs_(i,i) * rhs_(i,j)
//   - diagonal right-hand side operand: lhs_(i,j) * rhs_(j,j)
//   - structured branch: inner product of row i of lhs_ and column j of rhs_ restricted to the
//     index range [begin,end); a default-constructed ElementType if that range is empty
//   - otherwise: full inner product of row i of lhs_ and column j of rhs_
// NOTE(review): listing line 367 is absent from this extract. It presumably holds the
// `else if( ... ) {` that opens the structured branch closed at listing line 390 - confirm
// the exact condition against the original header.
357  inline ReturnType operator()( size_t i, size_t j ) const {
358  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
359  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
360 
361  if( IsDiagonal<MT1>::value ) {
362  return lhs_(i,i) * rhs_(i,j);
363  }
364  else if( IsDiagonal<MT2>::value ) {
365  return lhs_(i,j) * rhs_(j,j);
366  }
     // First inner index that can contribute: bounded below by i for an upper left operand
     // (i+1 if strictly upper) and by j for a lower right operand (j+1 if strictly lower);
     // 0 if neither restriction applies.
368  const size_t begin( ( IsUpper<MT1>::value )
369  ?( ( IsLower<MT2>::value )
370  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
371  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
372  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
373  :( ( IsLower<MT2>::value )
374  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
375  :( 0UL ) ) );
     // One past the last inner index that can contribute: bounded above by i+1 for a lower left
     // operand (i if strictly lower) and by j+1 for an upper right operand (j if strictly upper);
     // lhs_.columns() if neither restriction applies.
376  const size_t end( ( IsLower<MT1>::value )
377  ?( ( IsUpper<MT2>::value )
378  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
379  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
380  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
381  :( ( IsUpper<MT2>::value )
382  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
383  :( lhs_.columns() ) ) );
384 
     // Empty index range: no term contributes to this element.
385  if( begin >= end ) return ElementType();
386 
387  const size_t n( end - begin );
388 
389  return subvector( row( lhs_, i ), begin, n ) * subvector( column( rhs_, j ), begin, n );
390  }
391  else {
392  return row( lhs_, i ) * column( rhs_, j );
393  }
394  }
395  //**********************************************************************************************
396 
397  //**At function*********************************************************************************
405  inline ReturnType at( size_t i, size_t j ) const {
406  if( i >= lhs_.rows() ) {
407  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
408  }
409  if( j >= rhs_.columns() ) {
410  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
411  }
412  return (*this)(i,j);
413  }
414  //**********************************************************************************************
415 
416  //**Rows function*******************************************************************************
// Returns the number of rows of the product, which is the number of rows of the
// left-hand side operand.
421  inline size_t rows() const noexcept {
422  return lhs_.rows();
423  }
424  //**********************************************************************************************
425 
426  //**Columns function****************************************************************************
// Returns the number of columns of the product, which is the number of columns of the
// right-hand side operand.
431  inline size_t columns() const noexcept {
432  return rhs_.columns();
433  }
434  //**********************************************************************************************
435 
436  //**Left operand access*************************************************************************
// Returns the stored left-hand side dense matrix operand (as LeftOperand).
441  inline LeftOperand leftOperand() const noexcept {
442  return lhs_;
443  }
444  //**********************************************************************************************
445 
446  //**Right operand access************************************************************************
// Returns the stored right-hand side dense matrix operand (as RightOperand).
451  inline RightOperand rightOperand() const noexcept {
452  return rhs_;
453  }
454  //**********************************************************************************************
455 
456  //**********************************************************************************************
462  template< typename T >
463  inline bool canAlias( const T* alias ) const noexcept {
464  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
465  }
466  //**********************************************************************************************
467 
468  //**********************************************************************************************
474  template< typename T >
475  inline bool isAliased( const T* alias ) const noexcept {
476  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
477  }
478  //**********************************************************************************************
479 
480  //**********************************************************************************************
485  inline bool isAligned() const noexcept {
486  return lhs_.isAligned() && rhs_.isAligned();
487  }
488  //**********************************************************************************************
489 
490  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
// Conjuncts visible in this extract:
//   - BLAZE_BLAS_IS_PARALLEL is not set, or the product has fewer than
//     TDMATTDMATMULT_THRESHOLD elements (rows() * columns())
//   - the product has at least SMP_TDMATTDMATMULT_THRESHOLD elements
// NOTE(review): the expression continues after the trailing `&&`; listing line 499 is absent
// from this extract - confirm the final conjunct against the original header.
495  inline bool canSMPAssign() const noexcept {
496  return ( !BLAZE_BLAS_IS_PARALLEL ||
497  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
498  ( rows() * columns() >= SMP_TDMATTDMATMULT_THRESHOLD ) &&
500  }
501  //**********************************************************************************************
502 
503  private:
504  //**Member variables****************************************************************************
// Left-hand side dense matrix of the multiplication expression.
505  LeftOperand lhs_;
// Right-hand side dense matrix of the multiplication expression.
506  RightOperand rhs_;
507  //**********************************************************************************************
508 
509  //**Assignment to dense matrices****************************************************************
// Assignment of the matrix product to a dense matrix target.
//   lhs  Target dense matrix (storage order SO).
//   rhs  Multiplication expression to be assigned.
// The target dimensions are asserted to match the expression. Nothing is done for a target
// with zero rows or zero columns; if the inner dimension (columns of the left-hand side
// operand) is zero the target is reset. Otherwise both operands are passed through serial()
// and bound to A (type LT) and B (type RT), and the work is handed to selectAssignKernel().
// NOTE(review): listing lines 524 and 527 (the line between the template header and the
// function name, and the first body line) are absent from this extract, and LT/RT are declared
// outside the visible lines - confirm against the original header.
522  template< typename MT // Type of the target dense matrix
523  , bool SO > // Storage order of the target dense matrix
525  assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
526  {
528 
529  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
530  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
531 
     // Empty target: nothing to compute. Empty inner dimension: every product element is an
     // empty sum, so the target is reset.
532  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
533  return;
534  }
535  else if( rhs.lhs_.columns() == 0UL ) {
536  reset( ~lhs );
537  return;
538  }
539 
540  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
541  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
542 
543  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
544  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
545  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
546  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
547  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
548  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
549 
550  TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
551  }
553  //**********************************************************************************************
554 
555  //**Assignment to dense matrices (kernel selection)*********************************************
566  template< typename MT3 // Type of the left-hand side target matrix
567  , typename MT4 // Type of the left-hand side matrix operand
568  , typename MT5 > // Type of the right-hand side matrix operand
569  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
570  {
571  if( ( IsDiagonal<MT4>::value ) ||
572  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
573  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
574  selectSmallAssignKernel( C, A, B );
575  else
576  selectBlasAssignKernel( C, A, B );
577  }
579  //**********************************************************************************************
580 
581  //**Default assignment to dense matrices (general/general)**************************************
// Default (scalar) assignment kernel for C = A * B, general/general case per the section title.
//   C  Target matrix; written element-wise via C(i,j).
//   A  Left-hand side operand  (M x K).
//   B  Right-hand side operand (K x N).
// For every column j the inner index range [kbegin,kend) is derived from the lower/upper
// structure of B. The first inner index initializes the column (assignment plus resets of the
// entries outside the row range), the remaining inner indices accumulate into it. If SYM or
// HERM is set, the strictly upper part of C is finally filled from its lower part.
// NOTE(review): listing lines 598 (between the template header and the function name), 626, 631,
// 660 and 665 (the inner heads of the nested ibegin/iend conditionals) are absent from this
// extract - confirm against the original header before relying on the exact index bounds.
595  template< typename MT3 // Type of the left-hand side target matrix
596  , typename MT4 // Type of the left-hand side matrix operand
597  , typename MT5 > // Type of the right-hand side matrix operand
599  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
600  {
601  const size_t M( A.rows() );
602  const size_t N( B.columns() );
603  const size_t K( A.columns() );
604 
     // Any structure flag requires a square result.
605  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
606 
607  for( size_t j=0UL; j<N; ++j )
608  {
     // Inner index range for column j: starts at j (j+1 if strictly lower) for a lower B and
     // ends at j+1 (j if strictly upper) for an upper B; otherwise the full range [0,K).
609  const size_t kbegin( ( IsLower<MT5>::value )
610  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
611  :( 0UL ) );
612  const size_t kend( ( IsUpper<MT5>::value )
613  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
614  :( K ) );
615  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
616 
     // Strictly triangular B with an empty inner range: the whole column of C is reset.
617  if( IsStrictlyTriangular<MT5>::value && kbegin == kend ) {
618  for( size_t i=0UL; i<M; ++i ) {
619  reset( C(i,j) );
620  }
621  continue;
622  }
623 
     // First inner index (k == kbegin): initializes column j by assignment and resets the
     // entries outside [ibegin,iend) as selected by the structure conditions below.
624  {
625  const size_t ibegin( ( IsLower<MT4>::value )
627  ?( LOW ? max(j,kbegin+1UL) : kbegin+1UL )
628  :( LOW ? max(j,kbegin) : kbegin ) )
629  :( LOW ? j : 0UL ) );
630  const size_t iend( ( IsUpper<MT4>::value )
632  ?( UPP ? min(j+1UL,kbegin) : kbegin )
633  :( UPP ? min(j,kbegin)+1UL : kbegin+1UL ) )
634  :( UPP ? j+1UL : M ) );
635 
636  if( ( IsLower<MT4>::value && IsLower<MT5>::value ) || LOW ) {
637  for( size_t i=0UL; i<ibegin; ++i ) {
638  reset( C(i,j) );
639  }
640  }
641  else if( IsStrictlyLower<MT4>::value ) {
642  reset( C(0UL,j) );
643  }
644  for( size_t i=ibegin; i<iend; ++i ) {
645  C(i,j) = A(i,kbegin) * B(kbegin,j);
646  }
647  if( ( IsUpper<MT4>::value && IsUpper<MT5>::value ) || UPP ) {
648  for( size_t i=iend; i<M; ++i ) {
649  reset( C(i,j) );
650  }
651  }
652  else if( IsStrictlyUpper<MT4>::value ) {
653  reset( C(M-1UL,j) );
654  }
655  }
656 
     // Remaining inner indices: accumulate A(i,k) * B(k,j) into column j.
657  for( size_t k=kbegin+1UL; k<kend; ++k )
658  {
659  const size_t ibegin( ( IsLower<MT4>::value )
661  ?( SYM || HERM || LOW ? max( j, k+1UL ) : k+1UL )
662  :( SYM || HERM || LOW ? max( j, k ) : k ) )
663  :( SYM || HERM || LOW ? j : 0UL ) );
664  const size_t iend( ( IsUpper<MT4>::value )
666  ?( UPP ? min(j+1UL,k-1UL) : k-1UL )
667  :( UPP ? min(j+1UL,k) : k ) )
668  :( UPP ? j+1UL : M ) );
669 
     // With a structure flag set the row range may be empty for this k; skip it.
670  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) continue;
671  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
672 
673  for( size_t i=ibegin; i<iend; ++i ) {
674  C(i,j) += A(i,k) * B(k,j);
675  }
     // For an upper A the element at row iend is assigned rather than accumulated.
676  if( IsUpper<MT4>::value ) {
677  C(iend,j) = A(iend,k) * B(k,j);
678  }
679  }
680  }
681 
     // Symmetric/Hermitian result: copy the elements below the diagonal to their mirrored
     // positions above it (conjugated for HERM).
682  if( SYM || HERM ) {
683  for( size_t j=1UL; j<N; ++j ) {
684  for( size_t i=0UL; i<j; ++i ) {
685  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
686  }
687  }
688  }
689  }
691  //**********************************************************************************************
692 
693  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default assignment kernel for C = A * B, enabled when A is not diagonal and B is diagonal.
//   C  Target matrix; written element-wise via C(i,j).
//   A  Left-hand side operand (non-diagonal).
//   B  Right-hand side operand (diagonal).
// Column j of C is column j of A scaled by B(j,j). The row range [ibegin,iend) follows the
// lower/upper structure of A; entries of C outside that range are reset.
// NOTE(review): listing line 713 (first body line) is absent from this extract.
707  template< typename MT3 // Type of the left-hand side target matrix
708  , typename MT4 // Type of the left-hand side matrix operand
709  , typename MT5 > // Type of the right-hand side matrix operand
710  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
711  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
712  {
714 
715  const size_t M( A.rows() );
716  const size_t N( B.columns() );
717 
718  for( size_t j=0UL; j<N; ++j )
719  {
     // Row range of column j: starts at j (j+1 if strictly lower) for a lower A and ends at
     // j+1 (j if strictly upper) for an upper A; otherwise all rows [0,M).
720  const size_t ibegin( ( IsLower<MT4>::value )
721  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
722  :( 0UL ) );
723  const size_t iend( ( IsUpper<MT4>::value )
724  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
725  :( M ) );
726  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
727 
     // Rows above the range are reset for a lower A.
728  if( IsLower<MT4>::value ) {
729  for( size_t i=0UL; i<ibegin; ++i ) {
730  reset( C(i,j) );
731  }
732  }
733  for( size_t i=ibegin; i<iend; ++i ) {
734  C(i,j) = A(i,j) * B(j,j);
735  }
     // Rows below the range are reset for an upper A.
736  if( IsUpper<MT4>::value ) {
737  for( size_t i=iend; i<M; ++i ) {
738  reset( C(i,j) );
739  }
740  }
741  }
742  }
744  //**********************************************************************************************
745 
746  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default assignment kernel for C = A * B, diagonal/general case per the section title.
//   C  Target matrix; written element-wise via C(i,j).
//   A  Left-hand side operand; only its diagonal elements A(i,i) are read.
//   B  Right-hand side operand.
// Row i of C is row i of B scaled by A(i,i). For each column j the row range [ibegin,iend)
// follows the lower/upper structure of B; entries of C outside that range are reset.
// NOTE(review): listing lines 763 (between the template header and the function name) and 766
// (first body line) are absent from this extract - confirm against the original header.
760  template< typename MT3 // Type of the left-hand side target matrix
761  , typename MT4 // Type of the left-hand side matrix operand
762  , typename MT5 > // Type of the right-hand side matrix operand
764  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
765  {
767 
768  const size_t M( A.rows() );
769  const size_t N( B.columns() );
770 
771  for( size_t j=0UL; j<N; ++j )
772  {
     // Row range of column j: starts at j (j+1 if strictly lower) for a lower B and ends at
     // j+1 (j if strictly upper) for an upper B; otherwise all rows [0,M).
773  const size_t ibegin( ( IsLower<MT5>::value )
774  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
775  :( 0UL ) );
776  const size_t iend( ( IsUpper<MT5>::value )
777  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
778  :( M ) );
779  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
780 
     // NOTE(review): the range above is derived from MT5, while the two reset guards test MT4.
     // If MT4 is diagonal here (as the section title suggests) both guards are presumably
     // always true, and the reset loops are simply empty whenever the range is not restricted
     // - confirm that this is intended.
781  if( IsLower<MT4>::value ) {
782  for( size_t i=0UL; i<ibegin; ++i ) {
783  reset( C(i,j) );
784  }
785  }
786  for( size_t i=ibegin; i<iend; ++i ) {
787  C(i,j) = A(i,i) * B(i,j);
788  }
789  if( IsUpper<MT4>::value ) {
790  for( size_t i=iend; i<M; ++i ) {
791  reset( C(i,j) );
792  }
793  }
794  }
795  }
797  //**********************************************************************************************
798 
799  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default assignment kernel for C = A * B, enabled when both A and B are diagonal.
//   C  Target matrix; reset as a whole, then its diagonal is written.
//   A  Left-hand side operand (diagonal).
//   B  Right-hand side operand (diagonal).
// Only the diagonal is computed: C(i,i) = A(i,i) * B(i,i) for every row i of A.
// NOTE(review): listing line 819 (first body line) is absent from this extract.
813  template< typename MT3 // Type of the left-hand side target matrix
814  , typename MT4 // Type of the left-hand side matrix operand
815  , typename MT5 > // Type of the right-hand side matrix operand
816  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
817  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
818  {
820 
821  reset( C );
822 
823  for( size_t i=0UL; i<A.rows(); ++i ) {
824  C(i,i) = A(i,i) * B(i,i);
825  }
826  }
828  //**********************************************************************************************
829 
830  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel (default variant per the section title): forwards unchanged
// to selectDefaultAssignKernel().
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
// NOTE(review): listing line 847 (between the template header and the function name,
// presumably the return type / enabling condition) is absent from this extract.
844  template< typename MT3 // Type of the left-hand side target matrix
845  , typename MT4 // Type of the left-hand side matrix operand
846  , typename MT5 > // Type of the right-hand side matrix operand
848  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
849  {
850  selectDefaultAssignKernel( C, A, B );
851  }
853  //**********************************************************************************************
854 
855  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
// Small-matrix assignment kernel for a row-major target (DenseMatrix<MT3,false>).
//   C  Row-major target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
// Every visible branch converts one operand to its opposite storage order (OppositeType_,
// built from serial() of the operand) and assigns the product, wrapped in the ForwardFunctor,
// to the target. The last two branches choose the operand by element count: B is converted
// if it has no more elements than A, otherwise A is converted.
// NOTE(review): listing lines 873, 876-879, 883 and 887 are absent from this extract. Lines 883
// and 887 presumably hold the conditions opening the first two branches (the braces below are
// unbalanced without them) - confirm against the original header.
870  template< typename MT3 // Type of the left-hand side target matrix
871  , typename MT4 // Type of the left-hand side matrix operand
872  , typename MT5 > // Type of the right-hand side matrix operand
874  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
875  {
880 
881  const ForwardFunctor fwd;
882 
884  const OppositeType_<MT5> tmp( serial( B ) );
885  assign( ~C, fwd( A * tmp ) );
886  }
888  const OppositeType_<MT4> tmp( serial( A ) );
889  assign( ~C, fwd( tmp * B ) );
890  }
891  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
892  const OppositeType_<MT5> tmp( serial( B ) );
893  assign( ~C, fwd( A * tmp ) );
894  }
895  else {
896  const OppositeType_<MT4> tmp( serial( A ) );
897  assign( ~C, fwd( tmp * B ) );
898  }
899  }
901  //**********************************************************************************************
902 
903  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
918  template< typename MT3 // Type of the left-hand side target matrix
919  , typename MT4 // Type of the left-hand side matrix operand
920  , typename MT5 > // Type of the right-hand side matrix operand
922  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
923  {
924  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
925 
926  const size_t M( A.rows() );
927  const size_t N( B.columns() );
928  const size_t K( A.columns() );
929 
930  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
931 
932  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
933  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
934 
935  if( LOW && UPP && M > SIMDSIZE*3UL ) {
936  reset( ~C );
937  }
938 
939  {
940  size_t i( 0UL );
941 
943  {
944  for( ; !SYM && !HERM && !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
945  for( size_t j=0UL; j<N; ++j )
946  {
947  const size_t kbegin( ( IsLower<MT5>::value )
948  ?( ( IsUpper<MT4>::value )
949  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
950  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
951  :( IsUpper<MT4>::value ? i : 0UL ) );
952  const size_t kend( ( IsUpper<MT5>::value )
953  ?( ( IsLower<MT4>::value )
954  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
955  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
956  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
957 
958  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
959 
960  for( size_t k=kbegin; k<kend; ++k ) {
961  const SIMDType b1( set( B(k,j) ) );
962  xmm1 += A.load(i ,k) * b1;
963  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
964  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
965  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
966  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
967  xmm6 += A.load(i+SIMDSIZE*5UL,k) * b1;
968  xmm7 += A.load(i+SIMDSIZE*6UL,k) * b1;
969  xmm8 += A.load(i+SIMDSIZE*7UL,k) * b1;
970  }
971 
972  (~C).store( i , j, xmm1 );
973  (~C).store( i+SIMDSIZE , j, xmm2 );
974  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
975  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
976  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
977  (~C).store( i+SIMDSIZE*5UL, j, xmm6 );
978  (~C).store( i+SIMDSIZE*6UL, j, xmm7 );
979  (~C).store( i+SIMDSIZE*7UL, j, xmm8 );
980  }
981  }
982  }
983 
984  for( ; !SYM && !HERM && !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
985  {
986  size_t j( 0UL );
987 
988  for( ; (j+2UL) <= N; j+=2UL )
989  {
990  const size_t kbegin( ( IsLower<MT5>::value )
991  ?( ( IsUpper<MT4>::value )
992  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
993  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
994  :( IsUpper<MT4>::value ? i : 0UL ) );
995  const size_t kend( ( IsUpper<MT5>::value )
996  ?( ( IsLower<MT4>::value )
997  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
998  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
999  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
1000 
1001  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
1002 
1003  for( size_t k=kbegin; k<kend; ++k ) {
1004  const SIMDType a1( A.load(i ,k) );
1005  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
1006  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
1007  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
1008  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
1009  const SIMDType b1( set( B(k,j ) ) );
1010  const SIMDType b2( set( B(k,j+1UL) ) );
1011  xmm1 += a1 * b1;
1012  xmm2 += a2 * b1;
1013  xmm3 += a3 * b1;
1014  xmm4 += a4 * b1;
1015  xmm5 += a5 * b1;
1016  xmm6 += a1 * b2;
1017  xmm7 += a2 * b2;
1018  xmm8 += a3 * b2;
1019  xmm9 += a4 * b2;
1020  xmm10 += a5 * b2;
1021  }
1022 
1023  (~C).store( i , j , xmm1 );
1024  (~C).store( i+SIMDSIZE , j , xmm2 );
1025  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
1026  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
1027  (~C).store( i+SIMDSIZE*4UL, j , xmm5 );
1028  (~C).store( i , j+1UL, xmm6 );
1029  (~C).store( i+SIMDSIZE , j+1UL, xmm7 );
1030  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm8 );
1031  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm9 );
1032  (~C).store( i+SIMDSIZE*4UL, j+1UL, xmm10 );
1033  }
1034 
1035  if( j < N )
1036  {
1037  const size_t kbegin( ( IsLower<MT5>::value )
1038  ?( ( IsUpper<MT4>::value )
1039  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1040  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1041  :( IsUpper<MT4>::value ? i : 0UL ) );
1042  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
1043 
1044  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
1045 
1046  for( size_t k=kbegin; k<kend; ++k ) {
1047  const SIMDType b1( set( B(k,j) ) );
1048  xmm1 += A.load(i ,k) * b1;
1049  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
1050  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
1051  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
1052  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
1053  }
1054 
1055  (~C).store( i , j, xmm1 );
1056  (~C).store( i+SIMDSIZE , j, xmm2 );
1057  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
1058  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
1059  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
1060  }
1061  }
1062 
1063  for( ; !( LOW && UPP ) && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
1064  {
1065  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*4UL,N) : N );
1066  size_t j( UPP ? i : 0UL );
1067 
1068  for( ; (j+2UL) <= jend; j+=2UL )
1069  {
1070  const size_t kbegin( ( IsLower<MT5>::value )
1071  ?( ( IsUpper<MT4>::value )
1072  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1073  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1074  :( IsUpper<MT4>::value ? i : 0UL ) );
1075  const size_t kend( ( IsUpper<MT5>::value )
1076  ?( ( IsLower<MT4>::value )
1077  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
1078  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
1079  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
1080 
1081  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1082 
1083  for( size_t k=kbegin; k<kend; ++k ) {
1084  const SIMDType a1( A.load(i ,k) );
1085  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
1086  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
1087  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
1088  const SIMDType b1( set( B(k,j ) ) );
1089  const SIMDType b2( set( B(k,j+1UL) ) );
1090  xmm1 += a1 * b1;
1091  xmm2 += a2 * b1;
1092  xmm3 += a3 * b1;
1093  xmm4 += a4 * b1;
1094  xmm5 += a1 * b2;
1095  xmm6 += a2 * b2;
1096  xmm7 += a3 * b2;
1097  xmm8 += a4 * b2;
1098  }
1099 
1100  (~C).store( i , j , xmm1 );
1101  (~C).store( i+SIMDSIZE , j , xmm2 );
1102  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
1103  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
1104  (~C).store( i , j+1UL, xmm5 );
1105  (~C).store( i+SIMDSIZE , j+1UL, xmm6 );
1106  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm7 );
1107  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm8 );
1108  }
1109 
1110  if( j < jend )
1111  {
1112  const size_t kbegin( ( IsLower<MT5>::value )
1113  ?( ( IsUpper<MT4>::value )
1114  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1115  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1116  :( IsUpper<MT4>::value ? i : 0UL ) );
1117  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
1118 
1119  SIMDType xmm1, xmm2, xmm3, xmm4;
1120 
1121  for( size_t k=kbegin; k<kend; ++k ) {
1122  const SIMDType b1( set( B(k,j) ) );
1123  xmm1 += A.load(i ,k) * b1;
1124  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
1125  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
1126  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
1127  }
1128 
1129  (~C).store( i , j, xmm1 );
1130  (~C).store( i+SIMDSIZE , j, xmm2 );
1131  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
1132  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
1133  }
1134  }
1135 
1136  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
1137  {
1138  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*3UL,N) : N );
1139  size_t j( UPP ? i : 0UL );
1140 
1141  for( ; (j+2UL) <= jend; j+=2UL )
1142  {
1143  const size_t kbegin( ( IsLower<MT5>::value )
1144  ?( ( IsUpper<MT4>::value )
1145  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1146  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1147  :( IsUpper<MT4>::value ? i : 0UL ) );
1148  const size_t kend( ( IsUpper<MT5>::value )
1149  ?( ( IsLower<MT4>::value )
1150  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
1151  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
1152  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
1153 
1154  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
1155 
1156  for( size_t k=kbegin; k<kend; ++k ) {
1157  const SIMDType a1( A.load(i ,k) );
1158  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
1159  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
1160  const SIMDType b1( set( B(k,j ) ) );
1161  const SIMDType b2( set( B(k,j+1UL) ) );
1162  xmm1 += a1 * b1;
1163  xmm2 += a2 * b1;
1164  xmm3 += a3 * b1;
1165  xmm4 += a1 * b2;
1166  xmm5 += a2 * b2;
1167  xmm6 += a3 * b2;
1168  }
1169 
1170  (~C).store( i , j , xmm1 );
1171  (~C).store( i+SIMDSIZE , j , xmm2 );
1172  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
1173  (~C).store( i , j+1UL, xmm4 );
1174  (~C).store( i+SIMDSIZE , j+1UL, xmm5 );
1175  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm6 );
1176  }
1177 
1178  if( j < jend )
1179  {
1180  const size_t kbegin( ( IsLower<MT5>::value )
1181  ?( ( IsUpper<MT4>::value )
1182  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1183  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1184  :( IsUpper<MT4>::value ? i : 0UL ) );
1185  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
1186 
1187  SIMDType xmm1, xmm2, xmm3;
1188 
1189  for( size_t k=kbegin; k<kend; ++k ) {
1190  const SIMDType b1( set( B(k,j) ) );
1191  xmm1 += A.load(i ,k) * b1;
1192  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
1193  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
1194  }
1195 
1196  (~C).store( i , j, xmm1 );
1197  (~C).store( i+SIMDSIZE , j, xmm2 );
1198  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
1199  }
1200  }
1201 
1202  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
1203  {
1204  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*2UL,N) : N );
1205  size_t j( UPP ? i : 0UL );
1206 
1207  for( ; (j+2UL) <= jend; j+=2UL )
1208  {
1209  const size_t kbegin( ( IsLower<MT5>::value )
1210  ?( ( IsUpper<MT4>::value )
1211  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1212  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1213  :( IsUpper<MT4>::value ? i : 0UL ) );
1214  const size_t kend( ( IsUpper<MT5>::value )
1215  ?( ( IsLower<MT4>::value )
1216  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
1217  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
1218  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
1219 
1220  SIMDType xmm1, xmm2, xmm3, xmm4;
1221 
1222  for( size_t k=kbegin; k<kend; ++k ) {
1223  const SIMDType a1( A.load(i ,k) );
1224  const SIMDType a2( A.load(i+SIMDSIZE,k) );
1225  const SIMDType b1( set( B(k,j ) ) );
1226  const SIMDType b2( set( B(k,j+1UL) ) );
1227  xmm1 += a1 * b1;
1228  xmm2 += a2 * b1;
1229  xmm3 += a1 * b2;
1230  xmm4 += a2 * b2;
1231  }
1232 
1233  (~C).store( i , j , xmm1 );
1234  (~C).store( i+SIMDSIZE, j , xmm2 );
1235  (~C).store( i , j+1UL, xmm3 );
1236  (~C).store( i+SIMDSIZE, j+1UL, xmm4 );
1237  }
1238 
1239  if( j < jend )
1240  {
1241  const size_t kbegin( ( IsLower<MT5>::value )
1242  ?( ( IsUpper<MT4>::value )
1243  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1244  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1245  :( IsUpper<MT4>::value ? i : 0UL ) );
1246  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
1247 
1248  SIMDType xmm1, xmm2;
1249 
1250  for( size_t k=kbegin; k<kend; ++k ) {
1251  const SIMDType b1( set( B(k,j) ) );
1252  xmm1 += A.load(i ,k) * b1;
1253  xmm2 += A.load(i+SIMDSIZE,k) * b1;
1254  }
1255 
1256  (~C).store( i , j, xmm1 );
1257  (~C).store( i+SIMDSIZE, j, xmm2 );
1258  }
1259  }
1260 
1261  for( ; i<ipos; i+=SIMDSIZE )
1262  {
1263  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE,N) : N );
1264  size_t j( UPP ? i : 0UL );
1265 
1266  for( ; (j+2UL) <= jend; j+=2UL )
1267  {
1268  const size_t kbegin( ( IsLower<MT5>::value )
1269  ?( ( IsUpper<MT4>::value )
1270  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1271  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1272  :( IsUpper<MT4>::value ? i : 0UL ) );
1273  const size_t kend( ( IsUpper<MT5>::value )
1274  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
1275  :( K ) );
1276 
1277  SIMDType xmm1, xmm2;
1278 
1279  for( size_t k=kbegin; k<kend; ++k ) {
1280  const SIMDType a1( A.load(i,k) );
1281  xmm1 += a1 * set( B(k,j ) );
1282  xmm2 += a1 * set( B(k,j+1UL) );
1283  }
1284 
1285  (~C).store( i, j , xmm1 );
1286  (~C).store( i, j+1UL, xmm2 );
1287  }
1288 
1289  if( j < jend )
1290  {
1291  const size_t kbegin( ( IsLower<MT5>::value )
1292  ?( ( IsUpper<MT4>::value )
1293  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1294  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1295  :( IsUpper<MT4>::value ? i : 0UL ) );
1296 
1297  SIMDType xmm1;
1298 
1299  for( size_t k=kbegin; k<K; ++k ) {
1300  xmm1 += A.load(i,k) * set( B(k,j) );
1301  }
1302 
1303  (~C).store( i, j, xmm1 );
1304  }
1305  }
1306 
1307  for( ; remainder && i<M; ++i )
1308  {
1309  size_t j( LOW && UPP ? i : 0UL );
1310 
1311  for( ; (j+2UL) <= N; j+=2UL )
1312  {
1313  const size_t kbegin( ( IsLower<MT5>::value )
1314  ?( ( IsUpper<MT4>::value )
1315  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1316  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1317  :( IsUpper<MT4>::value ? i : 0UL ) );
1318  const size_t kend( ( IsUpper<MT5>::value )
1319  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
1320  :( K ) );
1321 
1322  ElementType value1 = ElementType();
1323  ElementType value2 = ElementType();
1324 
1325  for( size_t k=kbegin; k<kend; ++k ) {
1326  value1 += A(i,k) * B(k,j );
1327  value2 += A(i,k) * B(k,j+1UL);
1328  }
1329 
1330  (~C)(i,j ) = value1;
1331  (~C)(i,j+1UL) = value2;
1332  }
1333 
1334  if( j < N )
1335  {
1336  const size_t kbegin( ( IsLower<MT5>::value )
1337  ?( ( IsUpper<MT4>::value )
1338  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1339  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1340  :( IsUpper<MT4>::value ? i : 0UL ) );
1341 
1342  ElementType value = ElementType();
1343 
1344  for( size_t k=kbegin; k<K; ++k ) {
1345  value += A(i,k) * B(k,j);
1346  }
1347 
1348  (~C)(i,j) = value;
1349  }
1350  }
1351  }
1352 
1353  if( ( SYM || HERM ) && ( M > SIMDSIZE*4UL ) ) {
1354  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
1355  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
1356  for( size_t i=0UL; i<iend; ++i ) {
1357  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1358  }
1359  }
1360  }
1361  else if( LOW && !UPP && M > SIMDSIZE*4UL ) {
1362  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
1363  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
1364  for( size_t i=0UL; i<iend; ++i ) {
1365  reset( (~C)(i,j) );
1366  }
1367  }
1368  }
1369  else if( !LOW && UPP && M > SIMDSIZE*4UL ) {
1370  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
1371  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
1372  for( size_t j=0UL; j<jend; ++j ) {
1373  reset( (~C)(i,j) );
1374  }
1375  }
1376  }
1377  }
1379  //**********************************************************************************************
1380 
1381  //**Default assignment to dense matrices (large matrices)***************************************
// Default (non-vectorized) assignment kernel for large matrices.
//
// Parameters:
//   C - the target left-hand side matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// This overload performs no computation of its own; it only forwards all three
// arguments to selectDefaultAssignKernel().
// NOTE(review): the return-type line (listing line 1398) is not part of this excerpt,
// so the condition that selects this overload cannot be seen here - confirm against
// the complete header.
1395  template< typename MT3 // Type of the left-hand side target matrix
1396  , typename MT4 // Type of the left-hand side matrix operand
1397  , typename MT5 > // Type of the right-hand side matrix operand
1399  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1400  {
1401  selectDefaultAssignKernel( C, A, B );
1402  }
1404  //**********************************************************************************************
1405 
1406  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Vectorized assignment kernel for large matrices.
//
// Parameters:
//   C - the target left-hand side matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// Dispatches to exactly one of the smmm/hmmm/lmmm/ummm/mmm routines, depending on the
// SYM, HERM, LOW and UPP flags (declared outside this excerpt). The flags are tested in
// that order, so SYM takes precedence over HERM, HERM over LOW, and LOW over UPP.
// NOTE(review): the return-type line (listing line 1424) is not part of this excerpt.
1421  template< typename MT3 // Type of the left-hand side target matrix
1422  , typename MT4 // Type of the left-hand side matrix operand
1423  , typename MT5 > // Type of the right-hand side matrix operand
1425  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1426  {
// smmm and hmmm take a single scalar argument, the remaining routines take two.
// Presumably these are the scaling factors applied to A*B and to the previous content
// of C, respectively - TODO confirm against the declarations in blaze/math/dense/MMM.h.
1427  if( SYM )
1428  smmm( C, A, B, ElementType(1) );
1429  else if( HERM )
1430  hmmm( C, A, B, ElementType(1) );
1431  else if( LOW )
1432  lmmm( C, A, B, ElementType(1), ElementType(0) );
1433  else if( UPP )
1434  ummm( C, A, B, ElementType(1), ElementType(0) );
1435  else
1436  mmm( C, A, B, ElementType(1), ElementType(0) );
1437  }
1439  //**********************************************************************************************
1440 
1441  //**BLAS-based assignment to dense matrices (default)*******************************************
// Default variant of the BLAS-based assignment kernel.
//
// Parameters:
//   C - the target left-hand side matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// This overload does not call any BLAS routine; it forwards all three arguments to
// selectLargeAssignKernel().
// NOTE(review): the return-type line (listing line 1458) is not part of this excerpt,
// so the condition that selects this overload cannot be seen here.
1455  template< typename MT3 // Type of the left-hand side target matrix
1456  , typename MT4 // Type of the left-hand side matrix operand
1457  , typename MT5 > // Type of the right-hand side matrix operand
1459  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1460  {
1461  selectLargeAssignKernel( C, A, B );
1462  }
1464  //**********************************************************************************************
1465 
1466  //**BLAS-based assignment to dense matrices*****************************************************
1467 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
1468 
// BLAS-based assignment kernel. The surrounding preprocessor guard compiles it only if
// BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION evaluates to true.
//
// Parameters:
//   C - the target left-hand side matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// Three cases are distinguished at compile time:
//   - A is triangular:  B is assigned to C, then trmm() is called on C with A and CblasLeft.
//   - B is triangular:  A is assigned to C, then trmm() is called on C with B and CblasRight.
//   - otherwise:        gemm() is called with C, A, B and the scalars ET(1) and ET(0).
// If both operands are triangular the first case wins because it is tested first.
// NOTE(review): the return-type line (listing line 1484) is not part of this excerpt.
1481  template< typename MT3 // Type of the left-hand side target matrix
1482  , typename MT4 // Type of the left-hand side matrix operand
1483  , typename MT5 > // Type of the right-hand side matrix operand
1485  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1486  {
// Element type of the target matrix; used to build the scalar arguments below.
1487  typedef ElementType_<MT3> ET;
1488 
1489  if( IsTriangular<MT4>::value ) {
// C receives a copy of B first; presumably trmm() then multiplies C in place by the
// triangular matrix A from the left - TODO confirm against the trmm() wrapper.
1490  assign( C, B );
1491  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1492  }
1493  else if( IsTriangular<MT5>::value ) {
// Mirror image of the branch above: copy A, then apply the triangular B from the right.
1494  assign( C, A );
1495  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1496  }
1497  else {
1498  gemm( C, A, B, ET(1), ET(0) );
1499  }
1500  }
1502 #endif
1503  //**********************************************************************************************
1504 
1505  //**Assignment to sparse matrices***************************************************************
// Assignment of the multiplication expression to a sparse matrix.
//
// Parameters:
//   lhs - the target left-hand side sparse matrix
//   rhs - the right-hand side multiplication expression to be assigned
//
// The expression is first evaluated into a temporary of type TmpType via serial(); the
// temporary is then passed through the forward functor and assigned to the target.
// NOTE(review): several listing lines of this function (return type, 1523-1532) are not
// part of this excerpt; in particular the definition of TmpType is not visible here.
1518  template< typename MT // Type of the target sparse matrix
1519  , bool SO > // Storage order of the target sparse matrix
1521  assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1522  {
1524 
1526 
1533 
// The target must already have the dimensions of the expression.
1534  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1535  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1536 
1537  const ForwardFunctor fwd;
1538 
// Evaluate the complete product before touching the sparse target.
1539  const TmpType tmp( serial( rhs ) );
1540  assign( ~lhs, fwd( tmp ) );
1541  }
1543  //**********************************************************************************************
1544 
1545  //**Restructuring assignment to row-major matrices**********************************************
// Restructuring assignment of the multiplication expression to a row-major matrix
// (second template argument of Matrix is false).
//
// Parameters:
//   lhs - the target left-hand side matrix
//   rhs - the right-hand side multiplication expression to be assigned
//
// Instead of evaluating rhs.lhs_ * rhs.rhs_ directly, one of three rewritten products is
// assigned, in which at least one operand is replaced by its transpose:
//   trans(lhs_) * trans(rhs_),  trans(lhs_) * rhs_,  or  lhs_ * trans(rhs_).
// NOTE(review): the return-type line and the head of the first branch (listing lines 1561
// and 1573) are not part of this excerpt. Judging from the visible 'else if', the first
// branch is presumably taken when both operand types are symmetric - confirm against the
// complete header.
1560  template< typename MT > // Type of the target matrix
1562  assign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
1563  {
1565 
1567 
1568  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1569  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1570 
1571  const ForwardFunctor fwd;
1572 
1574  assign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
// Only the left operand type is symmetric: transpose just that operand.
1575  else if( IsSymmetric<MT1>::value )
1576  assign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
// Fallback: transpose the right operand.
1577  else
1578  assign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
1579  }
1581  //**********************************************************************************************
1582 
1583  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the multiplication expression to a dense matrix.
//
// Parameters:
//   lhs - the target left-hand side dense matrix
//   rhs - the right-hand side multiplication expression to be added
//
// Steps: check the dimensions, return early if there is nothing to add, evaluate both
// operands via serial(), and hand the evaluated operands to selectAddAssignKernel().
// NOTE(review): the return-type line (listing line 1598) is not part of this excerpt.
1596  template< typename MT // Type of the target dense matrix
1597  , bool SO > // Storage order of the target dense matrix
1599  addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1600  {
1602 
1603  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1604  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1605 
// An empty target or an inner dimension of zero means the product contributes nothing.
1606  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1607  return;
1608  }
1609 
1610  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1611  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1612 
// The evaluated operands must match both the original operands and the target.
1613  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1614  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1615  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1616  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1617  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1618  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1619 
1620  TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1621  }
1623  //**********************************************************************************************
1624 
1625  //**Addition assignment to dense matrices (kernel selection)************************************
1636  template< typename MT3 // Type of the left-hand side target matrix
1637  , typename MT4 // Type of the left-hand side matrix operand
1638  , typename MT5 > // Type of the right-hand side matrix operand
1639  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1640  {
1641  if( ( IsDiagonal<MT4>::value ) ||
1642  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
1643  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
1644  selectSmallAddAssignKernel( C, A, B );
1645  else
1646  selectBlasAddAssignKernel( C, A, B );
1647  }
1649  //**********************************************************************************************
1650 
1651  //**Default addition assignment to dense matrices (general/general)*****************************
1665  template< typename MT3 // Type of the left-hand side target matrix
1666  , typename MT4 // Type of the left-hand side matrix operand
1667  , typename MT5 > // Type of the right-hand side matrix operand
1668  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1669  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1670  {
1671  const size_t M( A.rows() );
1672  const size_t N( B.columns() );
1673  const size_t K( A.columns() );
1674 
1675  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1676 
1677  for( size_t j=0UL; j<N; ++j )
1678  {
1679  const size_t kbegin( ( IsLower<MT5>::value )
1680  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1681  :( 0UL ) );
1682  const size_t kend( ( IsUpper<MT5>::value )
1683  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1684  :( K ) );
1685  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
1686 
1687  for( size_t k=kbegin; k<kend; ++k )
1688  {
1689  const size_t ibegin( ( IsLower<MT4>::value )
1691  ?( LOW ? max(j,k+1UL) : k+1UL )
1692  :( LOW ? max(j,k) : k ) )
1693  :( LOW ? j : 0UL ) );
1694  const size_t iend( ( IsUpper<MT4>::value )
1696  ?( UPP ? min(j+1UL,k) : k )
1697  :( UPP ? min(j,k)+1UL : k+1UL ) )
1698  :( UPP ? j+1UL : M ) );
1699 
1700  if( ( LOW || UPP ) && ibegin >= iend ) continue;
1701  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1702 
1703  const size_t inum( iend - ibegin );
1704  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1705 
1706  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1707  C(i ,j) += A(i ,k) * B(k,j);
1708  C(i+1UL,j) += A(i+1UL,k) * B(k,j);
1709  }
1710  if( ipos < iend ) {
1711  C(ipos,j) += A(ipos,k) * B(k,j);
1712  }
1713  }
1714  }
1715  }
1717  //**********************************************************************************************
1718 
1719  //**Default addition assignment to dense matrices (general/diagonal)****************************
1733  template< typename MT3 // Type of the left-hand side target matrix
1734  , typename MT4 // Type of the left-hand side matrix operand
1735  , typename MT5 > // Type of the right-hand side matrix operand
1736  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
1737  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1738  {
1740 
1741  const size_t M( A.rows() );
1742  const size_t N( B.columns() );
1743 
1744  for( size_t j=0UL; j<N; ++j )
1745  {
1746  const size_t ibegin( ( IsLower<MT4>::value )
1747  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
1748  :( 0UL ) );
1749  const size_t iend( ( IsUpper<MT4>::value )
1750  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
1751  :( M ) );
1752  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1753 
1754  const size_t inum( iend - ibegin );
1755  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1756 
1757  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1758  C(i ,j) += A(i ,j) * B(j,j);
1759  C(i+1UL,j) += A(i+1UL,j) * B(j,j);
1760  }
1761  if( ipos < iend ) {
1762  C(ipos,j) += A(ipos,j) * B(j,j);
1763  }
1764  }
1765  }
1767  //**********************************************************************************************
1768 
1769  //**Default addition assignment to dense matrices (diagonal/general)****************************
1783  template< typename MT3 // Type of the left-hand side target matrix
1784  , typename MT4 // Type of the left-hand side matrix operand
1785  , typename MT5 > // Type of the right-hand side matrix operand
1786  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
1787  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1788  {
1790 
1791  const size_t M( A.rows() );
1792  const size_t N( B.columns() );
1793 
1794  for( size_t j=0UL; j<N; ++j )
1795  {
1796  const size_t ibegin( ( IsLower<MT5>::value )
1797  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1798  :( 0UL ) );
1799  const size_t iend( ( IsUpper<MT5>::value )
1800  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1801  :( M ) );
1802  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1803 
1804  const size_t inum( iend - ibegin );
1805  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1806 
1807  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1808  C(i ,j) += A(i ,i ) * B(i ,j);
1809  C(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
1810  }
1811  if( ipos < iend ) {
1812  C(ipos,j) += A(ipos,ipos) * B(ipos,j);
1813  }
1814  }
1815  }
1817  //**********************************************************************************************
1818 
1819  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
1833  template< typename MT3 // Type of the left-hand side target matrix
1834  , typename MT4 // Type of the left-hand side matrix operand
1835  , typename MT5 > // Type of the right-hand side matrix operand
1836  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
1837  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1838  {
1840 
1841  for( size_t i=0UL; i<A.rows(); ++i ) {
1842  C(i,i) += A(i,i) * B(i,i);
1843  }
1844  }
1846  //**********************************************************************************************
1847 
1848  //**Default addition assignment to dense matrices (small matrices)******************************
// Default (non-vectorized) addition assignment kernel for small matrices.
//
// Parameters:
//   C - the target left-hand side matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// This overload performs no computation of its own; it only forwards all three
// arguments to selectDefaultAddAssignKernel().
// NOTE(review): the return-type line (listing line 1865) is not part of this excerpt,
// so the condition that selects this overload cannot be seen here.
1862  template< typename MT3 // Type of the left-hand side target matrix
1863  , typename MT4 // Type of the left-hand side matrix operand
1864  , typename MT5 > // Type of the right-hand side matrix operand
1866  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1867  {
1868  selectDefaultAddAssignKernel( C, A, B );
1869  }
1871  //**********************************************************************************************
1872 
1873  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized addition assignment kernel for small matrices with a row-major target
// (second template argument of DenseMatrix is false).
//
// Parameters:
//   C - the target left-hand side dense matrix
//   A - the left-hand side multiplication operand
//   B - the right-hand side multiplication operand
//
// No multiplication loop is written here. In each of the four branches exactly one
// operand is evaluated via serial() into a temporary of its OppositeType_, and the product
// of that temporary with the untouched operand is passed through the forward functor to
// addAssign().
// NOTE(review): the return-type line and the heads of the first two branches (listing
// lines 1891, 1901 and 1905) are not part of this excerpt, so their conditions cannot be
// documented from here - confirm against the complete header.
1888  template< typename MT3 // Type of the left-hand side target matrix
1889  , typename MT4 // Type of the left-hand side matrix operand
1890  , typename MT5 > // Type of the right-hand side matrix operand
1892  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1893  {
1898 
1899  const ForwardFunctor fwd;
1900 
// First branch (condition not visible): convert B.
1902  const OppositeType_<MT5> tmp( serial( B ) );
1903  addAssign( ~C, fwd( A * tmp ) );
1904  }
// Second branch (condition not visible): convert A.
1906  const OppositeType_<MT4> tmp( serial( A ) );
1907  addAssign( ~C, fwd( tmp * B ) );
1908  }
// Otherwise convert the operand with fewer elements; on a tie B is converted.
1909  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1910  const OppositeType_<MT5> tmp( serial( B ) );
1911  addAssign( ~C, fwd( A * tmp ) );
1912  }
1913  else {
1914  const OppositeType_<MT4> tmp( serial( A ) );
1915  addAssign( ~C, fwd( tmp * B ) );
1916  }
1917  }
1919  //**********************************************************************************************
1920 
1921  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
1936  template< typename MT3 // Type of the left-hand side target matrix
1937  , typename MT4 // Type of the left-hand side matrix operand
1938  , typename MT5 > // Type of the right-hand side matrix operand
1940  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1941  {
1942  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
1943 
1944  const size_t M( A.rows() );
1945  const size_t N( B.columns() );
1946  const size_t K( A.columns() );
1947 
1948  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1949 
1950  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
1951  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
1952 
1953  size_t i( 0UL );
1954 
1956  {
1957  for( ; !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
1958  for( size_t j=0UL; j<N; ++j )
1959  {
1960  const size_t kbegin( ( IsLower<MT5>::value )
1961  ?( ( IsUpper<MT4>::value )
1962  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1963  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1964  :( IsUpper<MT4>::value ? i : 0UL ) );
1965  const size_t kend( ( IsUpper<MT5>::value )
1966  ?( ( IsLower<MT4>::value )
1967  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1968  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
1969  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
1970 
1971  SIMDType xmm1( (~C).load(i ,j) );
1972  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
1973  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
1974  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
1975  SIMDType xmm5( (~C).load(i+SIMDSIZE*4UL,j) );
1976  SIMDType xmm6( (~C).load(i+SIMDSIZE*5UL,j) );
1977  SIMDType xmm7( (~C).load(i+SIMDSIZE*6UL,j) );
1978  SIMDType xmm8( (~C).load(i+SIMDSIZE*7UL,j) );
1979 
1980  for( size_t k=kbegin; k<kend; ++k ) {
1981  const SIMDType b1( set( B(k,j) ) );
1982  xmm1 += A.load(i ,k) * b1;
1983  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
1984  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
1985  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
1986  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
1987  xmm6 += A.load(i+SIMDSIZE*5UL,k) * b1;
1988  xmm7 += A.load(i+SIMDSIZE*6UL,k) * b1;
1989  xmm8 += A.load(i+SIMDSIZE*7UL,k) * b1;
1990  }
1991 
1992  (~C).store( i , j, xmm1 );
1993  (~C).store( i+SIMDSIZE , j, xmm2 );
1994  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
1995  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
1996  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
1997  (~C).store( i+SIMDSIZE*5UL, j, xmm6 );
1998  (~C).store( i+SIMDSIZE*6UL, j, xmm7 );
1999  (~C).store( i+SIMDSIZE*7UL, j, xmm8 );
2000  }
2001  }
2002  }
2003 
2004  for( ; !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
2005  {
2006  size_t j( 0UL );
2007 
2008  for( ; (j+2UL) <= N; j+=2UL )
2009  {
2010  const size_t kbegin( ( IsLower<MT5>::value )
2011  ?( ( IsUpper<MT4>::value )
2012  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2013  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2014  :( IsUpper<MT4>::value ? i : 0UL ) );
2015  const size_t kend( ( IsUpper<MT5>::value )
2016  ?( ( IsLower<MT4>::value )
2017  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2018  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2019  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
2020 
2021  SIMDType xmm1 ( (~C).load(i ,j ) );
2022  SIMDType xmm2 ( (~C).load(i+SIMDSIZE ,j ) );
2023  SIMDType xmm3 ( (~C).load(i+SIMDSIZE*2UL,j ) );
2024  SIMDType xmm4 ( (~C).load(i+SIMDSIZE*3UL,j ) );
2025  SIMDType xmm5 ( (~C).load(i+SIMDSIZE*4UL,j ) );
2026  SIMDType xmm6 ( (~C).load(i ,j+1UL) );
2027  SIMDType xmm7 ( (~C).load(i+SIMDSIZE ,j+1UL) );
2028  SIMDType xmm8 ( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
2029  SIMDType xmm9 ( (~C).load(i+SIMDSIZE*3UL,j+1UL) );
2030  SIMDType xmm10( (~C).load(i+SIMDSIZE*4UL,j+1UL) );
2031 
2032  for( size_t k=kbegin; k<kend; ++k ) {
2033  const SIMDType a1( A.load(i ,k) );
2034  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
2035  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
2036  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
2037  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
2038  const SIMDType b1( set( B(k,j ) ) );
2039  const SIMDType b2( set( B(k,j+1UL) ) );
2040  xmm1 += a1 * b1;
2041  xmm2 += a2 * b1;
2042  xmm3 += a3 * b1;
2043  xmm4 += a4 * b1;
2044  xmm5 += a5 * b1;
2045  xmm6 += a1 * b2;
2046  xmm7 += a2 * b2;
2047  xmm8 += a3 * b2;
2048  xmm9 += a4 * b2;
2049  xmm10 += a5 * b2;
2050  }
2051 
2052  (~C).store( i , j , xmm1 );
2053  (~C).store( i+SIMDSIZE , j , xmm2 );
2054  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
2055  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
2056  (~C).store( i+SIMDSIZE*4UL, j , xmm5 );
2057  (~C).store( i , j+1UL, xmm6 );
2058  (~C).store( i+SIMDSIZE , j+1UL, xmm7 );
2059  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm8 );
2060  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm9 );
2061  (~C).store( i+SIMDSIZE*4UL, j+1UL, xmm10 );
2062  }
2063 
2064  if( j < N )
2065  {
2066  const size_t kbegin( ( IsLower<MT5>::value )
2067  ?( ( IsUpper<MT4>::value )
2068  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2069  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2070  :( IsUpper<MT4>::value ? i : 0UL ) );
2071  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
2072 
2073  SIMDType xmm1( (~C).load(i ,j) );
2074  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
2075  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
2076  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
2077  SIMDType xmm5( (~C).load(i+SIMDSIZE*4UL,j) );
2078 
2079  for( size_t k=kbegin; k<kend; ++k ) {
2080  const SIMDType b1( set( B(k,j) ) );
2081  xmm1 += A.load(i ,k) * b1;
2082  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
2083  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
2084  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
2085  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
2086  }
2087 
2088  (~C).store( i , j, xmm1 );
2089  (~C).store( i+SIMDSIZE , j, xmm2 );
2090  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
2091  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
2092  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
2093  }
2094  }
2095 
2096  for( ; !LOW && !UPP && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
2097  {
2098  size_t j( 0UL );
2099 
2100  for( ; (j+2UL) <= N; j+=2UL )
2101  {
2102  const size_t kbegin( ( IsLower<MT5>::value )
2103  ?( ( IsUpper<MT4>::value )
2104  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2105  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2106  :( IsUpper<MT4>::value ? i : 0UL ) );
2107  const size_t kend( ( IsUpper<MT5>::value )
2108  ?( ( IsLower<MT4>::value )
2109  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2110  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2111  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
2112 
2113  SIMDType xmm1( (~C).load(i ,j ) );
2114  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j ) );
2115  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j ) );
2116  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j ) );
2117  SIMDType xmm5( (~C).load(i ,j+1UL) );
2118  SIMDType xmm6( (~C).load(i+SIMDSIZE ,j+1UL) );
2119  SIMDType xmm7( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
2120  SIMDType xmm8( (~C).load(i+SIMDSIZE*3UL,j+1UL) );
2121 
2122  for( size_t k=kbegin; k<kend; ++k ) {
2123  const SIMDType a1( A.load(i ,k) );
2124  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
2125  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
2126  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
2127  const SIMDType b1( set( B(k,j ) ) );
2128  const SIMDType b2( set( B(k,j+1UL) ) );
2129  xmm1 += a1 * b1;
2130  xmm2 += a2 * b1;
2131  xmm3 += a3 * b1;
2132  xmm4 += a4 * b1;
2133  xmm5 += a1 * b2;
2134  xmm6 += a2 * b2;
2135  xmm7 += a3 * b2;
2136  xmm8 += a4 * b2;
2137  }
2138 
2139  (~C).store( i , j , xmm1 );
2140  (~C).store( i+SIMDSIZE , j , xmm2 );
2141  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
2142  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
2143  (~C).store( i , j+1UL, xmm5 );
2144  (~C).store( i+SIMDSIZE , j+1UL, xmm6 );
2145  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm7 );
2146  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm8 );
2147  }
2148 
2149  if( j < N )
2150  {
2151  const size_t kbegin( ( IsLower<MT5>::value )
2152  ?( ( IsUpper<MT4>::value )
2153  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2154  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2155  :( IsUpper<MT4>::value ? i : 0UL ) );
2156  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
2157 
2158  SIMDType xmm1( (~C).load(i ,j) );
2159  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
2160  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
2161  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
2162 
2163  for( size_t k=kbegin; k<kend; ++k ) {
2164  const SIMDType b1( set( B(k,j) ) );
2165  xmm1 += A.load(i ,k) * b1;
2166  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
2167  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
2168  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
2169  }
2170 
2171  (~C).store( i , j, xmm1 );
2172  (~C).store( i+SIMDSIZE , j, xmm2 );
2173  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
2174  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
2175  }
2176  }
2177 
2178  for( ; !LOW && !UPP && (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
2179  {
2180  size_t j( 0UL );
2181 
2182  for( ; (j+2UL) <= N; j+=2UL )
2183  {
2184  const size_t kbegin( ( IsLower<MT5>::value )
2185  ?( ( IsUpper<MT4>::value )
2186  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2187  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2188  :( IsUpper<MT4>::value ? i : 0UL ) );
2189  const size_t kend( ( IsUpper<MT5>::value )
2190  ?( ( IsLower<MT4>::value )
2191  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2192  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2193  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
2194 
2195  SIMDType xmm1( (~C).load(i ,j ) );
2196  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j ) );
2197  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j ) );
2198  SIMDType xmm4( (~C).load(i ,j+1UL) );
2199  SIMDType xmm5( (~C).load(i+SIMDSIZE ,j+1UL) );
2200  SIMDType xmm6( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
2201 
2202  for( size_t k=kbegin; k<kend; ++k ) {
2203  const SIMDType a1( A.load(i ,k) );
2204  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
2205  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
2206  const SIMDType b1( set( B(k,j ) ) );
2207  const SIMDType b2( set( B(k,j+1UL) ) );
2208  xmm1 += a1 * b1;
2209  xmm2 += a2 * b1;
2210  xmm3 += a3 * b1;
2211  xmm4 += a1 * b2;
2212  xmm5 += a2 * b2;
2213  xmm6 += a3 * b2;
2214  }
2215 
2216  (~C).store( i , j , xmm1 );
2217  (~C).store( i+SIMDSIZE , j , xmm2 );
2218  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
2219  (~C).store( i , j+1UL, xmm4 );
2220  (~C).store( i+SIMDSIZE , j+1UL, xmm5 );
2221  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm6 );
2222  }
2223 
2224  if( j < N )
2225  {
2226  const size_t kbegin( ( IsLower<MT5>::value )
2227  ?( ( IsUpper<MT4>::value )
2228  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2229  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2230  :( IsUpper<MT4>::value ? i : 0UL ) );
2231  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
2232 
2233  SIMDType xmm1( (~C).load(i ,j) );
2234  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
2235  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
2236 
2237  for( size_t k=kbegin; k<kend; ++k ) {
2238  const SIMDType b1( set( B(k,j) ) );
2239  xmm1 += A.load(i ,k) * b1;
2240  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
2241  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
2242  }
2243 
2244  (~C).store( i , j, xmm1 );
2245  (~C).store( i+SIMDSIZE , j, xmm2 );
2246  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
2247  }
2248  }
2249 
2250  for( ; !( LOW && UPP ) && (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
2251  {
2252  const size_t jend( LOW ? min(i+SIMDSIZE*2UL,N) : N );
2253  size_t j( UPP ? i : 0UL );
2254 
2255  for( ; (j+2UL) <= jend; j+=2UL )
2256  {
2257  const size_t kbegin( ( IsLower<MT5>::value )
2258  ?( ( IsUpper<MT4>::value )
2259  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2260  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2261  :( IsUpper<MT4>::value ? i : 0UL ) );
2262  const size_t kend( ( IsUpper<MT5>::value )
2263  ?( ( IsLower<MT4>::value )
2264  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2265  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2266  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
2267 
2268  SIMDType xmm1( (~C).load(i ,j ) );
2269  SIMDType xmm2( (~C).load(i+SIMDSIZE,j ) );
2270  SIMDType xmm3( (~C).load(i ,j+1UL) );
2271  SIMDType xmm4( (~C).load(i+SIMDSIZE,j+1UL) );
2272 
2273  for( size_t k=kbegin; k<kend; ++k ) {
2274  const SIMDType a1( A.load(i ,k) );
2275  const SIMDType a2( A.load(i+SIMDSIZE,k) );
2276  const SIMDType b1( set( B(k,j ) ) );
2277  const SIMDType b2( set( B(k,j+1UL) ) );
2278  xmm1 += a1 * b1;
2279  xmm2 += a2 * b1;
2280  xmm3 += a1 * b2;
2281  xmm4 += a2 * b2;
2282  }
2283 
2284  (~C).store( i , j , xmm1 );
2285  (~C).store( i+SIMDSIZE, j , xmm2 );
2286  (~C).store( i , j+1UL, xmm3 );
2287  (~C).store( i+SIMDSIZE, j+1UL, xmm4 );
2288  }
2289 
2290  if( j < jend )
2291  {
2292  const size_t kbegin( ( IsLower<MT5>::value )
2293  ?( ( IsUpper<MT4>::value )
2294  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2295  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2296  :( IsUpper<MT4>::value ? i : 0UL ) );
2297  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
2298 
2299  SIMDType xmm1( (~C).load(i ,j) );
2300  SIMDType xmm2( (~C).load(i+SIMDSIZE,j) );
2301 
2302  for( size_t k=kbegin; k<kend; ++k ) {
2303  const SIMDType b1( set( B(k,j) ) );
2304  xmm1 += A.load(i ,k) * b1;
2305  xmm2 += A.load(i+SIMDSIZE,k) * b1;
2306  }
2307 
2308  (~C).store( i , j, xmm1 );
2309  (~C).store( i+SIMDSIZE, j, xmm2 );
2310  }
2311  }
2312 
2313  for( ; i<ipos; i+=SIMDSIZE )
2314  {
2315  const size_t jend( LOW && UPP ? min(i+SIMDSIZE,N) : N );
2316  size_t j( UPP ? i : 0UL );
2317 
2318  for( ; (j+2UL) <= jend; j+=2UL )
2319  {
2320  const size_t kbegin( ( IsLower<MT5>::value )
2321  ?( ( IsUpper<MT4>::value )
2322  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2323  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2324  :( IsUpper<MT4>::value ? i : 0UL ) );
2325  const size_t kend( ( IsUpper<MT5>::value )
2326  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
2327  :( K ) );
2328 
2329  SIMDType xmm1( (~C).load(i,j ) );
2330  SIMDType xmm2( (~C).load(i,j+1UL) );
2331 
2332  for( size_t k=kbegin; k<kend; ++k ) {
2333  const SIMDType a1( A.load(i,k) );
2334  xmm1 += a1 * set( B(k,j ) );
2335  xmm2 += a1 * set( B(k,j+1UL) );
2336  }
2337 
2338  (~C).store( i, j , xmm1 );
2339  (~C).store( i, j+1UL, xmm2 );
2340  }
2341 
2342  if( j < jend )
2343  {
2344  const size_t kbegin( ( IsLower<MT5>::value )
2345  ?( ( IsUpper<MT4>::value )
2346  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2347  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2348  :( IsUpper<MT4>::value ? i : 0UL ) );
2349 
2350  SIMDType xmm1( (~C).load(i,j) );
2351 
2352  for( size_t k=kbegin; k<K; ++k ) {
2353  xmm1 += A.load(i,k) * set( B(k,j) );
2354  }
2355 
2356  (~C).store( i, j, xmm1 );
2357  }
2358  }
2359 
2360  for( ; remainder && i<M; ++i )
2361  {
2362  const size_t jend( LOW ? i+1UL : N );
2363  size_t j( UPP ? i : 0UL );
2364 
2365  for( ; (j+2UL) <= jend; j+=2UL )
2366  {
2367  const size_t kbegin( ( IsLower<MT5>::value )
2368  ?( ( IsUpper<MT4>::value )
2369  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2370  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2371  :( IsUpper<MT4>::value ? i : 0UL ) );
2372  const size_t kend( ( IsUpper<MT5>::value )
2373  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
2374  :( K ) );
2375 
2376  ElementType value1( (~C)(i,j ) );
2377  ElementType value2( (~C)(i,j+1UL) );
2378 
2379  for( size_t k=kbegin; k<kend; ++k ) {
2380  value1 += A(i,k) * B(k,j );
2381  value2 += A(i,k) * B(k,j+1UL);
2382  }
2383 
2384  (~C)(i,j ) = value1;
2385  (~C)(i,j+1UL) = value2;
2386  }
2387 
2388  if( j < jend )
2389  {
2390  const size_t kbegin( ( IsLower<MT5>::value )
2391  ?( ( IsUpper<MT4>::value )
2392  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2393  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2394  :( IsUpper<MT4>::value ? i : 0UL ) );
2395 
2396  ElementType value( (~C)(i,j) );
2397 
2398  for( size_t k=kbegin; k<K; ++k ) {
2399  value += A(i,k) * B(k,j);
2400  }
2401 
2402  (~C)(i,j) = value;
2403  }
2404  }
2405  }
2407  //**********************************************************************************************
2408 
2409  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition-assignment kernel, fallback overload.
// Does no arithmetic of its own: it forwards C, A and B unchanged to
// selectDefaultAddAssignKernel().
// NOTE(review): the return-type line (listing line 2426) is absent from this
// listing; presumably it holds the enable/disable condition that separates this
// overload from the other selectLargeAddAssignKernel overload - confirm against
// the original header.
2423  template< typename MT3 // Type of the left-hand side target matrix
2424  , typename MT4 // Type of the left-hand side matrix operand
2425  , typename MT5 > // Type of the right-hand side matrix operand
2427  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2428  {
      // Plain delegation to the default kernel.
2429  selectDefaultAddAssignKernel( C, A, B );
2430  }
2432  //**********************************************************************************************
2433 
2434  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Large-matrix addition-assignment kernel that dispatches to one of the
// lmmm() / ummm() / mmm() routines according to the LOW and UPP flags.
// LOW is tested first, so lmmm() is selected whenever LOW is set, regardless
// of UPP. Every call passes ElementType(1) for both trailing scalar arguments.
// NOTE(review): presumably the two scalars are the scaling factors for A*B and
// for the existing content of C, which would make the call compute C += A*B -
// confirm against the declarations of lmmm/ummm/mmm.
// NOTE(review): the return-type line (listing line 2452) is absent from this
// listing - confirm the enabling condition against the original header.
2449  template< typename MT3 // Type of the left-hand side target matrix
2450  , typename MT4 // Type of the left-hand side matrix operand
2451  , typename MT5 > // Type of the right-hand side matrix operand
2453  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2454  {
      // LOW set: use the lmmm() variant.
2455  if( LOW )
2456  lmmm( C, A, B, ElementType(1), ElementType(1) );
      // Only UPP set: use the ummm() variant.
2457  else if( UPP )
2458  ummm( C, A, B, ElementType(1), ElementType(1) );
      // Neither flag set: use the general mmm() routine.
2459  else
2460  mmm( C, A, B, ElementType(1), ElementType(1) );
2461  }
2463  //**********************************************************************************************
2464 
2465  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS addition-assignment kernel, fallback overload.
// Performs no BLAS call itself: it forwards C, A and B unchanged to
// selectLargeAddAssignKernel().
// NOTE(review): the return-type line (listing line 2482) is absent from this
// listing; presumably it restricts this overload to cases where the BLAS-based
// overload is not applicable - confirm against the original header.
2479  template< typename MT3 // Type of the left-hand side target matrix
2480  , typename MT4 // Type of the left-hand side matrix operand
2481  , typename MT5 > // Type of the right-hand side matrix operand
2483  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2484  {
      // Plain delegation to the large-matrix kernel selection.
2485  selectLargeAddAssignKernel( C, A, B );
2486  }
2488  //**********************************************************************************************
2489 
2490  //**BLAS-based addition assignment to dense matrices********************************************
2491 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
2492 
// BLAS-backed addition-assignment kernel. It is only compiled when the
// surrounding BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
// preprocessor guard is true.
// Three cases, chosen from the operand types:
//   - MT4 (type of A) is triangular: B is copied into a temporary, trmm() is
//     called on that temporary with A and CblasLeft, and the temporary is then
//     added to C via addAssign().
//   - otherwise MT5 (type of B) is triangular: A is copied into a temporary,
//     trmm() is called with B and CblasRight, and the temporary is added to C.
//   - otherwise: a single gemm() call on C, A and B with both scalars ET(1).
// NOTE(review): presumably trmm() overwrites the temporary with the triangular
// product and gemm() with scalars (1,1) accumulates into C - confirm against
// the trmm/gemm wrapper declarations.
// NOTE(review): the return-type line (listing line 2508) is absent from this
// listing - confirm the enabling condition against the original header.
2505  template< typename MT3 // Type of the left-hand side target matrix
2506  , typename MT4 // Type of the left-hand side matrix operand
2507  , typename MT5 > // Type of the right-hand side matrix operand
2509  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2510  {
      // Element type of the target matrix; used for the scalar arguments below.
2511  typedef ElementType_<MT3> ET;
2512 
2513  if( IsTriangular<MT4>::value ) {
      // Work on a copy of B so that C is only touched by the final addAssign().
2514  ResultType_<MT3> tmp( serial( B ) );
      // The lower/upper flag follows the IsLower trait of A's type.
2515  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2516  addAssign( C, tmp );
2517  }
2518  else if( IsTriangular<MT5>::value ) {
      // Mirror image of the branch above: copy A, pass B with CblasRight.
2519  ResultType_<MT3> tmp( serial( A ) );
2520  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2521  addAssign( C, tmp );
2522  }
2523  else {
      // Neither operand type is triangular: one gemm() call directly on C.
2524  gemm( C, A, B, ET(1), ET(1) );
2525  }
2526  }
2528 #endif
2529  //**********************************************************************************************
2530 
2531  //**Restructuring addition assignment to row-major matrices*************************************
// Addition assignment of the product expression to a row-major target
// (Matrix<MT,false>). Instead of evaluating the product as stored, the function
// wraps one or both operands in trans(), passes the new product expression
// through a ForwardFunctor instance and hands the result to addAssign().
// NOTE(review): several listing lines are absent here (2547, 2550, 2552 and
// 2559). In particular the first `if` condition (2559) is missing; the
// `else if` at 2561 implies it exists. Presumably it tests that both MT1 and
// MT2 are symmetric, and the return-type line (2547) restricts this overload
// to operands whose symmetry can be exploited - confirm against the header.
2546  template< typename MT > // Type of the target matrix
2548  addAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2549  {
2551 
2553 
      // The target must already have the dimensions of the product expression.
2554  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2555  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2556 
2557  const ForwardFunctor fwd;
2558 
      // First branch (condition missing from the listing): both operands are
      // wrapped in trans().
2560  addAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
      // Left operand type is symmetric: only the left operand is wrapped.
2561  else if( IsSymmetric<MT1>::value )
2562  addAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
      // Remaining case: only the right operand is wrapped.
2563  else
2564  addAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
2565  }
2567  //**********************************************************************************************
2568 
2569  //**Addition assignment to sparse matrices******************************************************
2570  // No special implementation for the addition assignment to sparse matrices.
2571  //**********************************************************************************************
2572 
2573  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the product expression to a dense matrix of either
// storage order.
// Steps: check that the target dimensions match, return early when there is
// nothing to compute, evaluate both operands into the local objects A and B,
// re-check all dimensions, then delegate to selectSubAssignKernel().
// NOTE(review): listing lines 2588 (return type) and 2591 are absent from this
// listing - confirm against the original header.
2586  template< typename MT // Type of the target dense matrix
2587  , bool SO > // Storage order of the target dense matrix
2589  subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2590  {
2592 
2593  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2594  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2595 
      // Nothing to subtract if the target is empty or the inner dimension is 0.
2596  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2597  return;
2598  }
2599 
2600  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2601  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2602 
      // The evaluated operands must keep the shapes of the original operands
      // and must be compatible with the target.
2603  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2604  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2605  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2606  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2607  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2608  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2609 
2610  TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2611  }
2613  //**********************************************************************************************
2614 
2615  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Chooses between the "small" and the BLAS subtraction-assignment kernels.
// The small kernel is selected when at least one of these holds:
//   - MT4 (the type of A) is diagonal,
//   - BLAZE_DEBUG_MODE is off and A has at most SIMDSIZE*10 rows,
//   - the number of elements of C (rows * columns) is below
//     TDMATTDMATMULT_THRESHOLD.
// In every other case selectBlasSubAssignKernel() is called.
2626  template< typename MT3 // Type of the left-hand side target matrix
2627  , typename MT4 // Type of the left-hand side matrix operand
2628  , typename MT5 > // Type of the right-hand side matrix operand
2629  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2630  {
2631  if( ( IsDiagonal<MT4>::value ) ||
2632  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
2633  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
2634  selectSmallSubAssignKernel( C, A, B );
2635  else
2636  selectBlasSubAssignKernel( C, A, B );
2637  }
2639  //**********************************************************************************************
2640 
2641  //**Default subtraction assignment to dense matrices (general/general)**************************
// Default (scalar) subtraction-assignment kernel, enabled when neither MT4 nor
// MT5 is diagonal. It performs C(i,j) -= A(i,k) * B(k,j) with loop order
// j (outer), k (middle), i (inner); the inner loop is unrolled by two.
// The k range is narrowed from the IsLower/IsUpper traits of MT5, the i range
// from the traits of MT4 combined with the LOW/UPP flags.
// NOTE(review): presumably the narrowed ranges skip elements that are known to
// be zero in triangular operands or outside the triangular part of the target -
// confirm.
// NOTE(review): listing lines 2680 and 2685 are absent from this listing, so
// the nested conditionals for ibegin/iend appear unbalanced here. Presumably
// the missing lines test IsStrictlyLower<MT4> / IsStrictlyUpper<MT4> - confirm
// against the original header.
2655  template< typename MT3 // Type of the left-hand side target matrix
2656  , typename MT4 // Type of the left-hand side matrix operand
2657  , typename MT5 > // Type of the right-hand side matrix operand
2658  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2659  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2660  {
2661  const size_t M( A.rows() );
2662  const size_t N( B.columns() );
2663  const size_t K( A.columns() );
2664 
      // When LOW or UPP is set, the row and column counts must agree.
2665  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2666 
2667  for( size_t j=0UL; j<N; ++j )
2668  {
      // k range for column j, derived from the lower/upper traits of B's type.
2669  const size_t kbegin( ( IsLower<MT5>::value )
2670  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2671  :( 0UL ) );
2672  const size_t kend( ( IsUpper<MT5>::value )
2673  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2674  :( K ) );
2675  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
2676 
2677  for( size_t k=kbegin; k<kend; ++k )
2678  {
      // i range for column k of A; with LOW/UPP it is additionally clamped
      // against j.
2679  const size_t ibegin( ( IsLower<MT4>::value )
2681  ?( LOW ? max(j,k+1UL) : k+1UL )
2682  :( LOW ? max(j,k) : k ) )
2683  :( LOW ? j : 0UL ) );
2684  const size_t iend( ( IsUpper<MT4>::value )
2686  ?( UPP ? min(j+1UL,k) : k )
2687  :( UPP ? min(j,k)+1UL : k+1UL ) )
2688  :( UPP ? j+1UL : M ) );
2689 
      // With LOW/UPP the clamped range can be empty; skip it before the
      // assertion and before the unsigned subtraction below.
2690  if( ( LOW || UPP ) && ( ibegin >= iend ) ) continue;
2691  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2692 
      // ipos = ibegin + (inum rounded down to an even count): end of the
      // part handled two rows at a time.
2693  const size_t inum( iend - ibegin );
2694  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2695 
2696  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2697  C(i ,j) -= A(i ,k) * B(k,j);
2698  C(i+1UL,j) -= A(i+1UL,k) * B(k,j);
2699  }
      // Odd count: one row is left over.
2700  if( ipos < iend ) {
2701  C(ipos,j) -= A(ipos,k) * B(k,j);
2702  }
2703  }
2704  }
2705  }
2707  //**********************************************************************************************
2708 
2709  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
// Default (scalar) subtraction-assignment kernel, enabled when MT4 is not
// diagonal and MT5 is diagonal. Only the diagonal element B(j,j) of the
// right-hand side operand is read, so column j of C is updated with
// C(i,j) -= A(i,j) * B(j,j). The i loop is unrolled by two and its range is
// narrowed from the IsLower/IsUpper traits of MT4.
// NOTE(review): listing line 2729 is absent from this listing; presumably it is
// a compile-time constraint or trace macro - confirm against the header.
2723  template< typename MT3 // Type of the left-hand side target matrix
2724  , typename MT4 // Type of the left-hand side matrix operand
2725  , typename MT5 > // Type of the right-hand side matrix operand
2726  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2727  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2728  {
2730 
2731  const size_t M( A.rows() );
2732  const size_t N( B.columns() );
2733 
2734  for( size_t j=0UL; j<N; ++j )
2735  {
      // Row range for column j, derived from the lower/upper traits of A's type.
2736  const size_t ibegin( ( IsLower<MT4>::value )
2737  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
2738  :( 0UL ) );
2739  const size_t iend( ( IsUpper<MT4>::value )
2740  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
2741  :( M ) );
2742  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2743 
      // ipos marks the end of the part processed two rows at a time
      // (inum rounded down to an even count).
2744  const size_t inum( iend - ibegin );
2745  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2746 
2747  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2748  C(i ,j) -= A(i ,j) * B(j,j);
2749  C(i+1UL,j) -= A(i+1UL,j) * B(j,j);
2750  }
      // Odd count: one row is left over.
2751  if( ipos < iend ) {
2752  C(ipos,j) -= A(ipos,j) * B(j,j);
2753  }
2754  }
2755  }
2757  //**********************************************************************************************
2758 
2759  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
// Default (scalar) subtraction-assignment kernel, enabled when MT4 is diagonal
// and MT5 is not. Only the diagonal element A(i,i) of the left-hand side
// operand is read, so each element is updated with C(i,j) -= A(i,i) * B(i,j).
// The i loop is unrolled by two and its range is narrowed from the
// IsLower/IsUpper traits of MT5.
// Note that the upper bound M is taken from A.rows() while i also indexes the
// rows of B.
// NOTE(review): presumably A is square here (diagonal type), so A.rows()
// equals B.rows() - confirm.
// NOTE(review): listing line 2779 is absent from this listing; presumably it is
// a compile-time constraint or trace macro - confirm against the header.
2773  template< typename MT3 // Type of the left-hand side target matrix
2774  , typename MT4 // Type of the left-hand side matrix operand
2775  , typename MT5 > // Type of the right-hand side matrix operand
2776  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2777  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2778  {
2780 
2781  const size_t M( A.rows() );
2782  const size_t N( B.columns() );
2783 
2784  for( size_t j=0UL; j<N; ++j )
2785  {
      // Row range for column j, derived from the lower/upper traits of B's type.
2786  const size_t ibegin( ( IsLower<MT5>::value )
2787  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2788  :( 0UL ) );
2789  const size_t iend( ( IsUpper<MT5>::value )
2790  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2791  :( M ) );
2792  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2793 
      // ipos marks the end of the part processed two rows at a time
      // (inum rounded down to an even count).
2794  const size_t inum( iend - ibegin );
2795  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2796 
2797  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2798  C(i ,j) -= A(i ,i ) * B(i ,j);
2799  C(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
2800  }
      // Odd count: one row is left over.
2801  if( ipos < iend ) {
2802  C(ipos,j) -= A(ipos,ipos) * B(ipos,j);
2803  }
2804  }
2805  }
2807  //**********************************************************************************************
2808 
2809  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Default (scalar) subtraction-assignment kernel, enabled when both MT4 and MT5
// are diagonal. Only diagonal elements are touched:
// C(i,i) -= A(i,i) * B(i,i) for every i below A.rows().
// NOTE(review): listing line 2829 is absent from this listing; presumably it is
// a compile-time constraint or trace macro - confirm against the header.
2823  template< typename MT3 // Type of the left-hand side target matrix
2824  , typename MT4 // Type of the left-hand side matrix operand
2825  , typename MT5 > // Type of the right-hand side matrix operand
2826  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
2827  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2828  {
2830 
      // One multiply-subtract per diagonal element.
2831  for( size_t i=0UL; i<A.rows(); ++i ) {
2832  C(i,i) -= A(i,i) * B(i,i);
2833  }
2834  }
2836  //**********************************************************************************************
2837 
2838  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction-assignment kernel, fallback overload.
// Does no arithmetic of its own: it forwards C, A and B unchanged to
// selectDefaultSubAssignKernel().
// NOTE(review): the return-type line (listing line 2855) is absent from this
// listing; presumably it holds the enable/disable condition that separates this
// overload from the vectorized selectSmallSubAssignKernel overloads - confirm
// against the original header.
2852  template< typename MT3 // Type of the left-hand side target matrix
2853  , typename MT4 // Type of the left-hand side matrix operand
2854  , typename MT5 > // Type of the right-hand side matrix operand
2856  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2857  {
      // Plain delegation to the default kernel.
2858  selectDefaultSubAssignKernel( C, A, B );
2859  }
2861  //**********************************************************************************************
2862 
2863  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
// Small-matrix subtraction-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). Every branch copies exactly one operand into its
// OppositeType_ (a temporary named tmp), forms the product with the other,
// unchanged operand, passes it through a ForwardFunctor instance and calls
// subAssign() on the target.
// NOTE(review): presumably OppositeType_ is the same matrix in the opposite
// storage order, so each branch re-dispatches to a kernel for mixed storage
// orders - confirm.
// NOTE(review): many listing lines are absent here (2881, 2884-2887, 2891 and
// 2895). The conditions that open the first two branches are missing; only
// their bodies and closing braces are visible. Confirm the conditions and the
// return type against the original header.
2878  template< typename MT3 // Type of the left-hand side target matrix
2879  , typename MT4 // Type of the left-hand side matrix operand
2880  , typename MT5 > // Type of the right-hand side matrix operand
2882  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2883  {
2888 
2889  const ForwardFunctor fwd;
2890 
      // First branch (condition missing from the listing): convert B.
2892  const OppositeType_<MT5> tmp( serial( B ) );
2893  subAssign( ~C, fwd( A * tmp ) );
2894  }
      // Second branch (condition missing from the listing): convert A.
2896  const OppositeType_<MT4> tmp( serial( A ) );
2897  subAssign( ~C, fwd( tmp * B ) );
2898  }
      // Size-based choice: convert whichever operand has fewer elements
      // (B when the element counts are equal).
2899  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2900  const OppositeType_<MT5> tmp( serial( B ) );
2901  subAssign( ~C, fwd( A * tmp ) );
2902  }
2903  else {
2904  const OppositeType_<MT4> tmp( serial( A ) );
2905  subAssign( ~C, fwd( tmp * B ) );
2906  }
2907  }
2909  //**********************************************************************************************
2910 
2911  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
2926  template< typename MT3 // Type of the left-hand side target matrix
2927  , typename MT4 // Type of the left-hand side matrix operand
2928  , typename MT5 > // Type of the right-hand side matrix operand
2930  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2931  {
2932  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
2933 
2934  const size_t M( A.rows() );
2935  const size_t N( B.columns() );
2936  const size_t K( A.columns() );
2937 
2938  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2939 
2940  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
2941  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
2942 
2943  size_t i( 0UL );
2944 
2946  {
2947  for( ; !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
2948  for( size_t j=0UL; j<N; ++j )
2949  {
2950  const size_t kbegin( ( IsLower<MT5>::value )
2951  ?( ( IsUpper<MT4>::value )
2952  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2953  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2954  :( IsUpper<MT4>::value ? i : 0UL ) );
2955  const size_t kend( ( IsUpper<MT5>::value )
2956  ?( ( IsLower<MT4>::value )
2957  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2958  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
2959  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
2960 
2961  SIMDType xmm1( (~C).load(i ,j) );
2962  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
2963  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
2964  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
2965  SIMDType xmm5( (~C).load(i+SIMDSIZE*4UL,j) );
2966  SIMDType xmm6( (~C).load(i+SIMDSIZE*5UL,j) );
2967  SIMDType xmm7( (~C).load(i+SIMDSIZE*6UL,j) );
2968  SIMDType xmm8( (~C).load(i+SIMDSIZE*7UL,j) );
2969 
2970  for( size_t k=kbegin; k<kend; ++k ) {
2971  const SIMDType b1( set( B(k,j) ) );
2972  xmm1 -= A.load(i ,k) * b1;
2973  xmm2 -= A.load(i+SIMDSIZE ,k) * b1;
2974  xmm3 -= A.load(i+SIMDSIZE*2UL,k) * b1;
2975  xmm4 -= A.load(i+SIMDSIZE*3UL,k) * b1;
2976  xmm5 -= A.load(i+SIMDSIZE*4UL,k) * b1;
2977  xmm6 -= A.load(i+SIMDSIZE*5UL,k) * b1;
2978  xmm7 -= A.load(i+SIMDSIZE*6UL,k) * b1;
2979  xmm8 -= A.load(i+SIMDSIZE*7UL,k) * b1;
2980  }
2981 
2982  (~C).store( i , j, xmm1 );
2983  (~C).store( i+SIMDSIZE , j, xmm2 );
2984  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
2985  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
2986  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
2987  (~C).store( i+SIMDSIZE*5UL, j, xmm6 );
2988  (~C).store( i+SIMDSIZE*6UL, j, xmm7 );
2989  (~C).store( i+SIMDSIZE*7UL, j, xmm8 );
2990  }
2991  }
2992  }
2993 
2994  for( ; !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
2995  {
2996  size_t j( 0UL );
2997 
2998  for( ; (j+2UL) <= N; j+=2UL )
2999  {
3000  const size_t kbegin( ( IsLower<MT5>::value )
3001  ?( ( IsUpper<MT4>::value )
3002  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3003  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3004  :( IsUpper<MT4>::value ? i : 0UL ) );
3005  const size_t kend( ( IsUpper<MT5>::value )
3006  ?( ( IsLower<MT4>::value )
3007  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3008  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3009  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
3010 
3011  SIMDType xmm1 ( (~C).load(i ,j ) );
3012  SIMDType xmm2 ( (~C).load(i+SIMDSIZE ,j ) );
3013  SIMDType xmm3 ( (~C).load(i+SIMDSIZE*2UL,j ) );
3014  SIMDType xmm4 ( (~C).load(i+SIMDSIZE*3UL,j ) );
3015  SIMDType xmm5 ( (~C).load(i+SIMDSIZE*4UL,j ) );
3016  SIMDType xmm6 ( (~C).load(i ,j+1UL) );
3017  SIMDType xmm7 ( (~C).load(i+SIMDSIZE ,j+1UL) );
3018  SIMDType xmm8 ( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
3019  SIMDType xmm9 ( (~C).load(i+SIMDSIZE*3UL,j+1UL) );
3020  SIMDType xmm10( (~C).load(i+SIMDSIZE*4UL,j+1UL) );
3021 
3022  for( size_t k=kbegin; k<kend; ++k ) {
3023  const SIMDType a1( A.load(i ,k) );
3024  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
3025  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
3026  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
3027  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
3028  const SIMDType b1( set( B(k,j ) ) );
3029  const SIMDType b2( set( B(k,j+1UL) ) );
3030  xmm1 -= a1 * b1;
3031  xmm2 -= a2 * b1;
3032  xmm3 -= a3 * b1;
3033  xmm4 -= a4 * b1;
3034  xmm5 -= a5 * b1;
3035  xmm6 -= a1 * b2;
3036  xmm7 -= a2 * b2;
3037  xmm8 -= a3 * b2;
3038  xmm9 -= a4 * b2;
3039  xmm10 -= a5 * b2;
3040  }
3041 
3042  (~C).store( i , j , xmm1 );
3043  (~C).store( i+SIMDSIZE , j , xmm2 );
3044  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
3045  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
3046  (~C).store( i+SIMDSIZE*4UL, j , xmm5 );
3047  (~C).store( i , j+1UL, xmm6 );
3048  (~C).store( i+SIMDSIZE , j+1UL, xmm7 );
3049  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm8 );
3050  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm9 );
3051  (~C).store( i+SIMDSIZE*4UL, j+1UL, xmm10 );
3052  }
3053 
3054  if( j < N )
3055  {
3056  const size_t kbegin( ( IsLower<MT5>::value )
3057  ?( ( IsUpper<MT4>::value )
3058  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3059  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3060  :( IsUpper<MT4>::value ? i : 0UL ) );
3061  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
3062 
3063  SIMDType xmm1( (~C).load(i ,j) );
3064  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
3065  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
3066  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
3067  SIMDType xmm5( (~C).load(i+SIMDSIZE*4UL,j) );
3068 
3069  for( size_t k=kbegin; k<kend; ++k ) {
3070  const SIMDType b1( set( B(k,j) ) );
3071  xmm1 -= A.load(i ,k) * b1;
3072  xmm2 -= A.load(i+SIMDSIZE ,k) * b1;
3073  xmm3 -= A.load(i+SIMDSIZE*2UL,k) * b1;
3074  xmm4 -= A.load(i+SIMDSIZE*3UL,k) * b1;
3075  xmm5 -= A.load(i+SIMDSIZE*4UL,k) * b1;
3076  }
3077 
3078  (~C).store( i , j, xmm1 );
3079  (~C).store( i+SIMDSIZE , j, xmm2 );
3080  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
3081  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
3082  (~C).store( i+SIMDSIZE*4UL, j, xmm5 );
3083  }
3084  }
3085 
3086  for( ; !LOW && !UPP && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
3087  {
3088  size_t j( 0UL );
3089 
3090  for( ; (j+2UL) <= N; j+=2UL )
3091  {
3092  const size_t kbegin( ( IsLower<MT5>::value )
3093  ?( ( IsUpper<MT4>::value )
3094  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3095  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3096  :( IsUpper<MT4>::value ? i : 0UL ) );
3097  const size_t kend( ( IsUpper<MT5>::value )
3098  ?( ( IsLower<MT4>::value )
3099  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3100  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3101  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
3102 
3103  SIMDType xmm1( (~C).load(i ,j ) );
3104  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j ) );
3105  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j ) );
3106  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j ) );
3107  SIMDType xmm5( (~C).load(i ,j+1UL) );
3108  SIMDType xmm6( (~C).load(i+SIMDSIZE ,j+1UL) );
3109  SIMDType xmm7( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
3110  SIMDType xmm8( (~C).load(i+SIMDSIZE*3UL,j+1UL) );
3111 
3112  for( size_t k=kbegin; k<kend; ++k ) {
3113  const SIMDType a1( A.load(i ,k) );
3114  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
3115  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
3116  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
3117  const SIMDType b1( set( B(k,j ) ) );
3118  const SIMDType b2( set( B(k,j+1UL) ) );
3119  xmm1 -= a1 * b1;
3120  xmm2 -= a2 * b1;
3121  xmm3 -= a3 * b1;
3122  xmm4 -= a4 * b1;
3123  xmm5 -= a1 * b2;
3124  xmm6 -= a2 * b2;
3125  xmm7 -= a3 * b2;
3126  xmm8 -= a4 * b2;
3127  }
3128 
3129  (~C).store( i , j , xmm1 );
3130  (~C).store( i+SIMDSIZE , j , xmm2 );
3131  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
3132  (~C).store( i+SIMDSIZE*3UL, j , xmm4 );
3133  (~C).store( i , j+1UL, xmm5 );
3134  (~C).store( i+SIMDSIZE , j+1UL, xmm6 );
3135  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm7 );
3136  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm8 );
3137  }
3138 
3139  if( j < N )
3140  {
3141  const size_t kbegin( ( IsLower<MT5>::value )
3142  ?( ( IsUpper<MT4>::value )
3143  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3144  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3145  :( IsUpper<MT4>::value ? i : 0UL ) );
3146  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
3147 
3148  SIMDType xmm1( (~C).load(i ,j) );
3149  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
3150  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
3151  SIMDType xmm4( (~C).load(i+SIMDSIZE*3UL,j) );
3152 
3153  for( size_t k=kbegin; k<kend; ++k ) {
3154  const SIMDType b1( set( B(k,j) ) );
3155  xmm1 -= A.load(i ,k) * b1;
3156  xmm2 -= A.load(i+SIMDSIZE ,k) * b1;
3157  xmm3 -= A.load(i+SIMDSIZE*2UL,k) * b1;
3158  xmm4 -= A.load(i+SIMDSIZE*3UL,k) * b1;
3159  }
3160 
3161  (~C).store( i , j, xmm1 );
3162  (~C).store( i+SIMDSIZE , j, xmm2 );
3163  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
3164  (~C).store( i+SIMDSIZE*3UL, j, xmm4 );
3165  }
3166  }
3167 
3168  for( ; !LOW && !UPP && (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
3169  {
3170  size_t j( 0UL );
3171 
3172  for( ; (j+2UL) <= N; j+=2UL )
3173  {
3174  const size_t kbegin( ( IsLower<MT5>::value )
3175  ?( ( IsUpper<MT4>::value )
3176  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3177  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3178  :( IsUpper<MT4>::value ? i : 0UL ) );
3179  const size_t kend( ( IsUpper<MT5>::value )
3180  ?( ( IsLower<MT4>::value )
3181  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3182  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3183  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
3184 
3185  SIMDType xmm1( (~C).load(i ,j ) );
3186  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j ) );
3187  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j ) );
3188  SIMDType xmm4( (~C).load(i ,j+1UL) );
3189  SIMDType xmm5( (~C).load(i+SIMDSIZE ,j+1UL) );
3190  SIMDType xmm6( (~C).load(i+SIMDSIZE*2UL,j+1UL) );
3191 
3192  for( size_t k=kbegin; k<kend; ++k ) {
3193  const SIMDType a1( A.load(i ,k) );
3194  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
3195  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
3196  const SIMDType b1( set( B(k,j ) ) );
3197  const SIMDType b2( set( B(k,j+1UL) ) );
3198  xmm1 -= a1 * b1;
3199  xmm2 -= a2 * b1;
3200  xmm3 -= a3 * b1;
3201  xmm4 -= a1 * b2;
3202  xmm5 -= a2 * b2;
3203  xmm6 -= a3 * b2;
3204  }
3205 
3206  (~C).store( i , j , xmm1 );
3207  (~C).store( i+SIMDSIZE , j , xmm2 );
3208  (~C).store( i+SIMDSIZE*2UL, j , xmm3 );
3209  (~C).store( i , j+1UL, xmm4 );
3210  (~C).store( i+SIMDSIZE , j+1UL, xmm5 );
3211  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm6 );
3212  }
3213 
3214  if( j < N )
3215  {
3216  const size_t kbegin( ( IsLower<MT5>::value )
3217  ?( ( IsUpper<MT4>::value )
3218  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3219  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3220  :( IsUpper<MT4>::value ? i : 0UL ) );
3221  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
3222 
3223  SIMDType xmm1( (~C).load(i ,j) );
3224  SIMDType xmm2( (~C).load(i+SIMDSIZE ,j) );
3225  SIMDType xmm3( (~C).load(i+SIMDSIZE*2UL,j) );
3226 
3227  for( size_t k=kbegin; k<kend; ++k ) {
3228  const SIMDType b1( set( B(k,j) ) );
3229  xmm1 -= A.load(i ,k) * b1;
3230  xmm2 -= A.load(i+SIMDSIZE ,k) * b1;
3231  xmm3 -= A.load(i+SIMDSIZE*2UL,k) * b1;
3232  }
3233 
3234  (~C).store( i , j, xmm1 );
3235  (~C).store( i+SIMDSIZE , j, xmm2 );
3236  (~C).store( i+SIMDSIZE*2UL, j, xmm3 );
3237  }
3238  }
3239 
3240  for( ; !( LOW && UPP ) && (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
3241  {
3242  const size_t jend( LOW ? min(i+SIMDSIZE*2UL,N) : N );
3243  size_t j( UPP ? i : 0UL );
3244 
3245  for( ; (j+2UL) <= jend; j+=2UL )
3246  {
3247  const size_t kbegin( ( IsLower<MT5>::value )
3248  ?( ( IsUpper<MT4>::value )
3249  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3250  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3251  :( IsUpper<MT4>::value ? i : 0UL ) );
3252  const size_t kend( ( IsUpper<MT5>::value )
3253  ?( ( IsLower<MT4>::value )
3254  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3255  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3256  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
3257 
3258  SIMDType xmm1( (~C).load(i ,j ) );
3259  SIMDType xmm2( (~C).load(i+SIMDSIZE,j ) );
3260  SIMDType xmm3( (~C).load(i ,j+1UL) );
3261  SIMDType xmm4( (~C).load(i+SIMDSIZE,j+1UL) );
3262 
3263  for( size_t k=kbegin; k<kend; ++k ) {
3264  const SIMDType a1( A.load(i ,k) );
3265  const SIMDType a2( A.load(i+SIMDSIZE,k) );
3266  const SIMDType b1( set( B(k,j ) ) );
3267  const SIMDType b2( set( B(k,j+1UL) ) );
3268  xmm1 -= a1 * b1;
3269  xmm2 -= a2 * b1;
3270  xmm3 -= a1 * b2;
3271  xmm4 -= a2 * b2;
3272  }
3273 
3274  (~C).store( i , j , xmm1 );
3275  (~C).store( i+SIMDSIZE, j , xmm2 );
3276  (~C).store( i , j+1UL, xmm3 );
3277  (~C).store( i+SIMDSIZE, j+1UL, xmm4 );
3278  }
3279 
3280  if( j < jend )
3281  {
3282  const size_t kbegin( ( IsLower<MT5>::value )
3283  ?( ( IsUpper<MT4>::value )
3284  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3285  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3286  :( IsUpper<MT4>::value ? i : 0UL ) );
3287  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
3288 
3289  SIMDType xmm1( (~C).load(i ,j) );
3290  SIMDType xmm2( (~C).load(i+SIMDSIZE,j) );
3291 
3292  for( size_t k=kbegin; k<kend; ++k ) {
3293  const SIMDType b1( set( B(k,j) ) );
3294  xmm1 -= A.load(i ,k) * b1;
3295  xmm2 -= A.load(i+SIMDSIZE,k) * b1;
3296  }
3297 
3298  (~C).store( i , j, xmm1 );
3299  (~C).store( i+SIMDSIZE, j, xmm2 );
3300  }
3301  }
3302 
3303  for( ; i<ipos; i+=SIMDSIZE )
3304  {
3305  const size_t jend( LOW && UPP ? min(i+SIMDSIZE,N) : N );
3306  size_t j( UPP ? i : 0UL );
3307 
3308  for( ; (j+2UL) <= jend; j+=2UL )
3309  {
3310  const size_t kbegin( ( IsLower<MT5>::value )
3311  ?( ( IsUpper<MT4>::value )
3312  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3313  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3314  :( IsUpper<MT4>::value ? i : 0UL ) );
3315  const size_t kend( ( IsUpper<MT5>::value )
3316  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
3317  :( K ) );
3318 
3319  SIMDType xmm1( (~C).load(i,j ) );
3320  SIMDType xmm2( (~C).load(i,j+1UL) );
3321 
3322  for( size_t k=kbegin; k<kend; ++k ) {
3323  const SIMDType a1( A.load(i,k) );
3324  xmm1 -= a1 * set( B(k,j ) );
3325  xmm2 -= a1 * set( B(k,j+1UL) );
3326  }
3327 
3328  (~C).store( i, j , xmm1 );
3329  (~C).store( i, j+1UL, xmm2 );
3330  }
3331 
3332  if( j < jend )
3333  {
3334  const size_t kbegin( ( IsLower<MT5>::value )
3335  ?( ( IsUpper<MT4>::value )
3336  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3337  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3338  :( IsUpper<MT4>::value ? i : 0UL ) );
3339 
3340  SIMDType xmm1( (~C).load(i,j) );
3341 
3342  for( size_t k=kbegin; k<K; ++k ) {
3343  xmm1 -= A.load(i,k) * set( B(k,j) );
3344  }
3345 
3346  (~C).store( i, j, xmm1 );
3347  }
3348  }
3349 
3350  for( ; remainder && i<M; ++i )
3351  {
3352  const size_t jend( LOW ? i+1UL : N );
3353  size_t j( UPP ? i : 0UL );
3354 
3355  for( ; (j+2UL) <= jend; j+=2UL )
3356  {
3357  const size_t kbegin( ( IsLower<MT5>::value )
3358  ?( ( IsUpper<MT4>::value )
3359  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3360  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3361  :( IsUpper<MT4>::value ? i : 0UL ) );
3362  const size_t kend( ( IsUpper<MT5>::value )
3363  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
3364  :( K ) );
3365 
3366  ElementType value1( (~C)(i,j ) );
3367  ElementType value2( (~C)(i,j+1UL) );
3368 
3369  for( size_t k=kbegin; k<kend; ++k ) {
3370  value1 -= A(i,k) * B(k,j );
3371  value2 -= A(i,k) * B(k,j+1UL);
3372  }
3373 
3374  (~C)(i,j ) = value1;
3375  (~C)(i,j+1UL) = value2;
3376  }
3377 
3378  if( j < jend )
3379  {
3380  const size_t kbegin( ( IsLower<MT5>::value )
3381  ?( ( IsUpper<MT4>::value )
3382  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3383  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3384  :( IsUpper<MT4>::value ? i : 0UL ) );
3385 
3386  ElementType value( (~C)(i,j) );
3387 
3388  for( size_t k=kbegin; k<K; ++k ) {
3389  value -= A(i,k) * B(k,j);
3390  }
3391 
3392  (~C)(i,j) = value;
3393  }
3394  }
3395  }
3397  //**********************************************************************************************
3398 
3399  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Fallback large-matrix kernel for the subtraction assignment: forwards C, A and B
// unchanged to selectDefaultSubAssignKernel().
// NOTE(review): the return-type / enable-if line (original line 3416) is missing from
// this listing, so the condition that selects this overload cannot be confirmed here.
3413  template< typename MT3 // Type of the left-hand side target matrix
3414  , typename MT4 // Type of the left-hand side matrix operand
3415  , typename MT5 > // Type of the right-hand side matrix operand
3417  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3418  {
3419  selectDefaultSubAssignKernel( C, A, B );
3420  }
3422  //**********************************************************************************************
3423 
3424  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Large-matrix kernel for the subtraction assignment that dispatches to one of the
// lmmm() / ummm() / mmm() routines: lmmm() when the LOW flag is set, otherwise ummm()
// when UPP is set, otherwise mmm(). LOW is tested first, so it wins when both are set.
// All three calls pass the factors ElementType(-1) and ElementType(1).
// NOTE(review): presumably these are the scaling factors for A*B and for C
// (i.e. C = C - A*B) -- confirm against the declarations in <blaze/math/dense/MMM.h>.
// NOTE(review): the return-type / enable-if line (original line 3442) is missing here.
3439  template< typename MT3 // Type of the left-hand side target matrix
3440  , typename MT4 // Type of the left-hand side matrix operand
3441  , typename MT5 > // Type of the right-hand side matrix operand
3443  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3444  {
3445  if( LOW )
3446  lmmm( C, A, B, ElementType(-1), ElementType(1) );
3447  else if( UPP )
3448  ummm( C, A, B, ElementType(-1), ElementType(1) );
3449  else
3450  mmm( C, A, B, ElementType(-1), ElementType(1) );
3451  }
3453  //**********************************************************************************************
3454 
3455  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback BLAS kernel for the subtraction assignment: performs no BLAS call itself and
// simply forwards C, A and B to selectLargeSubAssignKernel().
// NOTE(review): the return-type / enable-if line (original line 3472) is missing from
// this listing, so the condition that selects this overload cannot be confirmed here.
3469  template< typename MT3 // Type of the left-hand side target matrix
3470  , typename MT4 // Type of the left-hand side matrix operand
3471  , typename MT5 > // Type of the right-hand side matrix operand
3473  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3474  {
3475  selectLargeSubAssignKernel( C, A, B );
3476  }
3478  //**********************************************************************************************
3479 
3480  //**BLAS-based subtraction assignment to dense matrices*****************************************
3481 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
3482 
// BLAS-backed kernel for the subtraction assignment (only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are non-zero).
// Three cases, checked in this order:
//  - A is triangular: copy B into a temporary, multiply it by A from the left via
//    trmm(), then subtract the temporary from C.
//  - B is triangular: copy A into a temporary, multiply it by B from the right via
//    trmm(), then subtract the temporary from C.
//  - otherwise: a single gemm() call with the factors ET(-1) and ET(1).
// NOTE(review): the return-type / enable-if line (original line 3498) is missing here.
3495  template< typename MT3 // Type of the left-hand side target matrix
3496  , typename MT4 // Type of the left-hand side matrix operand
3497  , typename MT5 > // Type of the right-hand side matrix operand
3499  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3500  {
3501  typedef ElementType_<MT3> ET;
3502 
3503  if( IsTriangular<MT4>::value ) {
// The temporary has the result type of the target and is built from serial( B ).
3504  ResultType_<MT3> tmp( serial( B ) );
// Lower/upper selection follows IsLower<MT4>; the scalar factor is ET(1).
3505  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3506  subAssign( C, tmp );
3507  }
3508  else if( IsTriangular<MT5>::value ) {
3509  ResultType_<MT3> tmp( serial( A ) );
// Lower/upper selection follows IsLower<MT5>; the scalar factor is ET(1).
3510  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3511  subAssign( C, tmp );
3512  }
3513  else {
3514  gemm( C, A, B, ET(-1), ET(1) );
3515  }
3516  }
3518 #endif
3519  //**********************************************************************************************
3520 
3521  //**Restructuring subtraction assignment to row-major matrices**********************************
// Subtraction assignment of the product expression to a row-major target
// (Matrix<MT,false>). The product is rebuilt with transposed operands, wrapped in a
// ForwardFunctor instance and passed on to subAssign():
//  - first branch:                 trans( lhs_ ) * trans( rhs_ )
//  - IsSymmetric<MT1>::value:      trans( lhs_ ) * rhs_
//  - otherwise:                    lhs_ * trans( rhs_ )
// NOTE(review): the `if` condition guarding the first branch (original line 3550) and
// the return-type line (3538) are missing from this listing.
3537  template< typename MT > // Type of the target matrix
3539  subAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
3540  {
3542 
3544 
3545  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3546  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3547 
3548  const ForwardFunctor fwd;
3549 
3551  subAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
3552  else if( IsSymmetric<MT1>::value )
3553  subAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
3554  else
3555  subAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3556  }
3558  //**********************************************************************************************
3559 
3560  //**Subtraction assignment to sparse matrices***************************************************
3561  // No special implementation for the subtraction assignment to sparse matrices.
3562  //**********************************************************************************************
3563 
3564  //**Multiplication assignment to dense matrices*************************************************
3565  // No special implementation for the multiplication assignment to dense matrices.
3566  //**********************************************************************************************
3567 
3568  //**Multiplication assignment to sparse matrices************************************************
3569  // No special implementation for the multiplication assignment to sparse matrices.
3570  //**********************************************************************************************
3571 
3572  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the product expression to a dense target matrix.
// Returns immediately when the target has no rows or no columns, and resets the target
// when the left operand has no columns. Otherwise both operands are bound to the LT / RT
// types and the product A * B is handed to smpAssign() again.
// NOTE(review): the return-type / enable-if line (original line 3590) is missing here.
3588  template< typename MT // Type of the target dense matrix
3589  , bool SO > // Storage order of the target dense matrix
3591  smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
3592  {
3594 
3595  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3596  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3597 
// Empty target: nothing to do. Empty inner dimension: the target is reset.
3598  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3599  return;
3600  }
3601  else if( rhs.lhs_.columns() == 0UL ) {
3602  reset( ~lhs );
3603  return;
3604  }
3605 
3606  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3607  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3608 
3609  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3610  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3611  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3612  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3613  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3614  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3615 
3616  smpAssign( ~lhs, A * B );
3617  }
3619  //**********************************************************************************************
3620 
3621  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the product expression to a sparse target matrix (per the section
// banner above). The whole expression is first materialised into a temporary of type
// TmpType, which is then wrapped by the ForwardFunctor instance and passed to smpAssign().
// NOTE(review): the function signature (original lines 3639-3640) and the definition of
// TmpType (somewhere in 3642-3651) are missing from this listing; `lhs` and `rhs` are
// the parameter names used by the body.
3637  template< typename MT // Type of the target sparse matrix
3638  , bool SO > // Storage order of the target sparse matrix
3641  {
3643 
3645 
3652 
3653  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3654  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3655 
3656  const ForwardFunctor fwd;
3657 
3658  const TmpType tmp( rhs );
3659  smpAssign( ~lhs, fwd( tmp ) );
3660  }
3662  //**********************************************************************************************
3663 
3664  //**Restructuring SMP assignment to row-major matrices******************************************
// SMP assignment of the product expression to a row-major target (Matrix<MT,false>).
// The product is rebuilt with transposed operands, wrapped in a ForwardFunctor instance
// and passed on to smpAssign():
//  - first branch:                 trans( lhs_ ) * trans( rhs_ )
//  - IsSymmetric<MT1>::value:      trans( lhs_ ) * rhs_
//  - otherwise:                    lhs_ * trans( rhs_ )
// NOTE(review): the `if` condition guarding the first branch (original line 3692) and
// the return-type line (3680) are missing from this listing.
3679  template< typename MT > // Type of the target matrix
3681  smpAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
3682  {
3684 
3686 
3687  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3688  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3689 
3690  const ForwardFunctor fwd;
3691 
3693  smpAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
3694  else if( IsSymmetric<MT1>::value )
3695  smpAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
3696  else
3697  smpAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3698  }
3700  //**********************************************************************************************
3701 
3702  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the product expression to a dense target matrix (per the
// section banner above). Returns without touching the target when the target has no
// rows or columns or when the left operand has no columns. Otherwise both operands are
// bound to the LT / RT types and A * B is handed to smpAddAssign().
// NOTE(review): the function signature (original lines 3720-3721) is missing from this
// listing; `lhs` and `rhs` are the parameter names used by the body.
3718  template< typename MT // Type of the target dense matrix
3719  , bool SO > // Storage order of the target dense matrix
3722  {
3724 
3725  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3726  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3727 
// Unlike the plain assignment, an empty inner dimension needs no reset of the target here.
3728  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
3729  return;
3730  }
3731 
3732  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3733  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3734 
3735  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3736  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3737  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3738  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3739  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3740  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3741 
3742  smpAddAssign( ~lhs, A * B );
3743  }
3745  //**********************************************************************************************
3746 
3747  //**Restructuring SMP addition assignment to row-major matrices*********************************
// SMP addition assignment of the product expression to a row-major target (per the
// section banner above). The product is rebuilt with transposed operands, wrapped in a
// ForwardFunctor instance and passed on to smpAddAssign():
//  - first branch:                 trans( lhs_ ) * trans( rhs_ )
//  - IsSymmetric<MT1>::value:      trans( lhs_ ) * rhs_
//  - otherwise:                    lhs_ * trans( rhs_ )
// NOTE(review): the function signature (original lines 3764-3765) and the `if`
// condition guarding the first branch (3776) are missing from this listing.
3763  template< typename MT > // Type of the target matrix
3766  {
3768 
3770 
3771  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3772  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3773 
3774  const ForwardFunctor fwd;
3775 
3777  smpAddAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
3778  else if( IsSymmetric<MT1>::value )
3779  smpAddAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
3780  else
3781  smpAddAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3782  }
3784  //**********************************************************************************************
3785 
3786  //**SMP addition assignment to sparse matrices**************************************************
3787  // No special implementation for the SMP addition assignment to sparse matrices.
3788  //**********************************************************************************************
3789 
3790  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the product expression to a dense target matrix (per the
// section banner above). Returns without touching the target when the target has no
// rows or columns or when the left operand has no columns. Otherwise both operands are
// bound to the LT / RT types and A * B is handed to smpSubAssign().
// NOTE(review): the function signature (original lines 3808-3809) is missing from this
// listing; `lhs` and `rhs` are the parameter names used by the body.
3806  template< typename MT // Type of the target dense matrix
3807  , bool SO > // Storage order of the target dense matrix
3810  {
3812 
3813  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3814  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3815 
3816  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
3817  return;
3818  }
3819 
3820  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3821  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3822 
3823  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3824  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3825  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3826  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3827  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3828  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3829 
3830  smpSubAssign( ~lhs, A * B );
3831  }
3833  //**********************************************************************************************
3834 
3835  //**Restructuring SMP subtraction assignment to row-major matrices******************************
// SMP subtraction assignment of the product expression to a row-major target (per the
// section banner above). The product is rebuilt with transposed operands, wrapped in a
// ForwardFunctor instance and passed on to smpSubAssign():
//  - first branch:                 trans( lhs_ ) * trans( rhs_ )
//  - IsSymmetric<MT1>::value:      trans( lhs_ ) * rhs_
//  - otherwise:                    lhs_ * trans( rhs_ )
// NOTE(review): the function signature (original lines 3852-3853) and the `if`
// condition guarding the first branch (3864) are missing from this listing.
3851  template< typename MT > // Type of the target matrix
3854  {
3856 
3858 
3859  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3860  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3861 
3862  const ForwardFunctor fwd;
3863 
3865  smpSubAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
3866  else if( IsSymmetric<MT1>::value )
3867  smpSubAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
3868  else
3869  smpSubAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3870  }
3872  //**********************************************************************************************
3873 
3874  //**SMP subtraction assignment to sparse matrices***********************************************
3875  // No special implementation for the SMP subtraction assignment to sparse matrices.
3876  //**********************************************************************************************
3877 
3878  //**SMP multiplication assignment to dense matrices*********************************************
3879  // No special implementation for the SMP multiplication assignment to dense matrices.
3880  //**********************************************************************************************
3881 
3882  //**SMP multiplication assignment to sparse matrices********************************************
3883  // No special implementation for the SMP multiplication assignment to sparse matrices.
3884  //**********************************************************************************************
3885 
3886  //**Compile time checks*************************************************************************
3894  //**********************************************************************************************
3895 };
3896 //*************************************************************************************************
3897 
3898 
3899 
3900 
3901 //=================================================================================================
3902 //
3903 // DMATSCALARMULTEXPR SPECIALIZATION
3904 //
3905 //=================================================================================================
3906 
3907 //*************************************************************************************************
3915 template< typename MT1 // Type of the left-hand side dense matrix
3916  , typename MT2 // Type of the right-hand side dense matrix
3917  , bool SF // Symmetry flag
3918  , bool HF // Hermitian flag
3919  , bool LF // Lower flag
3920  , bool UF // Upper flag
3921  , typename ST > // Type of the right-hand side scalar value
3922 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, true >
3923  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, true >, true >
3924  , private MatScalarMultExpr
3925  , private Computation
3926 {
3927  private:
3928  //**Type definitions****************************************************************************
3931 
// Private shorthand types: result, element and composite types of the wrapped
// multiplication expression (MMM) and of its two matrix operands (MT1, MT2).
// NOTE(review): the typedef that introduces MMM (original line 3930) is missing from
// this listing.
3932  typedef ResultType_<MMM> RES;
3933  typedef ResultType_<MT1> RT1;
3934  typedef ResultType_<MT2> RT2;
3935  typedef ElementType_<RT1> ET1;
3936  typedef ElementType_<RT2> ET2;
3937  typedef CompositeType_<MT1> CT1;
3938  typedef CompositeType_<MT2> CT2;
3939  //**********************************************************************************************
3940 
3941  //**********************************************************************************************
// Compile-time flag: true when MT1 is a computation or requires an intermediate evaluation.
3943  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
3944  //**********************************************************************************************
3945 
3946  //**********************************************************************************************
// Compile-time flag: true when MT2 is a computation or requires an intermediate evaluation.
3948  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
3949  //**********************************************************************************************
3950 
3951  //**********************************************************************************************
// Normalised structure flags derived from the template flags SF/HF/LF/UF:
//  - SYM  : SF is set and none of HF, LF, UF is set.
//  - HERM : HF is set and neither LF nor UF is set.
//  - LOW  : LF is set, or UF is combined with SF or HF.
//  - UPP  : UF is set, or LF is combined with SF or HF.
// Consequently SF or HF combined with LF or UF yields LOW and UPP together.
3953  enum : bool {
3954  SYM = ( SF && !( HF || LF || UF ) ),
3955  HERM = ( HF && !( LF || UF ) ),
3956  LOW = ( LF || ( ( SF || HF ) && UF ) ),
3957  UPP = ( UF || ( ( SF || HF ) && LF ) )
3958  };
3959  //**********************************************************************************************
3960 
3961  //**********************************************************************************************
3963 
// Compile-time predicate over three types. The visible part of `value` requires
// IsRowMajorMatrix<T1>::value.
// NOTE(review): the remainder of the condition (original line 3971) is missing from
// this listing, so the full predicate cannot be stated here.
3968  template< typename T1, typename T2, typename T3 >
3969  struct CanExploitSymmetry {
3970  enum : bool { value = IsRowMajorMatrix<T1>::value &&
3972  };
3973  //**********************************************************************************************
3974 
3975  //**********************************************************************************************
3977 
// Compile-time predicate: true when at least one operand has to be evaluated
// (evaluateLeft or evaluateRight) and CanExploitSymmetry<T1,T2,T3> does not hold.
3980  template< typename T1, typename T2, typename T3 >
3981  struct IsEvaluationRequired {
3982  enum : bool { value = ( evaluateLeft || evaluateRight ) &&
3983  !CanExploitSymmetry<T1,T2,T3>::value };
3984  };
3985  //**********************************************************************************************
3986 
3987  //**********************************************************************************************
3989 
// Compile-time predicate over four types. The visible conjuncts require that none of
// SYM, HERM, LOW, UPP is set, that T1, T2 and T3 are SIMD-enabled, and that T1 and T3
// share the same element type.
// NOTE(review): several lines of the condition (original lines 3993, 3995-3998,
// 4000-4003, 4005) are missing from this listing; the full predicate cannot be stated.
3991  template< typename T1, typename T2, typename T3, typename T4 >
3992  struct UseBlasKernel {
3994  !SYM && !HERM && !LOW && !UPP &&
3999  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4004  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
4006  };
4007  //**********************************************************************************************
4008 
4009  //**********************************************************************************************
4011 
// Compile-time predicate over four types. The visible conjuncts require
// useOptimizedKernels, SIMD-enabled T1, T2 and T3, SIMD addition for the element type
// of T2 and SIMD multiplication for the element type of T3.
// NOTE(review): original lines 4016 and 4018-4020 are missing from this listing; the
// dangling ", T4 >::value" belongs to a trait whose name is not visible here.
4013  template< typename T1, typename T2, typename T3, typename T4 >
4014  struct UseVectorizedDefaultKernel {
4015  enum : bool { value = useOptimizedKernels &&
4017  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4021  , T4 >::value &&
4022  HasSIMDAdd< ElementType_<T2>, ElementType_<T2> >::value &&
4023  HasSIMDMult< ElementType_<T3>, ElementType_<T3> >::value };
4024  };
4025  //**********************************************************************************************
4026 
4027  //**********************************************************************************************
4029 
// Functor type selected from the structure flags, in this priority order:
//  HERM -> DeclHerm, SYM -> DeclSym, LOW && UPP -> DeclDiag, LOW -> DeclLow,
//  UPP -> DeclUpp, none of them -> Noop.
4031  typedef IfTrue_< HERM
4032  , DeclHerm
4033  , IfTrue_< SYM
4034  , DeclSym
4035  , IfTrue_< LOW
4036  , IfTrue_< UPP
4037  , DeclDiag
4038  , DeclLow >
4039  , IfTrue_< UPP
4040  , DeclUpp
4041  , Noop > > > > ForwardFunctor;
4042  //**********************************************************************************************
4043 
4044  public:
4045  //**Type definitions****************************************************************************
4047  typedef MultTrait_<RES,ST> ResultType;
4052  typedef const ElementType ReturnType;
4053  typedef const ResultType CompositeType;
4054 
4057 
4059  typedef ST RightOperand;
4060 
4063 
4066  //**********************************************************************************************
4067 
4068  //**Compilation flags***************************************************************************
// Compilation flag for SIMD support. The visible conjuncts require that MT1 is not
// diagonal and that both MT1 and MT2 are SIMD-enabled.
// NOTE(review): the rest of the condition (original lines 4072-4074) is missing here.
4070  enum : bool { simdEnabled = !IsDiagonal<MT1>::value &&
4071  MT1::simdEnabled && MT2::simdEnabled &&
4075 
// Compilation flag for SMP assignments: set only when neither operand needs an
// intermediate evaluation and both MT1 and MT2 are themselves SMP-assignable.
4077  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4078  !evaluateRight && MT2::smpAssignable };
4079  //**********************************************************************************************
4080 
4081  //**SIMD properties*****************************************************************************
// SIMD size constant taken from SIMDTrait<ElementType>::size.
4083  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
4084  //**********************************************************************************************
4085 
4086  //**Constructor*********************************************************************************
// Constructor: stores the matrix multiplication expression and the scalar factor.
//  matrix : the left-hand side matrix multiplication expression.
//  scalar : the right-hand side scalar of the scaling expression.
4092  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4093  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4094  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4095  {}
4096  //**********************************************************************************************
4097 
4098  //**Access operator*****************************************************************************
// Element access: returns matrix_(i,j) multiplied by the scalar. The indices are only
// checked via BLAZE_INTERNAL_ASSERT; use at() for a checked access that throws.
4105  inline ReturnType operator()( size_t i, size_t j ) const {
4106  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4107  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4108  return matrix_(i,j) * scalar_;
4109  }
4110  //**********************************************************************************************
4111 
4112  //**At function*********************************************************************************
// Checked element access: raises an out-of-range error via BLAZE_THROW_OUT_OF_RANGE
// when i is not below rows() of the wrapped expression or j is not below its columns();
// otherwise returns the same value as operator()(i,j).
4120  inline ReturnType at( size_t i, size_t j ) const {
4121  if( i >= matrix_.rows() ) {
4122  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
4123  }
4124  if( j >= matrix_.columns() ) {
4125  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
4126  }
4127  return (*this)(i,j);
4128  }
4129  //**********************************************************************************************
4130 
4131  //**Rows function*******************************************************************************
// Returns the number of rows of the wrapped multiplication expression.
4136  inline size_t rows() const {
4137  return matrix_.rows();
4138  }
4139  //**********************************************************************************************
4140 
4141  //**Columns function****************************************************************************
// Returns the number of columns of the wrapped multiplication expression.
4146  inline size_t columns() const {
4147  return matrix_.columns();
4148  }
4149  //**********************************************************************************************
4150 
4151  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix multiplication expression.
4156  inline LeftOperand leftOperand() const {
4157  return matrix_;
4158  }
4159  //**********************************************************************************************
4160 
4161  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar factor.
4166  inline RightOperand rightOperand() const {
4167  return scalar_;
4168  }
4169  //**********************************************************************************************
4170 
4171  //**********************************************************************************************
// Aliasing query: delegates to canAlias() of the wrapped multiplication expression.
//  alias : the address to be checked.
4177  template< typename T >
4178  inline bool canAlias( const T* alias ) const {
4179  return matrix_.canAlias( alias );
4180  }
4181  //**********************************************************************************************
4182 
4183  //**********************************************************************************************
// Aliasing query: delegates to isAliased() of the wrapped multiplication expression.
//  alias : the address to be checked.
4189  template< typename T >
4190  inline bool isAliased( const T* alias ) const {
4191  return matrix_.isAliased( alias );
4192  }
4193  //**********************************************************************************************
4194 
4195  //**********************************************************************************************
// Alignment query: delegates to isAligned() of the wrapped multiplication expression.
4200  inline bool isAligned() const {
4201  return matrix_.isAligned();
4202  }
4203  //**********************************************************************************************
4204 
4205  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments. Both of the following
// must hold for the element count rows() * columns():
//  - it is at least SMP_TDMATTDMATMULT_THRESHOLD, and
//  - BLAZE_BLAS_IS_PARALLEL is off, or it is below TDMATTDMATMULT_THRESHOLD.
4210  inline bool canSMPAssign() const noexcept {
4211  return ( !BLAZE_BLAS_IS_PARALLEL ||
4212  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
4213  ( rows() * columns() >= SMP_TDMATTDMATMULT_THRESHOLD );
4214  }
4215  //**********************************************************************************************
4216 
4217  private:
4218  //**Member variables****************************************************************************
4219  LeftOperand matrix_;
4220  RightOperand scalar_;
4221  //**********************************************************************************************
4222 
4223  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled product expression to a dense target matrix.
// Fetches both operands of the wrapped multiplication, returns early for an empty target,
// resets the target when the inner dimension is zero, and otherwise binds the serially
// evaluated operands to LT / RT and hands them together with the scalar to
// selectAssignKernel().
// NOTE(review): the return-type / enable-if line (original line 4237) and the LT / RT
// typedefs are not visible in this listing.
4235  template< typename MT // Type of the target dense matrix
4236  , bool SO > // Storage order of the target dense matrix
4238  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4239  {
4241 
4242  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4243  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4244 
4245  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
4246  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
4247 
// Empty target: nothing to do. Empty inner dimension: the target is reset.
4248  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4249  return;
4250  }
4251  else if( left.columns() == 0UL ) {
4252  reset( ~lhs );
4253  return;
4254  }
4255 
4256  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4257  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4258 
4259  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4260  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4261  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4262  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4263  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4264  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4265 
4266  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4267  }
4268  //**********************************************************************************************
4269 
4270  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = A * B * scalar.
// The small-matrix kernel is chosen when any of the following holds:
//  - A is a diagonal matrix type,
//  - debug mode is off and A has at most SIMDSIZE*10 rows,
//  - the target has fewer than TDMATTDMATMULT_THRESHOLD elements.
// In every other case the BLAS kernel is used.
4281  template< typename MT3 // Type of the left-hand side target matrix
4282  , typename MT4 // Type of the left-hand side matrix operand
4283  , typename MT5 // Type of the right-hand side matrix operand
4284  , typename ST2 > // Type of the scalar value
4285  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4286  {
4287  if( ( IsDiagonal<MT4>::value ) ||
4288  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
4289  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
4290  selectSmallAssignKernel( C, A, B, scalar );
4291  else
4292  selectBlasAssignKernel( C, A, B, scalar );
4293  }
4294  //**********************************************************************************************
4295 
4296  //**Default assignment to dense matrices (general/general)**************************************
// Default (non-vectorized) kernel for the assignment C = A * B * scalar, general/general
// case per the section banner above. The target is processed column by column:
//  1. the k-range [kbegin,kend) is derived from the lower/upper structure of B (MT5);
//  2. the first k initialises C(i,j) with A(i,kbegin) * B(kbegin,j) and resets the
//     elements outside the active row range;
//  3. the remaining k values are accumulated into the column;
//  4. the finished column range is multiplied by the scalar.
// For SYM or HERM results the strictly upper part is afterwards mirrored from the lower
// part (conjugated for HERM).
// NOTE(review): several original lines are missing from this listing (the return-type
// line 4314 and the inner conditions at 4342, 4347, 4376, 4381, 4399 and 4402), so the
// ibegin / iend expressions below are incomplete as shown.
4310  template< typename MT3 // Type of the left-hand side target matrix
4311  , typename MT4 // Type of the left-hand side matrix operand
4312  , typename MT5 // Type of the right-hand side matrix operand
4313  , typename ST2 > // Type of the scalar value
4315  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4316  {
4317  const size_t M( A.rows() );
4318  const size_t N( B.columns() );
4319  const size_t K( A.columns() );
4320 
// A structured (symmetric/Hermitian/triangular) result must be square.
4321  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4322 
4323  for( size_t j=0UL; j<N; ++j )
4324  {
// Range of k for column j, restricted by the lower/upper structure of B.
4325  const size_t kbegin( ( IsLower<MT5>::value )
4326  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4327  :( 0UL ) );
4328  const size_t kend( ( IsUpper<MT5>::value )
4329  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4330  :( K ) );
4331  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
4332 
// Strictly triangular B with an empty k-range: the whole column is reset.
4333  if( IsStrictlyTriangular<MT5>::value && kbegin == kend ) {
4334  for( size_t i=0UL; i<M; ++i ) {
4335  reset( C(i,j) );
4336  }
4337  continue;
4338  }
4339 
// First k (kbegin): plain assignment instead of accumulation, plus reset of the rows
// outside [ibegin,iend).
4340  {
4341  const size_t ibegin( ( IsLower<MT4>::value )
4343  ?( LOW ? max(j,kbegin+1UL) : kbegin+1UL )
4344  :( LOW ? max(j,kbegin) : kbegin ) )
4345  :( LOW ? j : 0UL ) );
4346  const size_t iend( ( IsUpper<MT4>::value )
4348  ?( UPP ? min(j+1UL,kbegin) : kbegin )
4349  :( UPP ? min(j,kbegin)+1UL : kbegin+1UL ) )
4350  :( UPP ? j+1UL : M ) );
4351 
4352  if( ( IsLower<MT4>::value && IsLower<MT5>::value ) || LOW ) {
4353  for( size_t i=0UL; i<ibegin; ++i ) {
4354  reset( C(i,j) );
4355  }
4356  }
4357  else if( IsStrictlyLower<MT4>::value ) {
4358  reset( C(0UL,j) );
4359  }
4360  for( size_t i=ibegin; i<iend; ++i ) {
4361  C(i,j) = A(i,kbegin) * B(kbegin,j);
4362  }
4363  if( ( IsUpper<MT4>::value && IsUpper<MT5>::value ) || UPP ) {
4364  for( size_t i=iend; i<M; ++i ) {
4365  reset( C(i,j) );
4366  }
4367  }
4368  else if( IsStrictlyUpper<MT4>::value ) {
4369  reset( C(M-1UL,j) );
4370  }
4371  }
4372 
// Remaining k values: accumulate A(i,k) * B(k,j) into the column.
4373  for( size_t k=kbegin+1UL; k<kend; ++k )
4374  {
4375  const size_t ibegin( ( IsLower<MT4>::value )
4377  ?( SYM || HERM || LOW ? max( j, k+1UL ) : k+1UL )
4378  :( SYM || HERM || LOW ? max( j, k ) : k ) )
4379  :( SYM || HERM || LOW ? j : 0UL ) );
4380  const size_t iend( ( IsUpper<MT4>::value )
4382  ?( UPP ? min(j+1UL,k-1UL) : k-1UL )
4383  :( UPP ? min(j+1UL,k) : k ) )
4384  :( UPP ? j+1UL : M ) );
4385 
// With a structured result the restricted row range may be empty for this k.
4386  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) continue;
4387  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4388 
4389  for( size_t i=ibegin; i<iend; ++i ) {
4390  C(i,j) += A(i,k) * B(k,j);
4391  }
// For upper A the element at row iend is assigned rather than accumulated.
4392  if( IsUpper<MT4>::value ) {
4393  C(iend,j) = A(iend,k) * B(k,j);
4394  }
4395  }
4396 
// Final pass over the column: apply the scalar factor to the computed row range.
4397  {
4398  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4400  :( ( SYM || HERM || LOW )?( j ):( 0UL ) ) );
4401  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4403  :( UPP ? j+1UL : M ) );
4404 
4405  if( ( SYM || HERM || LOW || UPP ) && ( ibegin > iend ) ) continue;
4406  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4407 
4408  for( size_t i=ibegin; i<iend; ++i ) {
4409  C(i,j) *= scalar;
4410  }
4411  }
4412  }
4413 
// Symmetric/Hermitian result: fill the strictly upper part from the lower part.
4414  if( SYM || HERM ) {
4415  for( size_t j=1UL; j<N; ++j ) {
4416  for( size_t i=0UL; i<j; ++i ) {
4417  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
4418  }
4419  }
4420  }
4421  }
4422  //**********************************************************************************************
4423 
4424  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default (non-vectorized) assignment kernel for C = A * B * scalar, general/diagonal case.
// The overload is enabled only when MT4 is not diagonal and MT5 is diagonal, so for column j
// only the diagonal element B(j,j) is read.
//
// C      : target matrix that is overwritten
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand (only B(j,j) is accessed)
// scalar : scaling factor applied to every computed element
//
// NOTE(review): original line 4445 (first line of the body) is absent from this listing;
// confirm its content against the real header.
4438  template< typename MT3 // Type of the left-hand side target matrix
4439  , typename MT4 // Type of the left-hand side matrix operand
4440  , typename MT5 // Type of the right-hand side matrix operand
4441  , typename ST2 > // Type of the scalar value
4442  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4443  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4444  {
4446 
4447  const size_t M( A.rows() );
4448  const size_t N( B.columns() );
4449 
4450  for( size_t j=0UL; j<N; ++j )
4451  {
// Row range [ibegin,iend) of column j that is actually computed. It is narrowed according
// to the triangular structure of A: a lower A starts at the diagonal (one below it if
// strictly lower), an upper A ends at the diagonal (one above it if strictly upper).
4452  const size_t ibegin( ( IsLower<MT4>::value )
4453  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
4454  :( 0UL ) );
4455  const size_t iend( ( IsUpper<MT4>::value )
4456  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
4457  :( M ) );
4458  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4459 
// Elements above the computed range are reset (only relevant for a lower A)
4460  if( IsLower<MT4>::value ) {
4461  for( size_t i=0UL; i<ibegin; ++i ) {
4462  reset( C(i,j) );
4463  }
4464  }
// Column j of the result is column j of A scaled by B(j,j) and the scalar
4465  for( size_t i=ibegin; i<iend; ++i ) {
4466  C(i,j) = A(i,j) * B(j,j) * scalar;
4467  }
// Elements below the computed range are reset (only relevant for an upper A)
4468  if( IsUpper<MT4>::value ) {
4469  for( size_t i=iend; i<M; ++i ) {
4470  reset( C(i,j) );
4471  }
4472  }
4473  }
4474  }
4475  //**********************************************************************************************
4476 
4477  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default (non-vectorized) assignment kernel for C = A * B * scalar, diagonal/general case
// (per the section banner). For element (i,j) only the diagonal element A(i,i) is read.
//
// C      : target matrix that is overwritten
// A      : left-hand side multiplication operand (only A(i,i) is accessed)
// B      : right-hand side multiplication operand
// scalar : scaling factor applied to every computed element
//
// NOTE(review): original lines 4495 (return type / enable condition) and 4498 are absent from
// this listing; confirm them against the real header.
4491  template< typename MT3 // Type of the left-hand side target matrix
4492  , typename MT4 // Type of the left-hand side matrix operand
4493  , typename MT5 // Type of the right-hand side matrix operand
4494  , typename ST2 > // Type of the scalar value
4496  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4497  {
4499 
4500  const size_t M( A.rows() );
4501  const size_t N( B.columns() );
4502 
4503  for( size_t j=0UL; j<N; ++j )
4504  {
// Row range [ibegin,iend) of column j that is computed, derived from the triangular
// structure of B (lower: start at the diagonal, upper: end at the diagonal; shifted by
// one for the strictly triangular variants).
4505  const size_t ibegin( ( IsLower<MT5>::value )
4506  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4507  :( 0UL ) );
4508  const size_t iend( ( IsUpper<MT5>::value )
4509  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4510  :( M ) );
4511  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4512 
// NOTE(review): this guard (and the IsUpper one below) tests MT4 although the bounds come
// from MT5. When MT5 is not lower, ibegin is 0 and the loop is empty anyway, so the result
// only differs if MT4 is not lower while MT5 is — presumably impossible for a diagonal MT4;
// confirm the intent.
4513  if( IsLower<MT4>::value ) {
4514  for( size_t i=0UL; i<ibegin; ++i ) {
4515  reset( C(i,j) );
4516  }
4517  }
// Row i of the result is row i of B scaled by A(i,i) and the scalar
4518  for( size_t i=ibegin; i<iend; ++i ) {
4519  C(i,j) = A(i,i) * B(i,j) * scalar;
4520  }
4521  if( IsUpper<MT4>::value ) {
4522  for( size_t i=iend; i<M; ++i ) {
4523  reset( C(i,j) );
4524  }
4525  }
4526  }
4527  }
4528  //**********************************************************************************************
4529 
4530  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default assignment kernel for C = A * B * scalar, diagonal/diagonal case. The overload is
// enabled only when both MT4 and MT5 are diagonal: the whole target is reset and then only
// its diagonal is written.
//
// C      : target matrix that is overwritten
// A      : left-hand side multiplication operand (only A(i,i) is accessed)
// B      : right-hand side multiplication operand (only B(i,i) is accessed)
// scalar : scaling factor applied to every diagonal element
//
// NOTE(review): original line 4551 is absent from this listing; confirm against the header.
4544  template< typename MT3 // Type of the left-hand side target matrix
4545  , typename MT4 // Type of the left-hand side matrix operand
4546  , typename MT5 // Type of the right-hand side matrix operand
4547  , typename ST2 > // Type of the scalar value
4548  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
4549  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4550  {
4552 
// Clear every element first; the loop below only touches the diagonal
4553  reset( C );
4554 
4555  for( size_t i=0UL; i<A.rows(); ++i ) {
4556  C(i,i) = A(i,i) * B(i,i) * scalar;
4557  }
4558  }
4559  //**********************************************************************************************
4560 
4561  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel, default variant: simply forwards all arguments to
// selectDefaultAssignKernel().
//
// NOTE(review): original line 4579 (return type / enable condition selecting this overload)
// is absent from this listing; confirm against the header.
4575  template< typename MT3 // Type of the left-hand side target matrix
4576  , typename MT4 // Type of the left-hand side matrix operand
4577  , typename MT5 // Type of the right-hand side matrix operand
4578  , typename ST2 > // Type of the scalar value
4580  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4581  {
4582  selectDefaultAssignKernel( C, A, B, scalar );
4583  }
4584  //**********************************************************************************************
4585 
4586  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
// Small-matrix assignment kernel for a row-major dense target (DenseMatrix<MT3,false>).
// Instead of computing the product directly, every branch converts one operand into its
// opposite storage order (OppositeType_) using a serial evaluation and then assigns the
// forwarded, scaled product of the converted operand and the other operand to ~C.
//
// NOTE(review): original lines 4605 (return type / enable condition), 4608-4611, and the
// conditions of the first two branches (4615 and 4619) are absent from this listing, which is
// why the first two closing braces appear unmatched here; confirm against the header.
4601  template< typename MT3 // Type of the left-hand side target matrix
4602  , typename MT4 // Type of the left-hand side matrix operand
4603  , typename MT5 // Type of the right-hand side matrix operand
4604  , typename ST2 > // Type of the scalar value
4606  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4607  {
4612 
4613  const ForwardFunctor fwd;
4614 
// First branch (condition not visible): convert B, keep A
4616  const OppositeType_<MT5> tmp( serial( B ) );
4617  assign( ~C, fwd( A * tmp ) * scalar );
4618  }
// Second branch (condition not visible): convert A, keep B
4620  const OppositeType_<MT4> tmp( serial( A ) );
4621  assign( ~C, fwd( tmp * B ) * scalar );
4622  }
// Otherwise convert whichever operand has fewer elements; B is chosen on a tie
4623  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
4624  const OppositeType_<MT5> tmp( serial( B ) );
4625  assign( ~C, fwd( A * tmp ) * scalar );
4626  }
4627  else {
4628  const OppositeType_<MT4> tmp( serial( A ) );
4629  assign( ~C, fwd( tmp * B ) * scalar );
4630  }
4631  }
4632  //**********************************************************************************************
4633 
4634  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for a column-major dense target
// (DenseMatrix<MT3,true>): C = A * B * scalar.
//
// The rows of C are processed in panels of 8, 5, 4, 3, 2 and 1 SIMD vectors (SIMDSIZE rows
// per vector). For every panel the columns are visited (two at a time where possible), the
// products A.load(i,k) * B(k,j) are accumulated over k in SIMD accumulators, and the
// accumulators are multiplied by the SIMD scaling factor when stored into C. Rows that do
// not fill a complete SIMD vector are handled element by element at the end of the panel
// section. The SYM/HERM/LOW/UPP flags restrict which columns are computed per panel; the
// parts skipped that way are filled in by the post-processing at the end of the function.
//
// C      : target matrix that is overwritten
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand
// scalar : scaling factor
//
// NOTE(review): original lines 4653 (return type / enable condition) and 4676 (the condition
// guarding the 8-vector panel loop) are absent from this listing; confirm against the header.
// NOTE(review): the SIMD accumulators (xmm1, ...) are only declared before being used with
// +=, so the kernel relies on SIMDType's default construction for their initial value —
// presumably zero; confirm.
4649  template< typename MT3 // Type of the left-hand side target matrix
4650  , typename MT4 // Type of the left-hand side matrix operand
4651  , typename MT5 // Type of the right-hand side matrix operand
4652  , typename ST2 > // Type of the scalar value
4654  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4655  {
// A scalar remainder loop is required unless both C and A are padded
4656  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
4657 
4658  const size_t M( A.rows() );
4659  const size_t N( B.columns() );
4660  const size_t K( A.columns() );
4661 
4662  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4663 
// End of the vectorized row range: M rounded down to a multiple of SIMDSIZE when a
// remainder loop is needed (the assertion below checks exactly that), otherwise M itself
4664  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
4665  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
4666 
// SIMD representation of the scalar, multiplied onto every accumulator before storing
4667  const SIMDType factor( set( scalar ) );
4668 
// For a result that is both lower and upper the whole target is cleared up front
// (only when it has more than 3*SIMDSIZE rows)
4669  if( LOW && UPP && M > SIMDSIZE*3UL ) {
4670  reset( ~C );
4671  }
4672 
4673  {
4674  size_t i( 0UL );
4675 
// Panel of 8 SIMD vectors, one column at a time; only used when none of SYM/HERM/LOW/UPP
// is set. The enclosing condition (original line 4676) is not visible in this listing.
4677  {
4678  for( ; !SYM && !HERM && !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
4679  for( size_t j=0UL; j<N; ++j )
4680  {
// Range [kbegin,kend) of the inner dimension, narrowed by the triangular structure of the
// operands: a lower B starts at j (j+1 if strictly lower), an upper A starts no earlier
// than the first row i of the panel, an upper B ends at j+1 (j if strictly upper), and a
// lower A ends no later than the last row of the panel (capped at K).
4681  const size_t kbegin( ( IsLower<MT5>::value )
4682  ?( ( IsUpper<MT4>::value )
4683  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4684  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4685  :( IsUpper<MT4>::value ? i : 0UL ) );
4686  const size_t kend( ( IsUpper<MT5>::value )
4687  ?( ( IsLower<MT4>::value )
4688  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4689  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
4690  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
4691 
4692  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4693 
4694  for( size_t k=kbegin; k<kend; ++k ) {
4695  const SIMDType b1( set( B(k,j) ) );
4696  xmm1 += A.load(i ,k) * b1;
4697  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
4698  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
4699  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
4700  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
4701  xmm6 += A.load(i+SIMDSIZE*5UL,k) * b1;
4702  xmm7 += A.load(i+SIMDSIZE*6UL,k) * b1;
4703  xmm8 += A.load(i+SIMDSIZE*7UL,k) * b1;
4704  }
4705 
4706  (~C).store( i , j, xmm1 * factor );
4707  (~C).store( i+SIMDSIZE , j, xmm2 * factor );
4708  (~C).store( i+SIMDSIZE*2UL, j, xmm3 * factor );
4709  (~C).store( i+SIMDSIZE*3UL, j, xmm4 * factor );
4710  (~C).store( i+SIMDSIZE*4UL, j, xmm5 * factor );
4711  (~C).store( i+SIMDSIZE*5UL, j, xmm6 * factor );
4712  (~C).store( i+SIMDSIZE*6UL, j, xmm7 * factor );
4713  (~C).store( i+SIMDSIZE*7UL, j, xmm8 * factor );
4714  }
4715  }
4716  }
4717 
// Panel of 5 SIMD vectors, two columns at a time plus a single-column tail; only used when
// none of SYM/HERM/LOW/UPP is set
4718  for( ; !SYM && !HERM && !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
4719  {
4720  size_t j( 0UL );
4721 
4722  for( ; (j+2UL) <= N; j+=2UL )
4723  {
// Same narrowing of the k-range as above; the upper bound accounts for column j+1 as well
4724  const size_t kbegin( ( IsLower<MT5>::value )
4725  ?( ( IsUpper<MT4>::value )
4726  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4727  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4728  :( IsUpper<MT4>::value ? i : 0UL ) );
4729  const size_t kend( ( IsUpper<MT5>::value )
4730  ?( ( IsLower<MT4>::value )
4731  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
4732  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
4733  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
4734 
// xmm1-xmm5 accumulate column j, xmm6-xmm10 accumulate column j+1
4735  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
4736 
4737  for( size_t k=kbegin; k<kend; ++k ) {
4738  const SIMDType a1( A.load(i ,k) );
4739  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
4740  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
4741  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
4742  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
4743  const SIMDType b1( set( B(k,j ) ) );
4744  const SIMDType b2( set( B(k,j+1UL) ) );
4745  xmm1 += a1 * b1;
4746  xmm2 += a2 * b1;
4747  xmm3 += a3 * b1;
4748  xmm4 += a4 * b1;
4749  xmm5 += a5 * b1;
4750  xmm6 += a1 * b2;
4751  xmm7 += a2 * b2;
4752  xmm8 += a3 * b2;
4753  xmm9 += a4 * b2;
4754  xmm10 += a5 * b2;
4755  }
4756 
4757  (~C).store( i , j , xmm1 * factor );
4758  (~C).store( i+SIMDSIZE , j , xmm2 * factor );
4759  (~C).store( i+SIMDSIZE*2UL, j , xmm3 * factor );
4760  (~C).store( i+SIMDSIZE*3UL, j , xmm4 * factor );
4761  (~C).store( i+SIMDSIZE*4UL, j , xmm5 * factor );
4762  (~C).store( i , j+1UL, xmm6 * factor );
4763  (~C).store( i+SIMDSIZE , j+1UL, xmm7 * factor );
4764  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm8 * factor );
4765  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm9 * factor );
4766  (~C).store( i+SIMDSIZE*4UL, j+1UL, xmm10 * factor );
4767  }
4768 
// Single remaining column of this panel; the upper bound of k is only limited by a lower A
4769  if( j < N )
4770  {
4771  const size_t kbegin( ( IsLower<MT5>::value )
4772  ?( ( IsUpper<MT4>::value )
4773  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4774  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4775  :( IsUpper<MT4>::value ? i : 0UL ) );
4776  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
4777 
4778  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
4779 
4780  for( size_t k=kbegin; k<kend; ++k ) {
4781  const SIMDType b1( set( B(k,j) ) );
4782  xmm1 += A.load(i ,k) * b1;
4783  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
4784  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
4785  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
4786  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
4787  }
4788 
4789  (~C).store( i , j, xmm1 * factor );
4790  (~C).store( i+SIMDSIZE , j, xmm2 * factor );
4791  (~C).store( i+SIMDSIZE*2UL, j, xmm3 * factor );
4792  (~C).store( i+SIMDSIZE*3UL, j, xmm4 * factor );
4793  (~C).store( i+SIMDSIZE*4UL, j, xmm5 * factor );
4794  }
4795  }
4796 
// Panel of 4 SIMD vectors; skipped when the result is both lower and upper. For
// SYM/HERM/LOW the columns end with the panel's last row, for UPP they start at row i.
4797  for( ; !( LOW && UPP ) && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
4798  {
4799  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*4UL,N) : N );
4800  size_t j( UPP ? i : 0UL );
4801 
4802  for( ; (j+2UL) <= jend; j+=2UL )
4803  {
4804  const size_t kbegin( ( IsLower<MT5>::value )
4805  ?( ( IsUpper<MT4>::value )
4806  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4807  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4808  :( IsUpper<MT4>::value ? i : 0UL ) );
4809  const size_t kend( ( IsUpper<MT5>::value )
4810  ?( ( IsLower<MT4>::value )
4811  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
4812  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
4813  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
4814 
// xmm1-xmm4 accumulate column j, xmm5-xmm8 accumulate column j+1
4815  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4816 
4817  for( size_t k=kbegin; k<kend; ++k ) {
4818  const SIMDType a1( A.load(i ,k) );
4819  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
4820  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
4821  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
4822  const SIMDType b1( set( B(k,j ) ) );
4823  const SIMDType b2( set( B(k,j+1UL) ) );
4824  xmm1 += a1 * b1;
4825  xmm2 += a2 * b1;
4826  xmm3 += a3 * b1;
4827  xmm4 += a4 * b1;
4828  xmm5 += a1 * b2;
4829  xmm6 += a2 * b2;
4830  xmm7 += a3 * b2;
4831  xmm8 += a4 * b2;
4832  }
4833 
4834  (~C).store( i , j , xmm1 * factor );
4835  (~C).store( i+SIMDSIZE , j , xmm2 * factor );
4836  (~C).store( i+SIMDSIZE*2UL, j , xmm3 * factor );
4837  (~C).store( i+SIMDSIZE*3UL, j , xmm4 * factor );
4838  (~C).store( i , j+1UL, xmm5 * factor );
4839  (~C).store( i+SIMDSIZE , j+1UL, xmm6 * factor );
4840  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm7 * factor );
4841  (~C).store( i+SIMDSIZE*3UL, j+1UL, xmm8 * factor );
4842  }
4843 
// Single remaining column of this panel
4844  if( j < jend )
4845  {
4846  const size_t kbegin( ( IsLower<MT5>::value )
4847  ?( ( IsUpper<MT4>::value )
4848  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4849  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4850  :( IsUpper<MT4>::value ? i : 0UL ) );
4851  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
4852 
4853  SIMDType xmm1, xmm2, xmm3, xmm4;
4854 
4855  for( size_t k=kbegin; k<kend; ++k ) {
4856  const SIMDType b1( set( B(k,j) ) );
4857  xmm1 += A.load(i ,k) * b1;
4858  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
4859  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
4860  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
4861  }
4862 
4863  (~C).store( i , j, xmm1 * factor );
4864  (~C).store( i+SIMDSIZE , j, xmm2 * factor );
4865  (~C).store( i+SIMDSIZE*2UL, j, xmm3 * factor );
4866  (~C).store( i+SIMDSIZE*3UL, j, xmm4 * factor );
4867  }
4868  }
4869 
// Panel of 3 SIMD vectors, same column restrictions as the 4-vector panel
4870  for( ; (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
4871  {
4872  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*3UL,N) : N );
4873  size_t j( UPP ? i : 0UL );
4874 
4875  for( ; (j+2UL) <= jend; j+=2UL )
4876  {
4877  const size_t kbegin( ( IsLower<MT5>::value )
4878  ?( ( IsUpper<MT4>::value )
4879  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4880  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4881  :( IsUpper<MT4>::value ? i : 0UL ) );
4882  const size_t kend( ( IsUpper<MT5>::value )
4883  ?( ( IsLower<MT4>::value )
4884  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
4885  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
4886  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
4887 
// xmm1-xmm3 accumulate column j, xmm4-xmm6 accumulate column j+1
4888  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
4889 
4890  for( size_t k=kbegin; k<kend; ++k ) {
4891  const SIMDType a1( A.load(i ,k) );
4892  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
4893  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
4894  const SIMDType b1( set( B(k,j ) ) );
4895  const SIMDType b2( set( B(k,j+1UL) ) );
4896  xmm1 += a1 * b1;
4897  xmm2 += a2 * b1;
4898  xmm3 += a3 * b1;
4899  xmm4 += a1 * b2;
4900  xmm5 += a2 * b2;
4901  xmm6 += a3 * b2;
4902  }
4903 
4904  (~C).store( i , j , xmm1 * factor );
4905  (~C).store( i+SIMDSIZE , j , xmm2 * factor );
4906  (~C).store( i+SIMDSIZE*2UL, j , xmm3 * factor );
4907  (~C).store( i , j+1UL, xmm4 * factor );
4908  (~C).store( i+SIMDSIZE , j+1UL, xmm5 * factor );
4909  (~C).store( i+SIMDSIZE*2UL, j+1UL, xmm6 * factor );
4910  }
4911 
// Single remaining column of this panel
4912  if( j < jend )
4913  {
4914  const size_t kbegin( ( IsLower<MT5>::value )
4915  ?( ( IsUpper<MT4>::value )
4916  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4917  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4918  :( IsUpper<MT4>::value ? i : 0UL ) );
4919  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
4920 
4921  SIMDType xmm1, xmm2, xmm3;
4922 
4923  for( size_t k=kbegin; k<kend; ++k ) {
4924  const SIMDType b1( set( B(k,j) ) );
4925  xmm1 += A.load(i ,k) * b1;
4926  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
4927  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
4928  }
4929 
4930  (~C).store( i , j, xmm1 * factor );
4931  (~C).store( i+SIMDSIZE , j, xmm2 * factor );
4932  (~C).store( i+SIMDSIZE*2UL, j, xmm3 * factor );
4933  }
4934  }
4935 
// Panel of 2 SIMD vectors
4936  for( ; (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
4937  {
4938  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE*2UL,N) : N );
4939  size_t j( UPP ? i : 0UL );
4940 
4941  for( ; (j+2UL) <= jend; j+=2UL )
4942  {
4943  const size_t kbegin( ( IsLower<MT5>::value )
4944  ?( ( IsUpper<MT4>::value )
4945  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4946  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4947  :( IsUpper<MT4>::value ? i : 0UL ) );
4948  const size_t kend( ( IsUpper<MT5>::value )
4949  ?( ( IsLower<MT4>::value )
4950  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
4951  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
4952  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
4953 
// xmm1/xmm2 accumulate column j, xmm3/xmm4 accumulate column j+1
4954  SIMDType xmm1, xmm2, xmm3, xmm4;
4955 
4956  for( size_t k=kbegin; k<kend; ++k ) {
4957  const SIMDType a1( A.load(i ,k) );
4958  const SIMDType a2( A.load(i+SIMDSIZE,k) );
4959  const SIMDType b1( set( B(k,j ) ) );
4960  const SIMDType b2( set( B(k,j+1UL) ) );
4961  xmm1 += a1 * b1;
4962  xmm2 += a2 * b1;
4963  xmm3 += a1 * b2;
4964  xmm4 += a2 * b2;
4965  }
4966 
4967  (~C).store( i , j , xmm1 * factor );
4968  (~C).store( i+SIMDSIZE, j , xmm2 * factor );
4969  (~C).store( i , j+1UL, xmm3 * factor );
4970  (~C).store( i+SIMDSIZE, j+1UL, xmm4 * factor );
4971  }
4972 
// Single remaining column of this panel
4973  if( j < jend )
4974  {
4975  const size_t kbegin( ( IsLower<MT5>::value )
4976  ?( ( IsUpper<MT4>::value )
4977  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4978  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4979  :( IsUpper<MT4>::value ? i : 0UL ) );
4980  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
4981 
4982  SIMDType xmm1, xmm2;
4983 
4984  for( size_t k=kbegin; k<kend; ++k ) {
4985  const SIMDType b1( set( B(k,j) ) );
4986  xmm1 += A.load(i ,k) * b1;
4987  xmm2 += A.load(i+SIMDSIZE,k) * b1;
4988  }
4989 
4990  (~C).store( i , j, xmm1 * factor );
4991  (~C).store( i+SIMDSIZE, j, xmm2 * factor );
4992  }
4993  }
4994 
// Panel of a single SIMD vector; here the upper bound of k depends on B only
4995  for( ; i<ipos; i+=SIMDSIZE )
4996  {
4997  const size_t jend( SYM || HERM || LOW ? min(i+SIMDSIZE,N) : N );
4998  size_t j( UPP ? i : 0UL );
4999 
5000  for( ; (j+2UL) <= jend; j+=2UL )
5001  {
5002  const size_t kbegin( ( IsLower<MT5>::value )
5003  ?( ( IsUpper<MT4>::value )
5004  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5005  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5006  :( IsUpper<MT4>::value ? i : 0UL ) );
5007  const size_t kend( ( IsUpper<MT5>::value )
5008  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
5009  :( K ) );
5010 
5011  SIMDType xmm1, xmm2;
5012 
5013  for( size_t k=kbegin; k<kend; ++k ) {
5014  const SIMDType a1( A.load(i,k) );
5015  xmm1 += a1 * set( B(k,j ) );
5016  xmm2 += a1 * set( B(k,j+1UL) );
5017  }
5018 
5019  (~C).store( i, j , xmm1 * factor );
5020  (~C).store( i, j+1UL, xmm2 * factor );
5021  }
5022 
// Single remaining column of this panel; k runs up to K
5023  if( j < jend )
5024  {
5025  const size_t kbegin( ( IsLower<MT5>::value )
5026  ?( ( IsUpper<MT4>::value )
5027  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5028  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5029  :( IsUpper<MT4>::value ? i : 0UL ) );
5030 
5031  SIMDType xmm1;
5032 
5033  for( size_t k=kbegin; k<K; ++k ) {
5034  xmm1 += A.load(i,k) * set( B(k,j) );
5035  }
5036 
5037  (~C).store( i, j, xmm1 * factor );
5038  }
5039  }
5040 
// Scalar handling of the rows beyond ipos (only when a remainder loop is required).
// Columns start at i when the result is both lower and upper, otherwise at 0.
5041  for( ; remainder && i<M; ++i )
5042  {
5043  size_t j( LOW && UPP ? i : 0UL );
5044 
5045  for( ; (j+2UL) <= N; j+=2UL )
5046  {
5047  const size_t kbegin( ( IsLower<MT5>::value )
5048  ?( ( IsUpper<MT4>::value )
5049  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5050  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5051  :( IsUpper<MT4>::value ? i : 0UL ) );
5052  const size_t kend( ( IsUpper<MT5>::value )
5053  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
5054  :( K ) );
5055 
// Value-initialized accumulators for the two columns j and j+1
5056  ElementType value1 = ElementType();
5057  ElementType value2 = ElementType();
5058 
5059  for( size_t k=kbegin; k<kend; ++k ) {
5060  value1 += A(i,k) * B(k,j );
5061  value2 += A(i,k) * B(k,j+1UL);
5062  }
5063 
5064  (~C)(i,j ) = value1 * scalar;
5065  (~C)(i,j+1UL) = value2 * scalar;
5066  }
5067 
// Single remaining column of this row
5068  if( j < N )
5069  {
5070  const size_t kbegin( ( IsLower<MT5>::value )
5071  ?( ( IsUpper<MT4>::value )
5072  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5073  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5074  :( IsUpper<MT4>::value ? i : 0UL ) );
5075 
5076  ElementType value = ElementType();
5077 
5078  for( size_t k=kbegin; k<K; ++k ) {
5079  value += A(i,k) * B(k,j);
5080  }
5081 
5082  (~C)(i,j) = value * scalar;
5083  }
5084  }
5085  }
5086 
// Post-processing in blocks of 4*SIMDSIZE, only for results with more than 4*SIMDSIZE rows.
// For column j, iend is j rounded down to a multiple of 4*SIMDSIZE, i.e. the rows above
// the block that contains the diagonal element.
// SYM/HERM: these elements are copied from their transposed counterpart (conjugated for HERM).
5087  if( ( SYM || HERM ) && ( M > SIMDSIZE*4UL ) ) {
5088  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
5089  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
5090  for( size_t i=0UL; i<iend; ++i ) {
5091  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5092  }
5093  }
5094  }
// Lower result: the same elements above the diagonal blocks are reset
5095  else if( LOW && !UPP && M > SIMDSIZE*4UL ) {
5096  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
5097  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
5098  for( size_t i=0UL; i<iend; ++i ) {
5099  reset( (~C)(i,j) );
5100  }
5101  }
5102  }
// Upper result: for row i the columns left of the block containing the diagonal are reset
5103  else if( !LOW && UPP && M > SIMDSIZE*4UL ) {
5104  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
5105  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
5106  for( size_t j=0UL; j<jend; ++j ) {
5107  reset( (~C)(i,j) );
5108  }
5109  }
5110  }
5111  }
5112  //**********************************************************************************************
5113 
5114  //**Default assignment to dense matrices (large matrices)***************************************
// Large-matrix assignment kernel, default variant: simply forwards all arguments to
// selectDefaultAssignKernel().
//
// NOTE(review): original line 5132 (return type / enable condition selecting this overload)
// is absent from this listing; confirm against the header.
5128  template< typename MT3 // Type of the left-hand side target matrix
5129  , typename MT4 // Type of the left-hand side matrix operand
5130  , typename MT5 // Type of the right-hand side matrix operand
5131  , typename ST2 > // Type of the scalar value
5133  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5134  {
5135  selectDefaultAssignKernel( C, A, B, scalar );
5136  }
5137  //**********************************************************************************************
5138 
5139  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Large-matrix assignment kernel, vectorized variant: dispatches to one of the *mmm() helper
// functions depending on the structure flags of the expression. The flags are tested in the
// order SYM, HERM, LOW, UPP; the first one that is set decides, otherwise mmm() is used.
//
// NOTE(review): lmmm(), ummm() and mmm() receive an additional ST2(0) argument — presumably
// the factor applied to the previous content of C; confirm against the MMM.h documentation.
// NOTE(review): original line 5158 (return type / enable condition) is absent from this
// listing; confirm against the header.
5154  template< typename MT3 // Type of the left-hand side target matrix
5155  , typename MT4 // Type of the left-hand side matrix operand
5156  , typename MT5 // Type of the right-hand side matrix operand
5157  , typename ST2 > // Type of the scalar value
5159  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5160  {
5161  if( SYM )
5162  smmm( C, A, B, scalar );
5163  else if( HERM )
5164  hmmm( C, A, B, scalar );
5165  else if( LOW )
5166  lmmm( C, A, B, scalar, ST2(0) );
5167  else if( UPP )
5168  ummm( C, A, B, scalar, ST2(0) );
5169  else
5170  mmm( C, A, B, scalar, ST2(0) );
5171  }
5172  //**********************************************************************************************
5173 
5174  //**BLAS-based assignment to dense matrices (default)*******************************************
// BLAS assignment kernel, default variant: does not call BLAS at all but forwards all
// arguments to selectLargeAssignKernel().
//
// NOTE(review): original line 5192 (return type / enable condition selecting this overload)
// is absent from this listing; confirm against the header.
5188  template< typename MT3 // Type of the left-hand side target matrix
5189  , typename MT4 // Type of the left-hand side matrix operand
5190  , typename MT5 // Type of the right-hand side matrix operand
5191  , typename ST2 > // Type of the scalar value
5193  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5194  {
5195  selectLargeAssignKernel( C, A, B, scalar );
5196  }
5197  //**********************************************************************************************
5198 
5199  //**BLAS-based assignment to dense matrices*****************************************************
5200 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
5201 
// BLAS-based assignment kernel for C = A * B * scalar (only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled).
// A triangular operand is handled via trmm(): the other operand is first copied into C and
// the triangular operand is then multiplied onto C from the left (triangular A) or from the
// right (triangular B). Without a triangular operand gemm() is called. The scalar is
// converted to the element type of C before being handed to the BLAS wrappers.
//
// NOTE(review): original line 5218 (return type / enable condition) is absent from this
// listing; confirm against the header.
5214  template< typename MT3 // Type of the left-hand side target matrix
5215  , typename MT4 // Type of the left-hand side matrix operand
5216  , typename MT5 // Type of the right-hand side matrix operand
5217  , typename ST2 > // Type of the scalar value
5219  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5220  {
5221  typedef ElementType_<MT3> ET;
5222 
// Triangular left operand: C = B, then A is applied to C from the left
5223  if( IsTriangular<MT4>::value ) {
5224  assign( C, B );
5225  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5226  }
// Triangular right operand: C = A, then B is applied to C from the right
5227  else if( IsTriangular<MT5>::value ) {
5228  assign( C, A );
5229  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5230  }
// General case: gemm() with ET(scalar) and ET(0) as the two scalar arguments
5231  else {
5232  gemm( C, A, B, ET(scalar), ET(0) );
5233  }
5234  }
5235 #endif
5236  //**********************************************************************************************
5237 
5238  //**Assignment to sparse matrices***************************************************************
// Assignment of the scaled matrix product expression to a sparse matrix. The expression is
// first evaluated serially into a temporary of type TmpType, which is then passed through the
// forward functor and assigned to the sparse target.
//
// lhs : target sparse matrix; the assertions require it to already match the size of rhs
// rhs : scaled multiplication expression to be assigned
//
// NOTE(review): original lines 5252 (return type), 5255, 5257 and 5259-5264 are absent from
// this listing — presumably they hold the definition of TmpType and compile-time checks;
// confirm against the header.
5250  template< typename MT // Type of the target sparse matrix
5251  , bool SO > // Storage order of the target sparse matrix
5253  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5254  {
5256 
5258 
5265 
5266  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5267  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5268 
5269  const ForwardFunctor fwd;
5270 
// Evaluate the expression once (serially) into a temporary and assign that temporary
5271  const TmpType tmp( serial( rhs ) );
5272  assign( ~lhs, fwd( tmp ) );
5273  }
5274  //**********************************************************************************************
5275 
5276  //**Restructuring assignment to row-major matrices**********************************************
// Restructuring assignment of the scaled matrix product expression to a row-major matrix.
// Instead of evaluating left * right directly, the product is rebuilt with one or both
// operands transposed, and the forwarded, scaled result is assigned to the target.
//
// lhs : target row-major matrix; the assertions require it to already match the size of rhs
// rhs : scaled multiplication expression to be assigned
//
// NOTE(review): original lines 5291 (return type / enable condition), 5294, 5296 and 5306
// (the condition of the first branch) are absent from this listing, which is why the first
// assign() call appears without its `if`; confirm against the header.
5290  template< typename MT > // Type of the target matrix
5292  assign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
5293  {
5295 
5297 
5298  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5299  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5300 
5301  const ForwardFunctor fwd;
5302 
5303  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5304  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5305 
// First branch (condition not visible): both operands are transposed
5307  assign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
// Symmetric left operand: only the left operand is transposed
5308  else if( IsSymmetric<MT1>::value )
5309  assign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
// Otherwise: only the right operand is transposed
5310  else
5311  assign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
5312  }
5313  //**********************************************************************************************
5314 
5315  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the scaled matrix product expression to a dense matrix
// (lhs += scalar * left * right). Both operands are evaluated serially into LT/RT objects
// and the work is then delegated to selectAddAssignKernel().
//
// lhs : target dense matrix; the assertions require it to already match the size of rhs
// rhs : scaled multiplication expression to be added
//
// NOTE(review): original lines 5329 (return type / enable condition) and 5332 are absent
// from this listing; confirm against the header.
5327  template< typename MT // Type of the target dense matrix
5328  , bool SO > // Storage order of the target dense matrix
5330  addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5331  {
5333 
5334  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5335  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5336 
5337  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5338  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5339 
// Nothing to add if the target is empty or the inner dimension of the product is zero
5340  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5341  return;
5342  }
5343 
5344  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5345  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5346 
// The evaluated operands must keep the dimensions of the original operands and fit the target
5347  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5348  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5349  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5350  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5351  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5352  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5353 
5354  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5355  }
5356  //**********************************************************************************************
5357 
5358  //**Addition assignment to dense matrices (kernel selection)************************************
5369  template< typename MT3 // Type of the left-hand side target matrix
5370  , typename MT4 // Type of the left-hand side matrix operand
5371  , typename MT5 // Type of the right-hand side matrix operand
5372  , typename ST2 > // Type of the scalar value
5373  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5374  {
5375  if( ( IsDiagonal<MT4>::value ) ||
5376  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
5377  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
5378  selectSmallAddAssignKernel( C, A, B, scalar );
5379  else
5380  selectBlasAddAssignKernel( C, A, B, scalar );
5381  }
5382  //**********************************************************************************************
5383 
5384  //**Default addition assignment to dense matrices (general/general)*****************************
5398  template< typename MT3 // Type of the left-hand side target matrix
5399  , typename MT4 // Type of the left-hand side matrix operand
5400  , typename MT5 // Type of the right-hand side matrix operand
5401  , typename ST2 > // Type of the scalar value
5402  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
5403  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5404  {
5405  const ResultType tmp( serial( A * B * scalar ) );
5406  addAssign( C, tmp );
5407  }
5408  //**********************************************************************************************
5409 
5410  //**Default addition assignment to dense matrices (general/diagonal)****************************
// Default (non-vectorized) addition assignment kernel for C += A * B * scalar, general/diagonal
// case. The overload is enabled only when MT4 is not diagonal and MT5 is diagonal, so for
// column j only the diagonal element B(j,j) is read. Elements outside the computed row range
// are left untouched.
//
// C      : target matrix the product is added to
// A      : left-hand side multiplication operand
// B      : right-hand side multiplication operand (only B(j,j) is accessed)
// scalar : scaling factor applied to every added term
//
// NOTE(review): original line 5431 (first line of the body) is absent from this listing;
// confirm its content against the real header.
5424  template< typename MT3 // Type of the left-hand side target matrix
5425  , typename MT4 // Type of the left-hand side matrix operand
5426  , typename MT5 // Type of the right-hand side matrix operand
5427  , typename ST2 > // Type of the scalar value
5428  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
5429  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5430  {
5432 
5433  const size_t M( A.rows() );
5434  const size_t N( B.columns() );
5435 
5436  for( size_t j=0UL; j<N; ++j )
5437  {
// Row range [ibegin,iend) of column j, narrowed by the triangular structure of A
5438  const size_t ibegin( ( IsLower<MT4>::value )
5439  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
5440  :( 0UL ) );
5441  const size_t iend( ( IsUpper<MT4>::value )
5442  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
5443  :( M ) );
5444  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5445 
// The range is processed two rows per iteration; ipos marks the end of the part that is
// covered by complete pairs (inum rounded down to an even number)
5446  const size_t inum( iend - ibegin );
5447  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5448 
5449  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5450  C(i ,j) += A(i ,j) * B(j,j) * scalar;
5451  C(i+1UL,j) += A(i+1UL,j) * B(j,j) * scalar;
5452  }
// Left-over row in case of an odd number of rows
5453  if( ipos < iend ) {
5454  C(ipos,j) += A(ipos,j) * B(j,j) * scalar;
5455  }
5456  }
5457  }
5458  //**********************************************************************************************
5459 
5460  //**Default addition assignment to dense matrices (diagonal/general)****************************
/*!\brief Default addition assignment of a scaled diagonal dense matrix-general dense matrix
//        multiplication.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
// \param scalar The scaling factor.
//
// This overload participates only when \a MT4 is diagonal and \a MT5 is not diagonal. Only the
// diagonal element A(i,i) is read for row \a i, so every processed element C(i,j) is
// incremented by A(i,i) * B(i,j) * scalar.
*/
5474  template< typename MT3 // Type of the left-hand side target matrix
5475  , typename MT4 // Type of the left-hand side matrix operand
5476  , typename MT5 // Type of the right-hand side matrix operand
5477  , typename ST2 > // Type of the scalar value
5478  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5479  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5480  {
// NOTE(review): the listing numbering skips 5481 here; that line is not visible in this view.
5482 
5483  const size_t M( A.rows() );
5484  const size_t N( B.columns() );
5485 
5486  for( size_t j=0UL; j<N; ++j )
5487  {
// Row range [ibegin,iend) of column j, narrowed by the (strictly) lower/upper structure of B.
5488  const size_t ibegin( ( IsLower<MT5>::value )
5489  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5490  :( 0UL ) );
5491  const size_t iend( ( IsUpper<MT5>::value )
5492  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5493  :( M ) );
5494  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5495 
// ipos marks the end of the two-row unrolled loop: ibegin plus the row count with its lowest
// bit cleared (i.e. rounded down to an even number).
5496  const size_t inum( iend - ibegin );
5497  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5498 
5499  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5500  C(i ,j) += A(i ,i ) * B(i ,j) * scalar;
5501  C(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
5502  }
// Odd row count: process the single leftover row.
5503  if( ipos < iend ) {
5504  C(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
5505  }
5506  }
5507  }
5508  //**********************************************************************************************
5509 
5510  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
/*!\brief Default addition assignment of a scaled diagonal dense matrix-diagonal dense matrix
//        multiplication.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (diagonal).
// \param scalar The scaling factor.
//
// This overload participates only when both \a MT4 and \a MT5 are diagonal. Only the diagonal
// elements of \a C are touched: C(i,i) is incremented by A(i,i) * B(i,i) * scalar.
*/
5524  template< typename MT3 // Type of the left-hand side target matrix
5525  , typename MT4 // Type of the left-hand side matrix operand
5526  , typename MT5 // Type of the right-hand side matrix operand
5527  , typename ST2 > // Type of the scalar value
5528  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
5529  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5530  {
// NOTE(review): the listing numbering skips 5531 here; that line is not visible in this view.
5532 
5533  for( size_t i=0UL; i<A.rows(); ++i ) {
5534  C(i,i) += A(i,i) * B(i,i) * scalar;
5535  }
5536  }
5537  //**********************************************************************************************
5538 
5539  //**Default addition assignment to dense matrices (small matrices)******************************
/*!\brief Default (non-vectorized) addition assignment of a small scaled dense matrix product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Simply forwards all arguments to selectDefaultAddAssignKernel().
//
// NOTE(review): the listing numbering skips 5557, which is where the return type / enabling
// condition of this overload would sit; it is not visible in this view.
*/
5553  template< typename MT3 // Type of the left-hand side target matrix
5554  , typename MT4 // Type of the left-hand side matrix operand
5555  , typename MT5 // Type of the right-hand side matrix operand
5556  , typename ST2 > // Type of the scalar value
5558  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5559  {
5560  selectDefaultAddAssignKernel( C, A, B, scalar );
5561  }
5562  //**********************************************************************************************
5563 
5564  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
/*!\brief Addition assignment of a small scaled dense matrix product to a row-major dense
//        matrix.
//
// \param C The target left-hand side dense matrix (row-major, \c DenseMatrix<MT3,false>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Instead of computing the product directly, one operand is copied (serially) into a matrix
// of its opposite storage order type, the product is rebuilt from that copy and the other
// operand, passed through a \c ForwardFunctor, scaled, and handed back to addAssign().
//
// NOTE(review): several listing lines are not visible in this view: 5583 (return type /
// enabling condition), 5586-5589, and the headers of the first two branches (5593 and 5597).
// The conditions selecting those two branches therefore cannot be stated from here.
*/
5579  template< typename MT3 // Type of the left-hand side target matrix
5580  , typename MT4 // Type of the left-hand side matrix operand
5581  , typename MT5 // Type of the right-hand side matrix operand
5582  , typename ST2 > // Type of the scalar value
5584  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5585  {
5590 
5591  const ForwardFunctor fwd;
5592 
// First branch (header not visible): convert B to its opposite storage order.
5594  const OppositeType_<MT5> tmp( serial( B ) );
5595  addAssign( ~C, fwd( A * tmp ) * scalar );
5596  }
// Second branch (header not visible): convert A to its opposite storage order.
5598  const OppositeType_<MT4> tmp( serial( A ) );
5599  addAssign( ~C, fwd( tmp * B ) * scalar );
5600  }
// Otherwise convert whichever operand has fewer elements; B is chosen on a tie.
5601  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
5602  const OppositeType_<MT5> tmp( serial( B ) );
5603  addAssign( ~C, fwd( A * tmp ) * scalar );
5604  }
5605  else {
5606  const OppositeType_<MT4> tmp( serial( A ) );
5607  addAssign( ~C, fwd( tmp * B ) * scalar );
5608  }
5609  }
5610  //**********************************************************************************************
5611 
5612  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
/*!\brief Vectorized addition assignment of a small scaled dense matrix product to a
//        column-major dense matrix.
//
// \param C The target left-hand side dense matrix (column-major, \c DenseMatrix<MT3,true>).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The rows of \a C are processed in chunks of SIMDSIZE consecutive elements. For a chunk of
// rows starting at \a i and a column \a j, the kernel accumulates A.load(i,k) * set(B(k,j))
// over \a k, multiplies the accumulated SIMD value by the broadcast \a scalar and adds it to
// the values loaded from \a C before storing them back. Row chunks of 8 (inside a guarded
// block), 5, 4, 3, 2 and 1 SIMD vectors are tried in that order; each loop continues at the
// row index where the previous one stopped. Rows beyond \a ipos are handled element-wise.
//
// The k-ranges [kbegin,kend) are narrowed at compile time according to the lower/upper
// (strict) structure of \a MT4 and \a MT5. \c LOW and \c UPP are flags declared outside this
// block; the assertion below requires a square result whenever either of them is set.
//
// NOTE(review): the accumulators (xmm1, ...) are default-constructed and then used with +=;
// this assumes a default-constructed SIMDType holds zero -- confirm against the SIMD wrappers.
// NOTE(review): listing line 5631 (return type / enabling condition) is not visible here.
*/
5627  template< typename MT3 // Type of the left-hand side target matrix
5628  , typename MT4 // Type of the left-hand side matrix operand
5629  , typename MT5 // Type of the right-hand side matrix operand
5630  , typename ST2 > // Type of the scalar value
5632  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5633  {
// An element-wise remainder loop is required unless both C and A are padded.
5634  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
5635 
5636  const size_t M( A.rows() );
5637  const size_t N( B.columns() );
5638  const size_t K( A.columns() );
5639 
5640  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5641 
// End of the SIMD-processed row range: M with the low bits masked off when a remainder loop
// exists (the assertion checks this equals M rounded down to a multiple of SIMDSIZE).
5642  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
5643  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
5644 
// Scalar broadcast into a SIMD value, applied once to each accumulated sum.
5645  const SIMDType factor( set( scalar ) );
5646 
5647  size_t i( 0UL );
5648 
// NOTE(review): listing line 5649, which introduces the following brace block, is not visible
// in this view, so the condition guarding the 8-vector loop cannot be stated from here.
5650  {
// 8 SIMD vectors of rows per step, one column at a time (only when neither LOW nor UPP).
5651  for( ; !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
5652  for( size_t j=0UL; j<N; ++j )
5653  {
5654  const size_t kbegin( ( IsLower<MT5>::value )
5655  ?( ( IsUpper<MT4>::value )
5656  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5657  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5658  :( IsUpper<MT4>::value ? i : 0UL ) );
5659  const size_t kend( ( IsUpper<MT5>::value )
5660  ?( ( IsLower<MT4>::value )
5661  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
5662  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
5663  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
5664 
5665  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5666 
5667  for( size_t k=kbegin; k<kend; ++k ) {
5668  const SIMDType b1( set( B(k,j) ) );
5669  xmm1 += A.load(i ,k) * b1;
5670  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
5671  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
5672  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
5673  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
5674  xmm6 += A.load(i+SIMDSIZE*5UL,k) * b1;
5675  xmm7 += A.load(i+SIMDSIZE*6UL,k) * b1;
5676  xmm8 += A.load(i+SIMDSIZE*7UL,k) * b1;
5677  }
5678 
5679  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5680  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) + xmm2 * factor );
5681  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) + xmm3 * factor );
5682  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) + xmm4 * factor );
5683  (~C).store( i+SIMDSIZE*4UL, j, (~C).load(i+SIMDSIZE*4UL,j) + xmm5 * factor );
5684  (~C).store( i+SIMDSIZE*5UL, j, (~C).load(i+SIMDSIZE*5UL,j) + xmm6 * factor );
5685  (~C).store( i+SIMDSIZE*6UL, j, (~C).load(i+SIMDSIZE*6UL,j) + xmm7 * factor );
5686  (~C).store( i+SIMDSIZE*7UL, j, (~C).load(i+SIMDSIZE*7UL,j) + xmm8 * factor );
5687  }
5688  }
5689  }
5690 
// 5 SIMD vectors of rows per step; columns in pairs, then a single trailing column.
5691  for( ; !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
5692  {
5693  size_t j( 0UL );
5694 
5695  for( ; (j+2UL) <= N; j+=2UL )
5696  {
5697  const size_t kbegin( ( IsLower<MT5>::value )
5698  ?( ( IsUpper<MT4>::value )
5699  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5700  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5701  :( IsUpper<MT4>::value ? i : 0UL ) );
5702  const size_t kend( ( IsUpper<MT5>::value )
5703  ?( ( IsLower<MT4>::value )
5704  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5705  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5706  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
5707 
5708  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
5709 
5710  for( size_t k=kbegin; k<kend; ++k ) {
5711  const SIMDType a1( A.load(i ,k) );
5712  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
5713  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
5714  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
5715  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
5716  const SIMDType b1( set( B(k,j ) ) );
5717  const SIMDType b2( set( B(k,j+1UL) ) );
5718  xmm1 += a1 * b1;
5719  xmm2 += a2 * b1;
5720  xmm3 += a3 * b1;
5721  xmm4 += a4 * b1;
5722  xmm5 += a5 * b1;
5723  xmm6 += a1 * b2;
5724  xmm7 += a2 * b2;
5725  xmm8 += a3 * b2;
5726  xmm9 += a4 * b2;
5727  xmm10 += a5 * b2;
5728  }
5729 
5730  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5731  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) + xmm2 * factor );
5732  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) + xmm3 * factor );
5733  (~C).store( i+SIMDSIZE*3UL, j , (~C).load(i+SIMDSIZE*3UL,j ) + xmm4 * factor );
5734  (~C).store( i+SIMDSIZE*4UL, j , (~C).load(i+SIMDSIZE*4UL,j ) + xmm5 * factor );
5735  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm6 * factor );
5736  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) + xmm7 * factor );
5737  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) + xmm8 * factor );
5738  (~C).store( i+SIMDSIZE*3UL, j+1UL, (~C).load(i+SIMDSIZE*3UL,j+1UL) + xmm9 * factor );
5739  (~C).store( i+SIMDSIZE*4UL, j+1UL, (~C).load(i+SIMDSIZE*4UL,j+1UL) + xmm10 * factor );
5740  }
5741 
// Trailing single column when N is odd.
5742  if( j < N )
5743  {
5744  const size_t kbegin( ( IsLower<MT5>::value )
5745  ?( ( IsUpper<MT4>::value )
5746  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5747  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5748  :( IsUpper<MT4>::value ? i : 0UL ) );
5749  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
5750 
5751  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
5752 
5753  for( size_t k=kbegin; k<kend; ++k ) {
5754  const SIMDType b1( set( B(k,j) ) );
5755  xmm1 += A.load(i ,k) * b1;
5756  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
5757  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
5758  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
5759  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
5760  }
5761 
5762  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5763  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) + xmm2 * factor );
5764  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) + xmm3 * factor );
5765  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) + xmm4 * factor );
5766  (~C).store( i+SIMDSIZE*4UL, j, (~C).load(i+SIMDSIZE*4UL,j) + xmm5 * factor );
5767  }
5768  }
5769 
// 4 SIMD vectors of rows per step; columns in pairs, then a single trailing column.
5770  for( ; !LOW && !UPP && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
5771  {
5772  size_t j( 0UL );
5773 
5774  for( ; (j+2UL) <= N; j+=2UL )
5775  {
5776  const size_t kbegin( ( IsLower<MT5>::value )
5777  ?( ( IsUpper<MT4>::value )
5778  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5779  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5780  :( IsUpper<MT4>::value ? i : 0UL ) );
5781  const size_t kend( ( IsUpper<MT5>::value )
5782  ?( ( IsLower<MT4>::value )
5783  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5784  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5785  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
5786 
5787  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5788 
5789  for( size_t k=kbegin; k<kend; ++k ) {
5790  const SIMDType a1( A.load(i ,k) );
5791  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
5792  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
5793  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
5794  const SIMDType b1( set( B(k,j ) ) );
5795  const SIMDType b2( set( B(k,j+1UL) ) );
5796  xmm1 += a1 * b1;
5797  xmm2 += a2 * b1;
5798  xmm3 += a3 * b1;
5799  xmm4 += a4 * b1;
5800  xmm5 += a1 * b2;
5801  xmm6 += a2 * b2;
5802  xmm7 += a3 * b2;
5803  xmm8 += a4 * b2;
5804  }
5805 
5806  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5807  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) + xmm2 * factor );
5808  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) + xmm3 * factor );
5809  (~C).store( i+SIMDSIZE*3UL, j , (~C).load(i+SIMDSIZE*3UL,j ) + xmm4 * factor );
5810  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
5811  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) + xmm6 * factor );
5812  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) + xmm7 * factor );
5813  (~C).store( i+SIMDSIZE*3UL, j+1UL, (~C).load(i+SIMDSIZE*3UL,j+1UL) + xmm8 * factor );
5814  }
5815 
// Trailing single column when N is odd.
5816  if( j < N )
5817  {
5818  const size_t kbegin( ( IsLower<MT5>::value )
5819  ?( ( IsUpper<MT4>::value )
5820  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5821  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5822  :( IsUpper<MT4>::value ? i : 0UL ) );
5823  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
5824 
5825  SIMDType xmm1, xmm2, xmm3, xmm4;
5826 
5827  for( size_t k=kbegin; k<kend; ++k ) {
5828  const SIMDType b1( set( B(k,j) ) );
5829  xmm1 += A.load(i ,k) * b1;
5830  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
5831  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
5832  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
5833  }
5834 
5835  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5836  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) + xmm2 * factor );
5837  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) + xmm3 * factor );
5838  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) + xmm4 * factor );
5839  }
5840  }
5841 
// 3 SIMD vectors of rows per step; columns in pairs, then a single trailing column.
5842  for( ; !LOW && !UPP && (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
5843  {
5844  size_t j( 0UL );
5845 
5846  for( ; (j+2UL) <= N; j+=2UL )
5847  {
5848  const size_t kbegin( ( IsLower<MT5>::value )
5849  ?( ( IsUpper<MT4>::value )
5850  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5851  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5852  :( IsUpper<MT4>::value ? i : 0UL ) );
5853  const size_t kend( ( IsUpper<MT5>::value )
5854  ?( ( IsLower<MT4>::value )
5855  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5856  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5857  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
5858 
5859  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
5860 
5861  for( size_t k=kbegin; k<kend; ++k ) {
5862  const SIMDType a1( A.load(i ,k) );
5863  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
5864  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
5865  const SIMDType b1( set( B(k,j ) ) );
5866  const SIMDType b2( set( B(k,j+1UL) ) );
5867  xmm1 += a1 * b1;
5868  xmm2 += a2 * b1;
5869  xmm3 += a3 * b1;
5870  xmm4 += a1 * b2;
5871  xmm5 += a2 * b2;
5872  xmm6 += a3 * b2;
5873  }
5874 
5875  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5876  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) + xmm2 * factor );
5877  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) + xmm3 * factor );
5878  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm4 * factor );
5879  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) + xmm5 * factor );
5880  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) + xmm6 * factor );
5881  }
5882 
// Trailing single column when N is odd.
5883  if( j < N )
5884  {
5885  const size_t kbegin( ( IsLower<MT5>::value )
5886  ?( ( IsUpper<MT4>::value )
5887  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5888  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5889  :( IsUpper<MT4>::value ? i : 0UL ) );
5890  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
5891 
5892  SIMDType xmm1, xmm2, xmm3;
5893 
5894  for( size_t k=kbegin; k<kend; ++k ) {
5895  const SIMDType b1( set( B(k,j) ) );
5896  xmm1 += A.load(i ,k) * b1;
5897  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
5898  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
5899  }
5900 
5901  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5902  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) + xmm2 * factor );
5903  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) + xmm3 * factor );
5904  }
5905  }
5906 
// 2 SIMD vectors of rows per step; skipped only when both LOW and UPP are set. The column
// range starts at i when UPP is set and is capped at i+SIMDSIZE*2 when LOW is set.
5907  for( ; !( LOW && UPP ) && (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
5908  {
5909  const size_t jend( LOW ? min(i+SIMDSIZE*2UL,N) : N );
5910  size_t j( UPP ? i : 0UL );
5911 
5912  for( ; (j+2UL) <= jend; j+=2UL )
5913  {
5914  const size_t kbegin( ( IsLower<MT5>::value )
5915  ?( ( IsUpper<MT4>::value )
5916  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5917  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5918  :( IsUpper<MT4>::value ? i : 0UL ) );
5919  const size_t kend( ( IsUpper<MT5>::value )
5920  ?( ( IsLower<MT4>::value )
5921  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5922  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5923  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
5924 
5925  SIMDType xmm1, xmm2, xmm3, xmm4;
5926 
5927  for( size_t k=kbegin; k<kend; ++k ) {
5928  const SIMDType a1( A.load(i ,k) );
5929  const SIMDType a2( A.load(i+SIMDSIZE,k) );
5930  const SIMDType b1( set( B(k,j ) ) );
5931  const SIMDType b2( set( B(k,j+1UL) ) );
5932  xmm1 += a1 * b1;
5933  xmm2 += a2 * b1;
5934  xmm3 += a1 * b2;
5935  xmm4 += a2 * b2;
5936  }
5937 
5938  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5939  (~C).store( i+SIMDSIZE, j , (~C).load(i+SIMDSIZE,j ) + xmm2 * factor );
5940  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
5941  (~C).store( i+SIMDSIZE, j+1UL, (~C).load(i+SIMDSIZE,j+1UL) + xmm4 * factor );
5942  }
5943 
// Trailing single column of the [j,jend) range.
5944  if( j < jend )
5945  {
5946  const size_t kbegin( ( IsLower<MT5>::value )
5947  ?( ( IsUpper<MT4>::value )
5948  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5949  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5950  :( IsUpper<MT4>::value ? i : 0UL ) );
5951  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
5952 
5953  SIMDType xmm1, xmm2;
5954 
5955  for( size_t k=kbegin; k<kend; ++k ) {
5956  const SIMDType b1( set( B(k,j) ) );
5957  xmm1 += A.load(i ,k) * b1;
5958  xmm2 += A.load(i+SIMDSIZE,k) * b1;
5959  }
5960 
5961  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5962  (~C).store( i+SIMDSIZE, j, (~C).load(i+SIMDSIZE,j) + xmm2 * factor );
5963  }
5964  }
5965 
// Single SIMD vector of rows per step, for whatever is left below ipos. The column range is
// capped at i+SIMDSIZE only when both LOW and UPP are set.
5966  for( ; i<ipos; i+=SIMDSIZE )
5967  {
5968  const size_t jend( LOW && UPP ? min(i+SIMDSIZE,N) : N );
5969  size_t j( UPP ? i : 0UL );
5970 
5971  for( ; (j+2UL) <= jend; j+=2UL )
5972  {
5973  const size_t kbegin( ( IsLower<MT5>::value )
5974  ?( ( IsUpper<MT4>::value )
5975  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5976  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5977  :( IsUpper<MT4>::value ? i : 0UL ) );
5978  const size_t kend( ( IsUpper<MT5>::value )
5979  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
5980  :( K ) );
5981 
5982  SIMDType xmm1, xmm2;
5983 
5984  for( size_t k=kbegin; k<kend; ++k ) {
5985  const SIMDType a1( A.load(i,k) );
5986  xmm1 += a1 * set( B(k,j ) );
5987  xmm2 += a1 * set( B(k,j+1UL) );
5988  }
5989 
5990  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5991  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
5992  }
5993 
// Trailing single column; the k-loop runs up to K here.
5994  if( j < jend )
5995  {
5996  const size_t kbegin( ( IsLower<MT5>::value )
5997  ?( ( IsUpper<MT4>::value )
5998  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5999  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6000  :( IsUpper<MT4>::value ? i : 0UL ) );
6001 
6002  SIMDType xmm1;
6003 
6004  for( size_t k=kbegin; k<K; ++k ) {
6005  xmm1 += A.load(i,k) * set( B(k,j) );
6006  }
6007 
6008  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
6009  }
6010  }
6011 
// Element-wise processing of the rows in [ipos,M) that do not fill a complete SIMD vector;
// only compiled in when remainder handling is required.
6012  for( ; remainder && i<M; ++i )
6013  {
6014  const size_t jend( LOW ? i+1UL : N );
6015  size_t j( UPP ? i : 0UL );
6016 
6017  for( ; (j+2UL) <= jend; j+=2UL )
6018  {
6019  const size_t kbegin( ( IsLower<MT5>::value )
6020  ?( ( IsUpper<MT4>::value )
6021  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6022  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6023  :( IsUpper<MT4>::value ? i : 0UL ) );
6024  const size_t kend( ( IsUpper<MT5>::value )
6025  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
6026  :( K ) );
6027 
6028  ElementType value1 = ElementType();
6029  ElementType value2 = ElementType();
6030 
6031  for( size_t k=kbegin; k<kend; ++k ) {
6032  value1 += A(i,k) * B(k,j );
6033  value2 += A(i,k) * B(k,j+1UL);
6034  }
6035 
6036  (~C)(i,j ) += value1 * scalar;
6037  (~C)(i,j+1UL) += value2 * scalar;
6038  }
6039 
// Trailing single column of the [j,jend) range.
6040  if( j < jend )
6041  {
6042  const size_t kbegin( ( IsLower<MT5>::value )
6043  ?( ( IsUpper<MT4>::value )
6044  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6045  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6046  :( IsUpper<MT4>::value ? i : 0UL ) );
6047 
6048  ElementType value = ElementType();
6049 
6050  for( size_t k=kbegin; k<K; ++k ) {
6051  value += A(i,k) * B(k,j);
6052  }
6053 
6054  (~C)(i,j) += value * scalar;
6055  }
6056  }
6057  }
6058  //**********************************************************************************************
6059 
6060  //**Default addition assignment to dense matrices (large matrices)******************************
/*!\brief Default (non-vectorized) addition assignment of a large scaled dense matrix product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Simply forwards all arguments to selectDefaultAddAssignKernel().
//
// NOTE(review): the listing numbering skips 6078, which is where the return type / enabling
// condition of this overload would sit; it is not visible in this view.
*/
6074  template< typename MT3 // Type of the left-hand side target matrix
6075  , typename MT4 // Type of the left-hand side matrix operand
6076  , typename MT5 // Type of the right-hand side matrix operand
6077  , typename ST2 > // Type of the scalar value
6079  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6080  {
6081  selectDefaultAddAssignKernel( C, A, B, scalar );
6082  }
6083  //**********************************************************************************************
6084 
6085  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
/*!\brief Vectorized addition assignment of a large scaled dense matrix product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Dispatches to one of the lmmm(), ummm() or mmm() routines depending on the \c LOW and
// \c UPP flags (declared outside this block; \c LOW is tested first). Each routine receives
// \a scalar and the constant ST2(1) as trailing arguments.
//
// NOTE(review): ST2(1) is presumably the factor applied to the existing content of \a C (so
// that the product is accumulated rather than overwritten) -- confirm against the mmm()
// signature. Listing line 6104 (return type / enabling condition) is not visible here.
*/
6100  template< typename MT3 // Type of the left-hand side target matrix
6101  , typename MT4 // Type of the left-hand side matrix operand
6102  , typename MT5 // Type of the right-hand side matrix operand
6103  , typename ST2 > // Type of the scalar value
6105  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6106  {
6107  if( LOW )
6108  lmmm( C, A, B, scalar, ST2(1) );
6109  else if( UPP )
6110  ummm( C, A, B, scalar, ST2(1) );
6111  else
6112  mmm( C, A, B, scalar, ST2(1) );
6113  }
6114  //**********************************************************************************************
6115 
6116  //**BLAS-based addition assignment to dense matrices (default)**********************************
/*!\brief Fallback for the BLAS-based addition assignment of a scaled dense matrix product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Simply forwards all arguments to selectLargeAddAssignKernel().
//
// NOTE(review): the listing numbering skips 6135, which is where the return type / enabling
// condition of this overload would sit; it is not visible in this view.
*/
6131  template< typename MT3 // Type of the left-hand side target matrix
6132  , typename MT4 // Type of the left-hand side matrix operand
6133  , typename MT5 // Type of the right-hand side matrix operand
6134  , typename ST2 > // Type of the scalar value
6136  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6137  {
6138  selectLargeAddAssignKernel( C, A, B, scalar );
6139  }
6140  //**********************************************************************************************
6141 
6142  //**BLAS-based addition assignment to dense matrices********************************************
// This overload is compiled only when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION evaluate to true.
6143 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
6144 
/*!\brief BLAS-based addition assignment of a scaled dense matrix product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// If \a MT4 is triangular, \a B is copied into a temporary, trmm() is applied to it with
// \a A on the left side, and the temporary is added to \a C. Otherwise, if \a MT5 is
// triangular, the same is done with a copy of \a A and \a B on the right side. If neither
// operand is triangular, gemm() is called directly on \a C. The scalar is converted to the
// element type of \a MT3 before being passed on.
//
// NOTE(review): the final gemm() argument ET(1) is presumably the factor for the existing
// content of \a C -- confirm against the gemm() wrapper. Listing line 6161 (return type /
// enabling condition) is not visible here.
*/
6157  template< typename MT3 // Type of the left-hand side target matrix
6158  , typename MT4 // Type of the left-hand side matrix operand
6159  , typename MT5 // Type of the right-hand side matrix operand
6160  , typename ST2 > // Type of the scalar value
6162  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6163  {
6164  typedef ElementType_<MT3> ET;
6165 
// Triangular left operand: work on a copy of B, then accumulate the copy into C.
6166  if( IsTriangular<MT4>::value ) {
6167  ResultType_<MT3> tmp( serial( B ) );
6168  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6169  addAssign( C, tmp );
6170  }
// Triangular right operand: work on a copy of A, then accumulate the copy into C.
6171  else if( IsTriangular<MT5>::value ) {
6172  ResultType_<MT3> tmp( serial( A ) );
6173  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6174  addAssign( C, tmp );
6175  }
6176  else {
6177  gemm( C, A, B, ET(scalar), ET(1) );
6178  }
6179  }
6180 #endif
6181  //**********************************************************************************************
6182 
6183  //**Restructuring addition assignment to row-major matrices*************************************
/*!\brief Restructuring addition assignment of a scaled dense matrix product to a row-major
//        matrix.
//
// \param lhs The target left-hand side matrix (row-major, \c Matrix<MT,false>).
// \param rhs The right-hand side scaled multiplication expression to be added.
//
// The product is rebuilt with one or both operands wrapped in trans(), passed through a
// \c ForwardFunctor, multiplied by the stored scalar, and handed back to addAssign().
//
// NOTE(review): several listing lines are not visible in this view: 6199 (return type /
// enabling condition), 6202, 6204, and 6214 (the header of the first branch). The condition
// selecting the trans(left) * trans(right) form therefore cannot be stated from here.
*/
6198  template< typename MT > // Type of the target matrix
6200  addAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
6201  {
6203 
6205 
6206  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6207  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6208 
6209  const ForwardFunctor fwd;
6210 
6211  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6212  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6213 
// First branch (header not visible): both operands are transposed.
6215  addAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
// Symmetric MT1: only the left operand is transposed.
6216  else if( IsSymmetric<MT1>::value )
6217  addAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
// Otherwise only the right operand is transposed.
6218  else
6219  addAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
6220  }
6221  //**********************************************************************************************
6222 
6223  //**Addition assignment to sparse matrices******************************************************
6224  // No special implementation for the addition assignment to sparse matrices.
6225  //**********************************************************************************************
6226 
6227  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a scaled dense matrix product to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
//
// Returns immediately if the target has no rows or no columns, or if the left operand has no
// columns. Otherwise both operands are evaluated serially into \c LT / \c RT objects, their
// dimensions are checked by internal assertions, and the work is delegated to
// selectSubAssignKernel() together with the stored scalar.
//
// NOTE(review): listing lines 6241 (return type / enabling condition) and 6244 are not
// visible in this view.
*/
6239  template< typename MT // Type of the target dense matrix
6240  , bool SO > // Storage order of the target dense matrix
6242  subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6243  {
6245 
6246  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6247  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6248 
6249  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6250  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6251 
// Nothing to subtract for an empty target or an empty inner dimension.
6252  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6253  return;
6254  }
6255 
6256  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6257  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6258 
6259  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6260  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6261  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6262  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6263  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6264  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6265 
6266  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6267  }
6268  //**********************************************************************************************
6269 
6270  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Selection of the kernel for a subtraction assignment of a scaled dense matrix
//        product.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The small-matrix kernel is chosen if \a MT4 is diagonal, or if debug mode is off and \a A
// has at most SIMDSIZE*10 rows, or if the number of elements of \a C is below
// TDMATTDMATMULT_THRESHOLD. In all other cases the BLAS kernel is chosen.
*/
6281  template< typename MT3 // Type of the left-hand side target matrix
6282  , typename MT4 // Type of the left-hand side matrix operand
6283  , typename MT5 // Type of the right-hand side matrix operand
6284  , typename ST2 > // Type of the scalar value
6285  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6286  {
6287  if( ( IsDiagonal<MT4>::value ) ||
6288  ( !BLAZE_DEBUG_MODE && A.rows() <= SIMDSIZE*10UL ) ||
6289  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
6290  selectSmallSubAssignKernel( C, A, B, scalar );
6291  else
6292  selectBlasSubAssignKernel( C, A, B, scalar );
6293  }
6294  //**********************************************************************************************
6295 
6296  //**Default subtraction assignment to dense matrices (general/general)**************************
6310  template< typename MT3 // Type of the left-hand side target matrix
6311  , typename MT4 // Type of the left-hand side matrix operand
6312  , typename MT5 // Type of the right-hand side matrix operand
6313  , typename ST2 > // Type of the scalar value
6314  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
6315  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6316  {
6317  const ResultType tmp( serial( A * B * scalar ) );
6318  subAssign( C, tmp );
6319  }
6320  //**********************************************************************************************
6321 
6322  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
/*!\brief Default subtraction assignment of a scaled general dense matrix-diagonal dense matrix
//        multiplication.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (not diagonal).
// \param B The right-hand side multiplication operand (diagonal).
// \param scalar The scaling factor.
//
// This overload participates only when \a MT4 is not diagonal and \a MT5 is diagonal. Only the
// diagonal element B(j,j) is read for column \a j, so every processed element C(i,j) is
// decremented by A(i,j) * B(j,j) * scalar.
*/
6336  template< typename MT3 // Type of the left-hand side target matrix
6337  , typename MT4 // Type of the left-hand side matrix operand
6338  , typename MT5 // Type of the right-hand side matrix operand
6339  , typename ST2 > // Type of the scalar value
6340  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6341  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6342  {
// NOTE(review): the listing numbering skips 6343 here; that line is not visible in this view.
6344 
6345  const size_t M( A.rows() );
6346  const size_t N( B.columns() );
6347 
6348  for( size_t j=0UL; j<N; ++j )
6349  {
// Row range [ibegin,iend) of column j, narrowed by the (strictly) lower/upper structure of A.
6350  const size_t ibegin( ( IsLower<MT4>::value )
6351  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
6352  :( 0UL ) );
6353  const size_t iend( ( IsUpper<MT4>::value )
6354  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
6355  :( M ) );
6356  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6357 
// ipos marks the end of the two-row unrolled loop: ibegin plus the row count with its lowest
// bit cleared (i.e. rounded down to an even number).
6358  const size_t inum( iend - ibegin );
6359  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6360 
6361  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6362  C(i ,j) -= A(i ,j) * B(j,j) * scalar;
6363  C(i+1UL,j) -= A(i+1UL,j) * B(j,j) * scalar;
6364  }
// Odd row count: process the single leftover row.
6365  if( ipos < iend ) {
6366  C(ipos,j) -= A(ipos,j) * B(j,j) * scalar;
6367  }
6368  }
6369  }
6370  //**********************************************************************************************
6371 
6372  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
/*!\brief Default subtraction assignment of a scaled diagonal dense matrix-general dense matrix
//        multiplication.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand (diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
// \param scalar The scaling factor.
//
// This overload participates only when \a MT4 is diagonal and \a MT5 is not diagonal. Only the
// diagonal element A(i,i) is read for row \a i, so every processed element C(i,j) is
// decremented by A(i,i) * B(i,j) * scalar.
*/
6386  template< typename MT3 // Type of the left-hand side target matrix
6387  , typename MT4 // Type of the left-hand side matrix operand
6388  , typename MT5 // Type of the right-hand side matrix operand
6389  , typename ST2 > // Type of the scalar value
6390  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6391  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6392  {
// NOTE(review): the listing numbering skips 6393 here; that line is not visible in this view.
6394 
6395  const size_t M( A.rows() );
6396  const size_t N( B.columns() );
6397 
6398  for( size_t j=0UL; j<N; ++j )
6399  {
// Row range [ibegin,iend) of column j, narrowed by the (strictly) lower/upper structure of B.
6400  const size_t ibegin( ( IsLower<MT5>::value )
6401  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
6402  :( 0UL ) );
6403  const size_t iend( ( IsUpper<MT5>::value )
6404  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
6405  :( M ) );
6406  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6407 
// ipos marks the end of the two-row unrolled loop: ibegin plus the row count with its lowest
// bit cleared (i.e. rounded down to an even number).
6408  const size_t inum( iend - ibegin );
6409  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6410 
6411  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6412  C(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
6413  C(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
6414  }
// Odd row count: process the single leftover row.
6415  if( ipos < iend ) {
6416  C(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
6417  }
6418  }
6419  }
6420  //**********************************************************************************************
6421 
6422  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Default subtraction assignment kernel for two diagonal operands:
// C(i,i) -= A(i,i) * B(i,i) * scalar.
//
// \param C The target matrix; only its diagonal elements are modified.
// \param A The left-hand side operand (diagonal according to the enable condition).
// \param B The right-hand side operand (diagonal according to the enable condition).
// \param scalar The scaling factor applied to every product term.
6436  template< typename MT3 // Type of the left-hand side target matrix
6437  , typename MT4 // Type of the left-hand side matrix operand
6438  , typename MT5 // Type of the right-hand side matrix operand
6439  , typename ST2 > // Type of the scalar value
6440  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
6441  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6442  {
6444 
      // Only diagonal elements are touched; the loop runs over the rows of A.
6445  for( size_t i=0UL; i<A.rows(); ++i ) {
6446  C(i,i) -= A(i,i) * B(i,i) * scalar;
6447  }
6448  }
6449  //**********************************************************************************************
6450 
6451  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Fallback subtraction assignment kernel for small matrices: forwards all arguments
// unchanged to selectDefaultSubAssignKernel().
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// NOTE(review): the return-type / enable-condition line of this overload (listing line 6469)
// is not present in this listing, so the exact selection condition cannot be stated here.
6465  template< typename MT3 // Type of the left-hand side target matrix
6466  , typename MT4 // Type of the left-hand side matrix operand
6467  , typename MT5 // Type of the right-hand side matrix operand
6468  , typename ST2 > // Type of the scalar value
6470  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6471  {
6472  selectDefaultSubAssignKernel( C, A, B, scalar );
6473  }
6474  //**********************************************************************************************
6475 
6476  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
// Small-matrix subtraction assignment kernel for a target of type DenseMatrix<MT3,false>
// (row-major according to the section banner).
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// The kernel does not compute the product itself. It converts one of the two operands into
// its OppositeType_ (serially evaluated into a temporary) and re-dispatches the scaled
// product through subAssign().
//
// NOTE(review): the return-type line (6495) and the conditions of the first two branches
// (listing lines 6505 and 6509) are missing from this listing; only the last two branches
// show their condition. Confirm the first two conditions against the original header.
6491  template< typename MT3 // Type of the left-hand side target matrix
6492  , typename MT4 // Type of the left-hand side matrix operand
6493  , typename MT5 // Type of the right-hand side matrix operand
6494  , typename ST2 > // Type of the scalar value
6496  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6497  {
6502 
6503  const ForwardFunctor fwd;
6504 
      // Branch 1 (condition not visible in this listing): convert B.
6506  const OppositeType_<MT5> tmp( serial( B ) );
6507  subAssign( ~C, fwd( A * tmp ) * scalar );
6508  }
      // Branch 2 (condition not visible in this listing): convert A.
6510  const OppositeType_<MT4> tmp( serial( A ) );
6511  subAssign( ~C, fwd( tmp * B ) * scalar );
6512  }
      // Otherwise convert whichever operand has fewer (or equally many) elements.
6513  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
6514  const OppositeType_<MT5> tmp( serial( B ) );
6515  subAssign( ~C, fwd( A * tmp ) * scalar );
6516  }
6517  else {
6518  const OppositeType_<MT4> tmp( serial( A ) );
6519  subAssign( ~C, fwd( tmp * B ) * scalar );
6520  }
6521  }
6522  //**********************************************************************************************
6523 
6524  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Vectorized small-matrix subtraction assignment kernel for a target of type
// DenseMatrix<MT3,true> (column-major according to the section banner): C -= scalar * A * B.
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Structure of the kernel:
//  - Rows [0,ipos) are processed with SIMD loads/stores in blocks of 8, 5, 4, 3, 2 and 1
//    SIMD vectors of rows; each loop picks up at the row index 'i' where the previous
//    loop stopped. Except for the 8-vector loop, two columns of C are computed per step
//    and a trailing single column is handled separately.
//  - Rows [ipos,M) are processed element-wise in the final loop (only if 'remainder').
//  - For every (row block, column) pair the k-range [kbegin,kend) of the inner product is
//    narrowed using the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of A (MT4)
//    and B (MT5).
//  - LOW and UPP are defined outside this block; from their use here they disable the wide
//    loops and clip the column range of the narrow loops.
//  - The SIMD accumulators (xmm*) are declared without an initializer and then accumulated
//    into with +=; presumably SIMDType default-constructs to zero -- TODO confirm.
//
// NOTE(review): the return-type line (6543) is missing from this listing.
6539  template< typename MT3 // Type of the left-hand side target matrix
6540  , typename MT4 // Type of the left-hand side matrix operand
6541  , typename MT5 // Type of the right-hand side matrix operand
6542  , typename ST2 > // Type of the scalar value
6544  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6545  {
      // A scalar clean-up loop is required unless both C and A are padded.
6546  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT4>::value );
6547 
6548  const size_t M( A.rows() );
6549  const size_t N( B.columns() );
6550  const size_t K( A.columns() );
6551 
6552  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6553 
      // End of the vectorized row range: M rounded down to a multiple of SIMDSIZE when a
      // remainder loop exists (checked by the assertion below), otherwise M itself.
6554  const size_t ipos( remainder ? ( M & size_t(-SIMDSIZE) ) : M );
6555  BLAZE_INTERNAL_ASSERT( !remainder || ( M - ( M % SIMDSIZE ) ) == ipos, "Invalid end calculation" );
6556 
      // The scalar packed into a SIMD value via set().
6557  const SIMDType factor( set( scalar ) );
6558 
      // Row cursor shared by all following loops.
6559  size_t i( 0UL );
6560 
      // NOTE(review): the statement controlling the following braces (listing line 6561) is
      // missing from this listing; confirm the guard against the original header.
6562  {
      // 8 SIMD vectors of rows, one column per step.
6563  for( ; !LOW && !UPP && (i+SIMDSIZE*7UL) < ipos; i+=SIMDSIZE*8UL ) {
6564  for( size_t j=0UL; j<N; ++j )
6565  {
      // Lower bound of k: limited by column j when B is lower, by row i when A is upper.
6566  const size_t kbegin( ( IsLower<MT5>::value )
6567  ?( ( IsUpper<MT4>::value )
6568  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6569  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6570  :( IsUpper<MT4>::value ? i : 0UL ) );
      // Upper bound of k: limited by column j when B is upper, by the end of the row
      // block when A is lower.
6571  const size_t kend( ( IsUpper<MT5>::value )
6572  ?( ( IsLower<MT4>::value )
6573  ?( min( i+SIMDSIZE*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
6574  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
6575  :( IsLower<MT4>::value ? min( i+SIMDSIZE*8UL, K ) : K ) );
6576 
6577  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6578 
6579  for( size_t k=kbegin; k<kend; ++k ) {
6580  const SIMDType b1( set( B(k,j) ) );
6581  xmm1 += A.load(i ,k) * b1;
6582  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
6583  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
6584  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
6585  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
6586  xmm6 += A.load(i+SIMDSIZE*5UL,k) * b1;
6587  xmm7 += A.load(i+SIMDSIZE*6UL,k) * b1;
6588  xmm8 += A.load(i+SIMDSIZE*7UL,k) * b1;
6589  }
6590 
      // Subtract the scaled accumulators from the current contents of C.
6591  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6592  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) - xmm2 * factor );
6593  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) - xmm3 * factor );
6594  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) - xmm4 * factor );
6595  (~C).store( i+SIMDSIZE*4UL, j, (~C).load(i+SIMDSIZE*4UL,j) - xmm5 * factor );
6596  (~C).store( i+SIMDSIZE*5UL, j, (~C).load(i+SIMDSIZE*5UL,j) - xmm6 * factor );
6597  (~C).store( i+SIMDSIZE*6UL, j, (~C).load(i+SIMDSIZE*6UL,j) - xmm7 * factor );
6598  (~C).store( i+SIMDSIZE*7UL, j, (~C).load(i+SIMDSIZE*7UL,j) - xmm8 * factor );
6599  }
6600  }
6601  }
6602 
      // 5 SIMD vectors of rows, two columns per step (xmm1-5: column j, xmm6-10: column j+1).
6603  for( ; !LOW && !UPP && (i+SIMDSIZE*4UL) < ipos; i+=SIMDSIZE*5UL )
6604  {
6605  size_t j( 0UL );
6606 
6607  for( ; (j+2UL) <= N; j+=2UL )
6608  {
6609  const size_t kbegin( ( IsLower<MT5>::value )
6610  ?( ( IsUpper<MT4>::value )
6611  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6612  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6613  :( IsUpper<MT4>::value ? i : 0UL ) );
6614  const size_t kend( ( IsUpper<MT5>::value )
6615  ?( ( IsLower<MT4>::value )
6616  ?( min( i+SIMDSIZE*5UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6617  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6618  :( IsLower<MT4>::value ? min( i+SIMDSIZE*5UL, K ) : K ) );
6619 
6620  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
6621 
6622  for( size_t k=kbegin; k<kend; ++k ) {
6623  const SIMDType a1( A.load(i ,k) );
6624  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
6625  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
6626  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
6627  const SIMDType a5( A.load(i+SIMDSIZE*4UL,k) );
6628  const SIMDType b1( set( B(k,j ) ) );
6629  const SIMDType b2( set( B(k,j+1UL) ) );
6630  xmm1 += a1 * b1;
6631  xmm2 += a2 * b1;
6632  xmm3 += a3 * b1;
6633  xmm4 += a4 * b1;
6634  xmm5 += a5 * b1;
6635  xmm6 += a1 * b2;
6636  xmm7 += a2 * b2;
6637  xmm8 += a3 * b2;
6638  xmm9 += a4 * b2;
6639  xmm10 += a5 * b2;
6640  }
6641 
6642  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6643  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) - xmm2 * factor );
6644  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) - xmm3 * factor );
6645  (~C).store( i+SIMDSIZE*3UL, j , (~C).load(i+SIMDSIZE*3UL,j ) - xmm4 * factor );
6646  (~C).store( i+SIMDSIZE*4UL, j , (~C).load(i+SIMDSIZE*4UL,j ) - xmm5 * factor );
6647  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm6 * factor );
6648  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) - xmm7 * factor );
6649  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) - xmm8 * factor );
6650  (~C).store( i+SIMDSIZE*3UL, j+1UL, (~C).load(i+SIMDSIZE*3UL,j+1UL) - xmm9 * factor );
6651  (~C).store( i+SIMDSIZE*4UL, j+1UL, (~C).load(i+SIMDSIZE*4UL,j+1UL) - xmm10 * factor );
6652  }
6653 
      // Trailing single column when N is odd.
6654  if( j < N )
6655  {
6656  const size_t kbegin( ( IsLower<MT5>::value )
6657  ?( ( IsUpper<MT4>::value )
6658  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6659  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6660  :( IsUpper<MT4>::value ? i : 0UL ) );
6661  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*5UL, K ) ):( K ) );
6662 
6663  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
6664 
6665  for( size_t k=kbegin; k<kend; ++k ) {
6666  const SIMDType b1( set( B(k,j) ) );
6667  xmm1 += A.load(i ,k) * b1;
6668  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
6669  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
6670  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
6671  xmm5 += A.load(i+SIMDSIZE*4UL,k) * b1;
6672  }
6673 
6674  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6675  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) - xmm2 * factor );
6676  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) - xmm3 * factor );
6677  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) - xmm4 * factor );
6678  (~C).store( i+SIMDSIZE*4UL, j, (~C).load(i+SIMDSIZE*4UL,j) - xmm5 * factor );
6679  }
6680  }
6681 
      // 4 SIMD vectors of rows, two columns per step (xmm1-4: column j, xmm5-8: column j+1).
6682  for( ; !LOW && !UPP && (i+SIMDSIZE*3UL) < ipos; i+=SIMDSIZE*4UL )
6683  {
6684  size_t j( 0UL );
6685 
6686  for( ; (j+2UL) <= N; j+=2UL )
6687  {
6688  const size_t kbegin( ( IsLower<MT5>::value )
6689  ?( ( IsUpper<MT4>::value )
6690  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6691  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6692  :( IsUpper<MT4>::value ? i : 0UL ) );
6693  const size_t kend( ( IsUpper<MT5>::value )
6694  ?( ( IsLower<MT4>::value )
6695  ?( min( i+SIMDSIZE*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6696  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6697  :( IsLower<MT4>::value ? min( i+SIMDSIZE*4UL, K ) : K ) );
6698 
6699  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6700 
6701  for( size_t k=kbegin; k<kend; ++k ) {
6702  const SIMDType a1( A.load(i ,k) );
6703  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
6704  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
6705  const SIMDType a4( A.load(i+SIMDSIZE*3UL,k) );
6706  const SIMDType b1( set( B(k,j ) ) );
6707  const SIMDType b2( set( B(k,j+1UL) ) );
6708  xmm1 += a1 * b1;
6709  xmm2 += a2 * b1;
6710  xmm3 += a3 * b1;
6711  xmm4 += a4 * b1;
6712  xmm5 += a1 * b2;
6713  xmm6 += a2 * b2;
6714  xmm7 += a3 * b2;
6715  xmm8 += a4 * b2;
6716  }
6717 
6718  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6719  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) - xmm2 * factor );
6720  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) - xmm3 * factor );
6721  (~C).store( i+SIMDSIZE*3UL, j , (~C).load(i+SIMDSIZE*3UL,j ) - xmm4 * factor );
6722  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
6723  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) - xmm6 * factor );
6724  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) - xmm7 * factor );
6725  (~C).store( i+SIMDSIZE*3UL, j+1UL, (~C).load(i+SIMDSIZE*3UL,j+1UL) - xmm8 * factor );
6726  }
6727 
      // Trailing single column when N is odd.
6728  if( j < N )
6729  {
6730  const size_t kbegin( ( IsLower<MT5>::value )
6731  ?( ( IsUpper<MT4>::value )
6732  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6733  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6734  :( IsUpper<MT4>::value ? i : 0UL ) );
6735  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*4UL, K ) ):( K ) );
6736 
6737  SIMDType xmm1, xmm2, xmm3, xmm4;
6738 
6739  for( size_t k=kbegin; k<kend; ++k ) {
6740  const SIMDType b1( set( B(k,j) ) );
6741  xmm1 += A.load(i ,k) * b1;
6742  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
6743  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
6744  xmm4 += A.load(i+SIMDSIZE*3UL,k) * b1;
6745  }
6746 
6747  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6748  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) - xmm2 * factor );
6749  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) - xmm3 * factor );
6750  (~C).store( i+SIMDSIZE*3UL, j, (~C).load(i+SIMDSIZE*3UL,j) - xmm4 * factor );
6751  }
6752  }
6753 
      // 3 SIMD vectors of rows, two columns per step (xmm1-3: column j, xmm4-6: column j+1).
6754  for( ; !LOW && !UPP && (i+SIMDSIZE*2UL) < ipos; i+=SIMDSIZE*3UL )
6755  {
6756  size_t j( 0UL );
6757 
6758  for( ; (j+2UL) <= N; j+=2UL )
6759  {
6760  const size_t kbegin( ( IsLower<MT5>::value )
6761  ?( ( IsUpper<MT4>::value )
6762  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6763  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6764  :( IsUpper<MT4>::value ? i : 0UL ) );
6765  const size_t kend( ( IsUpper<MT5>::value )
6766  ?( ( IsLower<MT4>::value )
6767  ?( min( i+SIMDSIZE*3UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6768  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6769  :( IsLower<MT4>::value ? min( i+SIMDSIZE*3UL, K ) : K ) );
6770 
6771  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
6772 
6773  for( size_t k=kbegin; k<kend; ++k ) {
6774  const SIMDType a1( A.load(i ,k) );
6775  const SIMDType a2( A.load(i+SIMDSIZE ,k) );
6776  const SIMDType a3( A.load(i+SIMDSIZE*2UL,k) );
6777  const SIMDType b1( set( B(k,j ) ) );
6778  const SIMDType b2( set( B(k,j+1UL) ) );
6779  xmm1 += a1 * b1;
6780  xmm2 += a2 * b1;
6781  xmm3 += a3 * b1;
6782  xmm4 += a1 * b2;
6783  xmm5 += a2 * b2;
6784  xmm6 += a3 * b2;
6785  }
6786 
6787  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6788  (~C).store( i+SIMDSIZE , j , (~C).load(i+SIMDSIZE ,j ) - xmm2 * factor );
6789  (~C).store( i+SIMDSIZE*2UL, j , (~C).load(i+SIMDSIZE*2UL,j ) - xmm3 * factor );
6790  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm4 * factor );
6791  (~C).store( i+SIMDSIZE , j+1UL, (~C).load(i+SIMDSIZE ,j+1UL) - xmm5 * factor );
6792  (~C).store( i+SIMDSIZE*2UL, j+1UL, (~C).load(i+SIMDSIZE*2UL,j+1UL) - xmm6 * factor );
6793  }
6794 
      // Trailing single column when N is odd.
6795  if( j < N )
6796  {
6797  const size_t kbegin( ( IsLower<MT5>::value )
6798  ?( ( IsUpper<MT4>::value )
6799  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6800  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6801  :( IsUpper<MT4>::value ? i : 0UL ) );
6802  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*3UL, K ) ):( K ) );
6803 
6804  SIMDType xmm1, xmm2, xmm3;
6805 
6806  for( size_t k=kbegin; k<kend; ++k ) {
6807  const SIMDType b1( set( B(k,j) ) );
6808  xmm1 += A.load(i ,k) * b1;
6809  xmm2 += A.load(i+SIMDSIZE ,k) * b1;
6810  xmm3 += A.load(i+SIMDSIZE*2UL,k) * b1;
6811  }
6812 
6813  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6814  (~C).store( i+SIMDSIZE , j, (~C).load(i+SIMDSIZE ,j) - xmm2 * factor );
6815  (~C).store( i+SIMDSIZE*2UL, j, (~C).load(i+SIMDSIZE*2UL,j) - xmm3 * factor );
6816  }
6817  }
6818 
      // 2 SIMD vectors of rows, two columns per step. This loop is skipped only when both
      // LOW and UPP are set. The column range starts at i when UPP is set and ends at
      // min(i+SIMDSIZE*2,N) when LOW is set.
6819  for( ; !( LOW && UPP ) && (i+SIMDSIZE) < ipos; i+=SIMDSIZE*2UL )
6820  {
6821  const size_t jend( LOW ? min(i+SIMDSIZE*2UL,N) : N );
6822  size_t j( UPP ? i : 0UL );
6823 
6824  for( ; (j+2UL) <= jend; j+=2UL )
6825  {
6826  const size_t kbegin( ( IsLower<MT5>::value )
6827  ?( ( IsUpper<MT4>::value )
6828  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6829  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6830  :( IsUpper<MT4>::value ? i : 0UL ) );
6831  const size_t kend( ( IsUpper<MT5>::value )
6832  ?( ( IsLower<MT4>::value )
6833  ?( min( i+SIMDSIZE*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6834  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6835  :( IsLower<MT4>::value ? min( i+SIMDSIZE*2UL, K ) : K ) );
6836 
6837  SIMDType xmm1, xmm2, xmm3, xmm4;
6838 
6839  for( size_t k=kbegin; k<kend; ++k ) {
6840  const SIMDType a1( A.load(i ,k) );
6841  const SIMDType a2( A.load(i+SIMDSIZE,k) );
6842  const SIMDType b1( set( B(k,j ) ) );
6843  const SIMDType b2( set( B(k,j+1UL) ) );
6844  xmm1 += a1 * b1;
6845  xmm2 += a2 * b1;
6846  xmm3 += a1 * b2;
6847  xmm4 += a2 * b2;
6848  }
6849 
6850  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6851  (~C).store( i+SIMDSIZE, j , (~C).load(i+SIMDSIZE,j ) - xmm2 * factor );
6852  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
6853  (~C).store( i+SIMDSIZE, j+1UL, (~C).load(i+SIMDSIZE,j+1UL) - xmm4 * factor );
6854  }
6855 
      // Trailing single column of the clipped column range.
6856  if( j < jend )
6857  {
6858  const size_t kbegin( ( IsLower<MT5>::value )
6859  ?( ( IsUpper<MT4>::value )
6860  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6861  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6862  :( IsUpper<MT4>::value ? i : 0UL ) );
6863  const size_t kend( ( IsLower<MT4>::value )?( min( i+SIMDSIZE*2UL, K ) ):( K ) );
6864 
6865  SIMDType xmm1, xmm2;
6866 
6867  for( size_t k=kbegin; k<kend; ++k ) {
6868  const SIMDType b1( set( B(k,j) ) );
6869  xmm1 += A.load(i ,k) * b1;
6870  xmm2 += A.load(i+SIMDSIZE,k) * b1;
6871  }
6872 
6873  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6874  (~C).store( i+SIMDSIZE, j, (~C).load(i+SIMDSIZE,j) - xmm2 * factor );
6875  }
6876  }
6877 
      // 1 SIMD vector of rows, two columns per step; runs for all remaining vectorizable rows.
      // NOTE(review): here the column range is clipped only when both LOW and UPP are set,
      // whereas the two-vector loop above clips whenever LOW is set -- confirm this is intended.
6878  for( ; i<ipos; i+=SIMDSIZE )
6879  {
6880  const size_t jend( LOW && UPP ? min(i+SIMDSIZE,N) : N );
6881  size_t j( UPP ? i : 0UL );
6882 
6883  for( ; (j+2UL) <= jend; j+=2UL )
6884  {
6885  const size_t kbegin( ( IsLower<MT5>::value )
6886  ?( ( IsUpper<MT4>::value )
6887  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6888  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6889  :( IsUpper<MT4>::value ? i : 0UL ) );
6890  const size_t kend( ( IsUpper<MT5>::value )
6891  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
6892  :( K ) );
6893 
6894  SIMDType xmm1, xmm2;
6895 
6896  for( size_t k=kbegin; k<kend; ++k ) {
6897  const SIMDType a1( A.load(i,k) );
6898  xmm1 += a1 * set( B(k,j ) );
6899  xmm2 += a1 * set( B(k,j+1UL) );
6900  }
6901 
6902  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6903  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
6904  }
6905 
      // Trailing single column; the inner product runs up to K here.
6906  if( j < jend )
6907  {
6908  const size_t kbegin( ( IsLower<MT5>::value )
6909  ?( ( IsUpper<MT4>::value )
6910  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6911  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6912  :( IsUpper<MT4>::value ? i : 0UL ) );
6913 
6914  SIMDType xmm1;
6915 
6916  for( size_t k=kbegin; k<K; ++k ) {
6917  xmm1 += A.load(i,k) * set( B(k,j) );
6918  }
6919 
6920  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
6921  }
6922  }
6923 
      // Scalar clean-up for the rows [ipos,M) that do not fill a complete SIMD vector.
      // The column range ends at i+1 when LOW is set and starts at i when UPP is set.
6924  for( ; remainder && i<M; ++i )
6925  {
6926  const size_t jend( LOW ? i+1UL : N );
6927  size_t j( UPP ? i : 0UL );
6928 
6929  for( ; (j+2UL) <= jend; j+=2UL )
6930  {
6931  const size_t kbegin( ( IsLower<MT5>::value )
6932  ?( ( IsUpper<MT4>::value )
6933  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6934  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6935  :( IsUpper<MT4>::value ? i : 0UL ) );
6936  const size_t kend( ( IsUpper<MT5>::value )
6937  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
6938  :( K ) );
6939 
      // Scalar accumulators, explicitly value-initialized.
6940  ElementType value1 = ElementType();
6941  ElementType value2 = ElementType();
6942 
6943  for( size_t k=kbegin; k<kend; ++k ) {
6944  value1 += A(i,k) * B(k,j );
6945  value2 += A(i,k) * B(k,j+1UL);
6946  }
6947 
6948  (~C)(i,j ) -= value1 * scalar;
6949  (~C)(i,j+1UL) -= value2 * scalar;
6950  }
6951 
      // Trailing single column of the scalar clean-up.
6952  if( j < jend )
6953  {
6954  const size_t kbegin( ( IsLower<MT5>::value )
6955  ?( ( IsUpper<MT4>::value )
6956  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6957  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6958  :( IsUpper<MT4>::value ? i : 0UL ) );
6959 
6960  ElementType value = ElementType();
6961 
6962  for( size_t k=kbegin; k<K; ++k ) {
6963  value += A(i,k) * B(k,j);
6964  }
6965 
6966  (~C)(i,j) -= value * scalar;
6967  }
6968  }
6969  }
6970  //**********************************************************************************************
6971 
6972  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Fallback subtraction assignment kernel for large matrices: forwards all arguments
// unchanged to selectDefaultSubAssignKernel().
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// NOTE(review): the return-type / enable-condition line (listing line 6990) is missing from
// this listing, so the exact selection condition cannot be stated here.
6986  template< typename MT3 // Type of the left-hand side target matrix
6987  , typename MT4 // Type of the left-hand side matrix operand
6988  , typename MT5 // Type of the right-hand side matrix operand
6989  , typename ST2 > // Type of the scalar value
6991  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6992  {
6993  selectDefaultSubAssignKernel( C, A, B, scalar );
6994  }
6995  //**********************************************************************************************
6996 
6997  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Vectorized subtraction assignment kernel for large matrices (per the section banner).
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Dispatches to lmmm() when LOW is set, to ummm() when UPP is set, and to mmm() otherwise.
// The subtraction is expressed by passing the negated scalar together with ST2(1);
// presumably the helpers compute C = alpha*A*B + beta*C with these two arguments as alpha
// and beta -- TODO confirm against the mmm/lmmm/ummm declarations.
//
// NOTE(review): the return-type / enable-condition line (listing line 7016) is missing from
// this listing.
7012  template< typename MT3 // Type of the left-hand side target matrix
7013  , typename MT4 // Type of the left-hand side matrix operand
7014  , typename MT5 // Type of the right-hand side matrix operand
7015  , typename ST2 > // Type of the scalar value
7017  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7018  {
7019  if( LOW )
7020  lmmm( C, A, B, -scalar, ST2(1) );
7021  else if( UPP )
7022  ummm( C, A, B, -scalar, ST2(1) );
7023  else
7024  mmm( C, A, B, -scalar, ST2(1) );
7025  }
7026  //**********************************************************************************************
7027 
7028  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback for the BLAS-based subtraction assignment: forwards all arguments unchanged to
// selectLargeSubAssignKernel().
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// NOTE(review): the return-type / enable-condition line (listing line 7047) is missing from
// this listing, so the exact selection condition cannot be stated here.
7043  template< typename MT3 // Type of the left-hand side target matrix
7044  , typename MT4 // Type of the left-hand side matrix operand
7045  , typename MT5 // Type of the right-hand side matrix operand
7046  , typename ST2 > // Type of the scalar value
7048  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7049  {
7050  selectLargeSubAssignKernel( C, A, B, scalar );
7051  }
7052  //**********************************************************************************************
7053 
7054  //**BLAS-based subtraction assignment to dense matrices*****************************************
7055 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
7056 
// BLAS-based subtraction assignment kernel; only compiled when BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are both enabled (see the enclosing #if).
//
// \param C The target matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Three cases are distinguished:
//  - A triangular: B is copied into a temporary, trmm() is called on it with A on the left
//    side and 'scalar' as factor, and the temporary is subtracted from C.
//  - B triangular: A is copied into a temporary, trmm() is called on it with B on the right
//    side and 'scalar' as factor, and the temporary is subtracted from C.
//  - otherwise: gemm() is called directly on C with the negated scalar and ET(1);
//    presumably these are the alpha/beta factors of C = alpha*A*B + beta*C -- TODO confirm.
//
// NOTE(review): the return-type / enable-condition line (listing line 7073) is missing from
// this listing.
7069  template< typename MT3 // Type of the left-hand side target matrix
7070  , typename MT4 // Type of the left-hand side matrix operand
7071  , typename MT5 // Type of the right-hand side matrix operand
7072  , typename ST2 > // Type of the scalar value
7074  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7075  {
      // The scalar is converted to the element type of the target before it is handed on.
7076  typedef ElementType_<MT3> ET;
7077 
7078  if( IsTriangular<MT4>::value ) {
7079  ResultType_<MT3> tmp( serial( B ) );
7080  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7081  subAssign( C, tmp );
7082  }
7083  else if( IsTriangular<MT5>::value ) {
7084  ResultType_<MT3> tmp( serial( A ) );
7085  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7086  subAssign( C, tmp );
7087  }
7088  else {
7089  gemm( C, A, B, ET(-scalar), ET(1) );
7090  }
7091  }
7092 #endif
7093  //**********************************************************************************************
7094 
7095  //**Restructuring subtraction assignment to row-major matrices**********************************
// Restructuring subtraction assignment of the scaled matrix product to a target of type
// Matrix<MT,false> (row-major according to the section banner).
//
// \param lhs The target matrix.
// \param rhs The scaled multiplication expression to be subtracted.
//
// The product is not evaluated here. The operands are re-expressed via trans() and the
// resulting scaled expression is passed to subAssign() again.
//
// NOTE(review): the return-type line (7110) and the condition of the first branch (listing
// line 7125) are missing from this listing; the remaining branches test IsSymmetric<MT1>.
7109  template< typename MT > // Type of the target matrix
7111  subAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
7112  {
7114 
7116 
7117  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7118  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7119 
7120  const ForwardFunctor fwd;
7121 
7122  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7123  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7124 
      // First branch (condition not visible in this listing): both operands are transposed.
7126  subAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
      // Symmetric left operand: only the left operand is transposed.
7127  else if( IsSymmetric<MT1>::value )
7128  subAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
      // Otherwise only the right operand is transposed.
7129  else
7130  subAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7131  }
7132  //**********************************************************************************************
7133 
7134  //**Subtraction assignment to sparse matrices***************************************************
7135  // No special implementation for the subtraction assignment to sparse matrices.
7136  //**********************************************************************************************
7137 
7138  //**Multiplication assignment to dense matrices*************************************************
7139  // No special implementation for the multiplication assignment to dense matrices.
7140  //**********************************************************************************************
7141 
7142  //**Multiplication assignment to sparse matrices************************************************
7143  // No special implementation for the multiplication assignment to sparse matrices.
7144  //**********************************************************************************************
7145 
7146  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the scaled matrix product to a dense matrix.
//
// \param lhs The target dense matrix.
// \param rhs The scaled multiplication expression to be assigned.
//
// Degenerate sizes are handled first: an empty target returns immediately, and a product
// with inner dimension zero resets the target. Otherwise both operands are evaluated into
// LT / RT objects (types declared outside this block) and the expression A * B * scalar is
// handed to smpAssign().
//
// NOTE(review): the return-type / enable-condition line (listing line 7163) is missing from
// this listing.
7161  template< typename MT // Type of the target dense matrix
7162  , bool SO > // Storage order of the target dense matrix
7164  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7165  {
7167 
7168  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7169  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7170 
7171  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7172  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7173 
      // Nothing to do for an empty target; with an empty inner dimension the target is reset.
7174  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7175  return;
7176  }
7177  else if( left.columns() == 0UL ) {
7178  reset( ~lhs );
7179  return;
7180  }
7181 
7182  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7183  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7184 
7185  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7186  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7187  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7188  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7189  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7190  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7191 
7192  smpAssign( ~lhs, A * B * rhs.scalar_ );
7193  }
7194  //**********************************************************************************************
7195 
7196  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the scaled matrix product to a sparse matrix (per the section banner).
//
// The expression 'rhs' is first evaluated into a temporary of type TmpType, which is then
// passed through the forward functor to smpAssign().
//
// NOTE(review): the signature lines (listing lines 7213-7214) and the lines that declare
// TmpType are missing from this listing; the parameter names 'lhs' and 'rhs' are taken from
// their use in the body.
7211  template< typename MT // Type of the target sparse matrix
7212  , bool SO > // Storage order of the target sparse matrix
7215  {
7217 
7219 
7226 
7227  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7228  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7229 
7230  const ForwardFunctor fwd;
7231 
7232  const TmpType tmp( rhs );
7233  smpAssign( ~lhs, fwd( tmp ) );
7234  }
7235  //**********************************************************************************************
7236 
7237  //**Restructuring SMP assignment to row-major matrices******************************************
// Restructuring SMP assignment of the scaled matrix product to a target of type
// Matrix<MT,false> (row-major according to the section banner).
//
// \param lhs The target matrix.
// \param rhs The scaled multiplication expression to be assigned.
//
// The product is not evaluated here. The operands are re-expressed via trans() and the
// resulting scaled expression is passed to smpAssign() again.
//
// NOTE(review): the return-type line (7252) and the condition of the first branch (listing
// line 7267) are missing from this listing; the remaining branches test IsSymmetric<MT1>.
7251  template< typename MT > // Type of the target matrix
7253  smpAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
7254  {
7256 
7258 
7259  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7260  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7261 
7262  const ForwardFunctor fwd;
7263 
7264  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7265  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7266 
      // First branch (condition not visible in this listing): both operands are transposed.
7268  smpAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
      // Symmetric left operand: only the left operand is transposed.
7269  else if( IsSymmetric<MT1>::value )
7270  smpAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
      // Otherwise only the right operand is transposed.
7271  else
7272  smpAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7273  }
7274  //**********************************************************************************************
7275 
7276  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the scaled matrix product to a dense matrix (per the section
// banner).
//
// If the target is empty or the inner dimension of the product is zero, the function
// returns without modifying the target. Otherwise both operands are evaluated into
// LT / RT objects (types declared outside this block) and the expression A * B * scalar is
// handed to smpAddAssign().
//
// NOTE(review): the signature lines (listing lines 7293-7294) are missing from this listing;
// the parameter names 'lhs' and 'rhs' are taken from their use in the body.
7291  template< typename MT // Type of the target dense matrix
7292  , bool SO > // Storage order of the target dense matrix
7295  {
7297 
7298  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7299  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7300 
7301  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7302  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7303 
      // Early exit for an empty target or an empty inner dimension.
7304  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7305  return;
7306  }
7307 
7308  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7309  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7310 
7311  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7312  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7313  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7314  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7315  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7316  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7317 
7318  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
7319  }
7320  //**********************************************************************************************
7321 
7322  //**Restructuring SMP addition assignment to row-major matrices*********************************
// Restructuring SMP addition assignment of the scaled matrix product to a row-major matrix
// (per the section banner).
//
// The product is not evaluated here. The operands are re-expressed via trans() and the
// resulting scaled expression is passed to smpAddAssign() again.
//
// NOTE(review): the signature lines (listing lines 7338-7339) and the condition of the first
// branch (listing line 7353) are missing from this listing; the parameter names 'lhs' and
// 'rhs' are taken from their use in the body.
7337  template< typename MT > // Type of the target matrix
7340  {
7342 
7344 
7345  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7346  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7347 
7348  const ForwardFunctor fwd;
7349 
7350  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7351  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7352 
      // First branch (condition not visible in this listing): both operands are transposed.
7354  smpAddAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
      // Symmetric left operand: only the left operand is transposed.
7355  else if( IsSymmetric<MT1>::value )
7356  smpAddAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
      // Otherwise only the right operand is transposed.
7357  else
7358  smpAddAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7359  }
7360  //**********************************************************************************************
7361 
7362  //**SMP addition assignment to sparse matrices**************************************************
7363  // No special implementation for the SMP addition assignment to sparse matrices.
7364  //**********************************************************************************************
7365 
7366  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the scaled matrix product to a dense matrix (per the section
// banner).
//
// If the target is empty or the inner dimension of the product is zero, the function
// returns without modifying the target. Otherwise both operands are evaluated into
// LT / RT objects (types declared outside this block) and the expression A * B * scalar is
// handed to smpSubAssign().
//
// NOTE(review): the signature lines (listing lines 7383-7384) are missing from this listing;
// the parameter names 'lhs' and 'rhs' are taken from their use in the body.
7381  template< typename MT // Type of the target dense matrix
7382  , bool SO > // Storage order of the target dense matrix
7385  {
7387 
7388  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7389  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7390 
7391  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7392  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7393 
      // Early exit for an empty target or an empty inner dimension.
7394  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7395  return;
7396  }
7397 
7398  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7399  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7400 
7401  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7402  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7403  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7404  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7405  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7406  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7407 
7408  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
7409  }
7410  //**********************************************************************************************
7411 
7412  //**Restructuring SMP subtraction assignment to row-major matrices******************************
// Restructuring SMP subtraction assignment of a scaled dense matrix product (see the
// section banner: targets are row-major matrices). Depending on the symmetry of the
// operand types, one or both operands are transposed before the product is rebuilt and
// passed on to smpSubAssign().
// NOTE(review): the function signature, two statements after the opening brace and the
// condition of the first branch (listing line 7443) are elided in this listing.
7427  template< typename MT > // Type of the target matrix
7430  {
7432 
7434 
7435  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7436  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7437 
  // Functor applied to each restructured product before it is scaled.
7438  const ForwardFunctor fwd;
7439 
7440  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7441  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7442 
  // First branch: both operands are transposed.
  // NOTE(review): its guard is not visible; presumably both MT1 and MT2 are symmetric - confirm.
7444  smpSubAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
  // Only the left-hand side operand type is symmetric: transpose the left operand only.
7445  else if( IsSymmetric<MT1>::value )
7446  smpSubAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
  // Otherwise: transpose the right operand only.
7447  else
7448  smpSubAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7449  }
7450  //**********************************************************************************************
7451 
7452  //**SMP subtraction assignment to sparse matrices***********************************************
7453  // No special implementation for the SMP subtraction assignment to sparse matrices.
7454  //**********************************************************************************************
7455 
7456  //**SMP multiplication assignment to dense matrices*********************************************
7457  // No special implementation for the SMP multiplication assignment to dense matrices.
7458  //**********************************************************************************************
7459 
7460  //**SMP multiplication assignment to sparse matrices********************************************
7461  // No special implementation for the SMP multiplication assignment to sparse matrices.
7462  //**********************************************************************************************
7463 
7464  //**Compile time checks*************************************************************************
7472  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
7473  //**********************************************************************************************
7474 };
7476 //*************************************************************************************************
7477 
7478 
7479 
7480 
7481 //=================================================================================================
7482 //
7483 // GLOBAL BINARY ARITHMETIC OPERATORS
7484 //
7485 //=================================================================================================
7486 
7487 //*************************************************************************************************
// Global multiplication operator for two dense matrices (see the section banner).
// Rejects operands whose inner dimensions differ: if the number of columns of `lhs` does
// not equal the number of rows of `rhs`, BLAZE_THROW_INVALID_ARGUMENT is invoked with the
// message "Matrix sizes do not match".
// NOTE(review): the function signature, the first statement and the return statement
// (listing lines 7516-7517, 7519 and 7525) are elided in this listing.
7514 template< typename T1 // Type of the left-hand side dense matrix
7515  , typename T2 > // Type of the right-hand side dense matrix
7518 {
7520 
  // The inner dimensions of the product must agree.
7521  if( (~lhs).columns() != (~rhs).rows() ) {
7522  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
7523  }
7524 
7526 }
7527 //*************************************************************************************************
7528 
7529 
7530 
7531 
7532 //=================================================================================================
7533 //
7534 // GLOBAL FUNCTIONS
7535 //
7536 //=================================================================================================
7537 
7538 //*************************************************************************************************
// Declares the given matrix product expression `dm` as symmetric: returns a new
// TDMatTDMatMultExpr over the same operands with the symmetry flag forced to true and
// the remaining flags (HF, LF, UF) passed through unchanged.
// Invokes BLAZE_THROW_INVALID_ARGUMENT if `dm` is not square.
// NOTE(review): the function name and signature are elided in this listing; presumably
// this is the declsym() overload for TDMatTDMatMultExpr - confirm.
7561 template< typename MT1 // Type of the left-hand side dense matrix
7562  , typename MT2 // Type of the right-hand side dense matrix
7563  , bool SF // Symmetry flag
7564  , bool HF // Hermitian flag
7565  , bool LF // Lower flag
7566  , bool UF > // Upper flag
7569 {
7571 
  // Only square matrices are accepted for this declaration.
7572  if( !isSquare( dm ) ) {
7573  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
7574  }
7575 
7576  return TDMatTDMatMultExpr<MT1,MT2,true,HF,LF,UF>( dm.leftOperand(), dm.rightOperand() );
7577 }
7579 //*************************************************************************************************
7580 
7581 
7582 //*************************************************************************************************
// Declares the given matrix product expression `dm` as Hermitian: returns a new
// TDMatTDMatMultExpr over the same operands with the Hermitian flag forced to true and
// the remaining flags (SF, LF, UF) passed through unchanged.
// Invokes BLAZE_THROW_INVALID_ARGUMENT if `dm` is not square.
// NOTE(review): the function name and signature are elided in this listing; presumably
// this is the declherm() overload for TDMatTDMatMultExpr - confirm.
7605 template< typename MT1 // Type of the left-hand side dense matrix
7606  , typename MT2 // Type of the right-hand side dense matrix
7607  , bool SF // Symmetry flag
7608  , bool HF // Hermitian flag
7609  , bool LF // Lower flag
7610  , bool UF > // Upper flag
7613 {
7615 
  // Only square matrices are accepted for this declaration.
7616  if( !isSquare( dm ) ) {
7617  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
7618  }
7619 
7620  return TDMatTDMatMultExpr<MT1,MT2,SF,true,LF,UF>( dm.leftOperand(), dm.rightOperand() );
7621 }
7623 //*************************************************************************************************
7624 
7625 
7626 //*************************************************************************************************
// Declares the given matrix product expression `dm` as lower: returns a new
// TDMatTDMatMultExpr over the same operands with the lower flag forced to true and the
// remaining flags (SF, HF, UF) passed through unchanged.
// Invokes BLAZE_THROW_INVALID_ARGUMENT if `dm` is not square.
// NOTE(review): the function name and signature are elided in this listing; presumably
// this is the decllow() overload for TDMatTDMatMultExpr - confirm.
7649 template< typename MT1 // Type of the left-hand side dense matrix
7650  , typename MT2 // Type of the right-hand side dense matrix
7651  , bool SF // Symmetry flag
7652  , bool HF // Hermitian flag
7653  , bool LF // Lower flag
7654  , bool UF > // Upper flag
7657 {
7659 
  // Only square matrices are accepted for this declaration.
7660  if( !isSquare( dm ) ) {
7661  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
7662  }
7663 
7664  return TDMatTDMatMultExpr<MT1,MT2,SF,HF,true,UF>( dm.leftOperand(), dm.rightOperand() );
7665 }
7667 //*************************************************************************************************
7668 
7669 
7670 //*************************************************************************************************
// Declares the given matrix product expression `dm` as upper: returns a new
// TDMatTDMatMultExpr over the same operands with the upper flag forced to true and the
// remaining flags (SF, HF, LF) passed through unchanged.
// Invokes BLAZE_THROW_INVALID_ARGUMENT if `dm` is not square.
// NOTE(review): the function name and signature are elided in this listing; presumably
// this is the declupp() overload for TDMatTDMatMultExpr - confirm.
7693 template< typename MT1 // Type of the left-hand side dense matrix
7694  , typename MT2 // Type of the right-hand side dense matrix
7695  , bool SF // Symmetry flag
7696  , bool HF // Hermitian flag
7697  , bool LF // Lower flag
7698  , bool UF > // Upper flag
7701 {
7703 
  // Only square matrices are accepted for this declaration.
7704  if( !isSquare( dm ) ) {
7705  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
7706  }
7707 
7708  return TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,true>( dm.leftOperand(), dm.rightOperand() );
7709 }
7711 //*************************************************************************************************
7712 
7713 
7714 //*************************************************************************************************
// Declares the given matrix product expression `dm` as diagonal: returns a new
// TDMatTDMatMultExpr over the same operands with both the lower and the upper flag
// forced to true; the symmetry and Hermitian flags (SF, HF) are passed through unchanged.
// Invokes BLAZE_THROW_INVALID_ARGUMENT if `dm` is not square.
// NOTE(review): the function name and signature are elided in this listing; presumably
// this is the decldiag() overload for TDMatTDMatMultExpr - confirm.
7737 template< typename MT1 // Type of the left-hand side dense matrix
7738  , typename MT2 // Type of the right-hand side dense matrix
7739  , bool SF // Symmetry flag
7740  , bool HF // Hermitian flag
7741  , bool LF // Lower flag
7742  , bool UF > // Upper flag
7745 {
7747 
  // Only square matrices are accepted for this declaration.
7748  if( !isSquare( dm ) ) {
7749  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
7750  }
7751 
  // Diagonal is encoded as "lower and upper at the same time".
7752  return TDMatTDMatMultExpr<MT1,MT2,SF,HF,true,true>( dm.leftOperand(), dm.rightOperand() );
7753 }
7755 //*************************************************************************************************
7756 
7757 
7758 
7759 
7760 //=================================================================================================
7761 //
7762 // ROWS SPECIALIZATIONS
7763 //
7764 //=================================================================================================
7765 
7766 //*************************************************************************************************
// Rows trait of the product expression: inherited from the Rows trait of the left-hand
// side operand type MT1.
7768 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7769 struct Rows< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Rows<MT1>
7770 {};
7772 //*************************************************************************************************
7773 
7774 
7775 
7776 
7777 //=================================================================================================
7778 //
7779 // COLUMNS SPECIALIZATIONS
7780 //
7781 //=================================================================================================
7782 
7783 //*************************************************************************************************
// Columns trait of the product expression: inherited from the Columns trait of the
// right-hand side operand type MT2.
7785 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7786 struct Columns< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Columns<MT2>
7787 {};
7789 //*************************************************************************************************
7790 
7791 
7792 
7793 
7794 //=================================================================================================
7795 //
7796 // ISALIGNED SPECIALIZATIONS
7797 //
7798 //=================================================================================================
7799 
7800 //*************************************************************************************************
// IsAligned trait of the product expression: true only if both operand types MT1 and
// MT2 are themselves aligned.
7802 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7803 struct IsAligned< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7804  : public BoolConstant< And< IsAligned<MT1>, IsAligned<MT2> >::value >
7805 {};
7807 //*************************************************************************************************
7808 
7809 
7810 
7811 
7812 //=================================================================================================
7813 //
7814 // ISSYMMETRIC SPECIALIZATIONS
7815 //
7816 //=================================================================================================
7817 
7818 //*************************************************************************************************
// IsSymmetric trait of the product expression. The expression counts as symmetric if
//  - the symmetry flag SF is set, or
//  - the Hermitian flag HF is set and the element type of the expression is a builtin
//    type, or
//  - both the lower flag LF and the upper flag UF are set (i.e. declared diagonal).
// The element type is queried on a fixed flag combination <false,true,false,false>,
// so it does not depend on the flags of the specialization being evaluated.
7820 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7821 struct IsSymmetric< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7822  : public BoolConstant< Or< Bool<SF>
7823  , And< Bool<HF>
7824  , IsBuiltin< ElementType_< TDMatTDMatMultExpr<MT1,MT2,false,true,false,false> > > >
7825  , And< Bool<LF>, Bool<UF> > >::value >
7826 {};
7828 //*************************************************************************************************
7829 
7830 
7831 
7832 
7833 //=================================================================================================
7834 //
7835 // ISHERMITIAN SPECIALIZATIONS
7836 //
7837 //=================================================================================================
7838 
7839 //*************************************************************************************************
// IsHermitian trait of the product expression: this partial specialization matches only
// expressions whose Hermitian flag is true and reports them as Hermitian.
7841 template< typename MT1, typename MT2, bool SF, bool LF, bool UF >
7842 struct IsHermitian< TDMatTDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
7843  : public TrueType
7844 {};
7846 //*************************************************************************************************
7847 
7848 
7849 
7850 
7851 //=================================================================================================
7852 //
7853 // ISLOWER SPECIALIZATIONS
7854 //
7855 //=================================================================================================
7856 
7857 //*************************************************************************************************
// IsLower trait of the product expression. The expression counts as lower if
//  - the lower flag LF is set, or
//  - both operand types are lower, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and both operand types
//    are upper.
7859 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7860 struct IsLower< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7861  : public BoolConstant< Or< Bool<LF>
7862  , And< IsLower<MT1>, IsLower<MT2> >
7863  , And< Or< Bool<SF>, Bool<HF> >
7864  , IsUpper<MT1>, IsUpper<MT2> > >::value >
7865 {};
7867 //*************************************************************************************************
7868 
7869 
7870 
7871 
7872 //=================================================================================================
7873 //
7874 // ISUNILOWER SPECIALIZATIONS
7875 //
7876 //=================================================================================================
7877 
7878 //*************************************************************************************************
// IsUniLower trait of the product expression. The expression counts as unilower if
//  - both operand types are unilower, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and both operand types
//    are uniupper.
// The lower flag LF alone does not make the expression unilower.
7880 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7881 struct IsUniLower< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7882  : public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
7883  , And< Or< Bool<SF>, Bool<HF> >
7884  , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
7885 {};
7887 //*************************************************************************************************
7888 
7889 
7890 
7891 
7892 //=================================================================================================
7893 //
7894 // ISSTRICTLYLOWER SPECIALIZATIONS
7895 //
7896 //=================================================================================================
7897 
7898 //*************************************************************************************************
// IsStrictlyLower trait of the product expression. The expression counts as strictly
// lower if
//  - one operand type is strictly lower and the other one is lower, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and one operand type is
//    strictly upper while the other one is upper.
7900 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7901 struct IsStrictlyLower< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7902  : public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7903  , And< IsStrictlyLower<MT2>, IsLower<MT1> >
7904  , And< Or< Bool<SF>, Bool<HF> >
7905  , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
7906  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
7907 {};
7909 //*************************************************************************************************
7910 
7911 
7912 
7913 
7914 //=================================================================================================
7915 //
7916 // ISUPPER SPECIALIZATIONS
7917 //
7918 //=================================================================================================
7919 
7920 //*************************************************************************************************
// IsUpper trait of the product expression. The expression counts as upper if
//  - the upper flag UF is set, or
//  - both operand types are upper, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and both operand types
//    are lower.
7922 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7923 struct IsUpper< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7924  : public BoolConstant< Or< Bool<UF>
7925  , And< IsUpper<MT1>, IsUpper<MT2> >
7926  , And< Or< Bool<SF>, Bool<HF> >
7927  , IsLower<MT1>, IsLower<MT2> > >::value >
7928 {};
7930 //*************************************************************************************************
7931 
7932 
7933 
7934 
7935 //=================================================================================================
7936 //
7937 // ISUNIUPPER SPECIALIZATIONS
7938 //
7939 //=================================================================================================
7940 
7941 //*************************************************************************************************
// IsUniUpper trait of the product expression. The expression counts as uniupper if
//  - both operand types are uniupper, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and both operand types
//    are unilower.
// The upper flag UF alone does not make the expression uniupper.
7943 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7944 struct IsUniUpper< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7945  : public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
7946  , And< Or< Bool<SF>, Bool<HF> >
7947  , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
7948 {};
7950 //*************************************************************************************************
7951 
7952 
7953 
7954 
7955 //=================================================================================================
7956 //
7957 // ISSTRICTLYUPPER SPECIALIZATIONS
7958 //
7959 //=================================================================================================
7960 
7961 //*************************************************************************************************
// IsStrictlyUpper trait of the product expression. The expression counts as strictly
// upper if
//  - one operand type is strictly upper and the other one is upper, or
//  - the expression is flagged symmetric or Hermitian (SF or HF) and one operand type is
//    strictly lower while the other one is lower.
7963 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7964 struct IsStrictlyUpper< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7965  : public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
7966  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
7967  , And< Or< Bool<SF>, Bool<HF> >
7968  , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7969  , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
7970 {};
7972 //*************************************************************************************************
7973 
7974 
7975 
7976 
7977 //=================================================================================================
7978 //
7979 // EXPRESSION TRAIT SPECIALIZATIONS
7980 //
7981 //=================================================================================================
7982 
7983 //*************************************************************************************************
// Specialization of TDMatDVecMultExprTrait for a TDMatTDMatMultExpr as matrix operand
// and VT as vector operand.
// NOTE(review): the nested type definition (listing lines 7990-7993) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
7985 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
7986 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
7987 {
7988  public:
7989  //**********************************************************************************************
7994  , INVALID_TYPE >;
7995  //**********************************************************************************************
7996 };
7998 //*************************************************************************************************
7999 
8000 
8001 //*************************************************************************************************
// Specialization of TDMatSVecMultExprTrait for a TDMatTDMatMultExpr as matrix operand
// and VT as vector operand.
// NOTE(review): the nested type definition (listing lines 8008-8011) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8003 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
8004 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
8005 {
8006  public:
8007  //**********************************************************************************************
8012  , INVALID_TYPE >;
8013  //**********************************************************************************************
8014 };
8016 //*************************************************************************************************
8017 
8018 
8019 //*************************************************************************************************
// Specialization of TDVecTDMatMultExprTrait for VT as vector operand and a
// TDMatTDMatMultExpr as matrix operand.
// NOTE(review): the nested type definition (listing lines 8026-8029) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8021 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8022 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8023 {
8024  public:
8025  //**********************************************************************************************
8030  , INVALID_TYPE >;
8031  //**********************************************************************************************
8032 };
8034 //*************************************************************************************************
8035 
8036 
8037 //*************************************************************************************************
// Specialization of TSVecTDMatMultExprTrait for VT as vector operand and a
// TDMatTDMatMultExpr as matrix operand.
// NOTE(review): the nested type definition (listing lines 8044-8047) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8039 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8040 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8041 {
8042  public:
8043  //**********************************************************************************************
8048  , INVALID_TYPE >;
8049  //**********************************************************************************************
8050 };
8052 //*************************************************************************************************
8053 
8054 
8055 //*************************************************************************************************
// Specialization of TDMatDeclSymExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing lines 8062-8064) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8057 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8058 struct TDMatDeclSymExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8059 {
8060  public:
8061  //**********************************************************************************************
8065  , INVALID_TYPE >;
8066  //**********************************************************************************************
8067 };
8069 //*************************************************************************************************
8070 
8071 
8072 //*************************************************************************************************
// Specialization of TDMatDeclHermExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing lines 8079-8081) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8074 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8075 struct TDMatDeclHermExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8076 {
8077  public:
8078  //**********************************************************************************************
8082  , INVALID_TYPE >;
8083  //**********************************************************************************************
8084 };
8086 //*************************************************************************************************
8087 
8088 
8089 //*************************************************************************************************
// Specialization of TDMatDeclLowExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing lines 8096-8098) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8091 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8092 struct TDMatDeclLowExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8093 {
8094  public:
8095  //**********************************************************************************************
8099  , INVALID_TYPE >;
8100  //**********************************************************************************************
8101 };
8103 //*************************************************************************************************
8104 
8105 
8106 //*************************************************************************************************
// Specialization of TDMatDeclUppExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing lines 8113-8115) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8108 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8109 struct TDMatDeclUppExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8110 {
8111  public:
8112  //**********************************************************************************************
8116  , INVALID_TYPE >;
8117  //**********************************************************************************************
8118 };
8120 //*************************************************************************************************
8121 
8122 
8123 //*************************************************************************************************
// Specialization of TDMatDeclDiagExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing lines 8130-8132) is elided in this
// listing; only its INVALID_TYPE fallback alternative is visible.
8125 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8126 struct TDMatDeclDiagExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8127 {
8128  public:
8129  //**********************************************************************************************
8133  , INVALID_TYPE >;
8134  //**********************************************************************************************
8135 };
8137 //*************************************************************************************************
8138 
8139 
8140 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for TDMatTDMatMultExpr; AF is an additional
// boolean template parameter of the trait.
// NOTE(review): the nested type definition (listing lines 8147-8148) is elided in this
// listing, so the resulting type cannot be stated from here.
8142 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, bool AF >
8143 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, AF >
8144 {
8145  public:
8146  //**********************************************************************************************
8149  //**********************************************************************************************
8150 };
8152 //*************************************************************************************************
8153 
8154 
8155 //*************************************************************************************************
// Specialization of RowExprTrait for TDMatTDMatMultExpr: the row expression type of the
// product is the multiplication expression type of the row expression type of the
// (const) left-hand side operand and the right-hand side operand MT2.
8157 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8158 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8159 {
8160  public:
8161  //**********************************************************************************************
8162  using Type = MultExprTrait_< RowExprTrait_<const MT1>, MT2 >;
8163  //**********************************************************************************************
8164 };
8166 //*************************************************************************************************
8167 
8168 
8169 //*************************************************************************************************
// Specialization of ColumnExprTrait for TDMatTDMatMultExpr.
// NOTE(review): the nested type definition (listing line 8176) is elided in this
// listing, so the resulting type cannot be stated from here.
8171 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8172 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8173 {
8174  public:
8175  //**********************************************************************************************
8177  //**********************************************************************************************
8178 };
8180 //*************************************************************************************************
8181 
8182 } // namespace blaze
8183 
8184 #endif
typename SubmatrixExprTrait< MT, AF >::Type SubmatrixExprTrait_
Auxiliary alias declaration for the SubmatrixExprTrait type trait. The SubmatrixExprTrait_ alias decla...
Definition: SubmatrixExprTrait.h:134
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
Evaluation of the expression type of a dense matrix declherm operation.Via this type trait it is poss...
Definition: TDMatDeclHermExprTrait.h:75
Compile time check for row vector types.This type trait tests whether or not the given template argum...
Definition: IsRowVector.h:80
const DMatForEachExpr< MT, Conj, SO > conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatForEachExpr.h:1214
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:310
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:174
Header file for auxiliary alias declarations.
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:72
Constraint on the data type.
Evaluation of the expression type of a dense matrix decllow operation.Via this type trait it is possi...
Definition: TDMatDeclLowExprTrait.h:75
Header file for kernel specific block sizes.
Header file for mathematical functions.
constexpr bool useOptimizedKernels
Configuration switch for optimized kernels.This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
Compile time check for low-level access to constant data.This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:307
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:505
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:304
Flag for Hermitian matrices.
Definition: TDMatTDMatMultExpr.h:196
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:495
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the serial shim.
Header file for the IsDiagonal type trait.
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:177
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatScalarMultExpr.h:560
Generic wrapper for a compile time constant integral value.The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:298
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the ColumnExprTrait class template.
Header file for the DeclUpp functor.
Header file for the IsSame and IsStrictlySame type traits.
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:178
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:194
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:162
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:315
Header file for the IsColumnMajorMatrix type trait.
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DMatScalarMultExpr.h:633
Header file for the dense matrix multiplication kernels.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:533
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatScalarMultExpr.h:550
Header file for the IsRowVector type trait.
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template.The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:223
Header file for the IsIntegral type trait.
Base class for all matrix/scalar multiplication expression templates.The MatScalarMultExpr class serv...
Definition: MatScalarMultExpr.h:66
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1755
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:342
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:163
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
Evaluation of the expression type of a sparse vector/transpose dense matrix multiplication. Via this type trait it is possible to evaluate the resulting expression type of a sparse vector/transpose dense matrix multiplication. Given the transpose sparse vector type VT and the column-major dense matrix type MT, the nested type Type corresponds to the resulting expression type. In case either VT is not a transpose sparse vector type or MT is not a column-major dense matrix type, the resulting data type Type is set to INVALID_TYPE.
Definition: TSVecTDMatMultExprTrait.h:81
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:245
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:138
DisableIf_< IsSymmetric< MT >, const DMatDeclSymExpr< MT, SO > > declsym(const DenseMatrix< MT, SO > &dm)
Declares the given non-symmetric dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:841
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:144
Flag for symmetric matrices.
Definition: TDMatTDMatMultExpr.h:195
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:313
Header file for the TDMatDeclDiagExprTrait class template.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types.This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Header file for the IsUniLower type trait.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:323
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1802
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
LeftOperand leftOperand() const noexcept
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:441
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:71
Base class for sparse matrices.The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:119
Constraint on the data type.
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
Compile time check for low-level access to mutable data.This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Compile time check for the alignment of data types.This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Constraint on the data type.
typename MultExprTrait< T1, T2 >::Type MultExprTrait_
Auxiliary alias declaration for the MultExprTrait class template.The MultExprTrait_ alias declaration...
Definition: MultExprTrait.h:344
Header file for the MultExprTrait class template.
DisableIf_< IsHermitian< MT >, const DMatDeclHermExpr< MT, SO > > declherm(const DenseMatrix< MT, SO > &dm)
Declares the given non-Hermitian dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:841
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
SubvectorExprTrait_< VT, unaligned > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:152
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for upper unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Flag for upper matrices.
Definition: TDMatTDMatMultExpr.h:198
Header file for the DisableIf class template.
Compile time check for dense vector types.This type trait tests whether or not the given template par...
Definition: IsDenseVector.h:78
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:451
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
Header file for the IsDouble type trait.
Header file for the If class template.
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:300
Compile time check for row-major matrix types.This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:83
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
Header file for the TSVecTDMatMultExprTrait class template.
Evaluation of the expression type of a dense matrix declupp operation.Via this type trait it is possi...
Definition: TDMatDeclUppExprTrait.h:75
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding.This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Expression object for dense matrix-scalar multiplications.The DMatScalarMultExpr class represents the...
Definition: DMatScalarMultExpr.h:123
Header file for the TDMatSVecMultExprTrait class template.
Header file for the TDMatDeclHermExprTrait class template.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:475
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the Not class template.
Flag for lower matrices.
Definition: TDMatTDMatMultExpr.h:197
Header file for the TDMatDeclUppExprTrait class template.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Evaluation of the expression type of a dense matrix decldiag operation.Via this type trait it is poss...
Definition: TDMatDeclDiagExprTrait.h:75
Compile time check for sparse vector types.This type trait tests whether or not the given template pa...
Definition: IsSparseVector.h:78
Evaluation of the expression type of a submatrix operation. Via this type trait it is possible to...
Definition: SubmatrixExprTrait.h:80
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:303
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Compile time check for strictly triangular matrix types.This type trait tests whether or not the give...
Definition: IsStrictlyTriangular.h:87
Compile time check for data types.This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:80
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:128
Header file for the IsStrictlyTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:60
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
Compile time check for column vector types.This type trait tests whether or not the given template ar...
Definition: IsColumnVector.h:80
Evaluation of the expression type of a dense matrix declsym operation.Via this type trait it is possi...
Definition: TDMatDeclSymExprTrait.h:75
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices.This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
DisableIf_< IsLower< MT >, const DMatDeclLowExpr< MT, SO > > decllow(const DenseMatrix< MT, SO > &dm)
Declares the given non-lower dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:842
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Evaluation of the expression type of a row operation. Via this type trait it is possible to evalu...
Definition: RowExprTrait.h:79
LeftOperand matrix_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatScalarMultExpr.h:632
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:260
Header file for the DeclDiag functor.
Compile time check for dense matrix types.This type trait tests whether or not the given template par...
Definition: IsDenseMatrix.h:78
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:179
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:506
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:128
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemm). This compilation switch e...
Definition: BLAS.h:93
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:109
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:316
Compile time check for lower unitriangular matrices.This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
Header file for the conjugate shim.
Header file for the IsNumeric type trait.
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions.The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
Compile time check for resizable data types.This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
System settings for the BLAS mode.
Evaluation of the expression type of a transpose dense matrix/sparse vector multiplication. Via this type trait it is possible to evaluate the resulting expression type of a transpose dense matrix/sparse vector multiplication. Given the column-major dense matrix type MT and the non-transpose sparse vector type VT, the nested type Type corresponds to the resulting expression type. In case either MT is not a column-major dense matrix type or VT is not a non-transpose sparse vector type, the resulting data type Type is set to INVALID_TYPE.
Definition: TDMatSVecMultExprTrait.h:79
Header file for the IsSIMDCombinable type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:431
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:79
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for the MatScalarMultExpr base class.
typename TDVecTDMatMultExprTrait< VT, MT >::Type TDVecTDMatMultExprTrait_
Auxiliary alias declaration for the TDVecTDMatMultExprTrait class template.The TDVecTDMatMultExprTrai...
Definition: TDVecTDMatMultExprTrait.h:120
ElementType_< ResultType > ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:301
Header file for run time assertion macros.
Compile time check for column-major matrix types.This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:83
Utility type for generic codes.
Header file for the TDMatDeclLowExprTrait class template.
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:175
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:160
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:405
Header file for the reset shim.
SIMD characteristics of data types.The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:93
Compile time type negation. The Not class template negates the given compile time condition. In case the given condition would evaluate to true, the nested member enumeration is set to false and vice versa.
Definition: Not.h:70
Compile time check for Hermitian matrices.This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Compile time check for built-in data types.This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
Compile time check for integral data types.This type trait tests whether or not the given template pa...
Definition: IsIntegral.h:75
Base class for matrices.The Matrix class is a base class for all dense and sparse matrix classes with...
Definition: Forward.h:94
TDMatTDMatMultExpr< MT1, MT2, SF, HF, LF, UF > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:296
Constraint on the data type.
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: TDMatTDMatMultExpr.h:302
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
Header file for the TDMatDeclSymExprTrait class template.
Header file for the HasMutableDataAccess type trait.
Header file for the Noop functor.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:113
Header file for BLAS triangular matrix/matrix multiplication functions (trmm)
typename TDMatDVecMultExprTrait< MT, VT >::Type TDMatDVecMultExprTrait_
Auxiliary alias declaration for the TDMatDVecMultExprTrait class template.The TDMatDVecMultExprTrait_...
Definition: TDMatDVecMultExprTrait.h:120
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template.The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:223
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions.The OppositeType_ alias declaration provid...
Definition: Aliases.h:243
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Header file for the IsDenseVector type trait.
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:176
Evaluation of the expression type of a dense vector/transpose dense matrix multiplication. Via this type trait it is possible to evaluate the resulting expression type of a dense vector/transpose dense matrix multiplication. Given the transpose dense vector type VT and the column-major dense matrix type MT, the nested type Type corresponds to the resulting expression type. In case either VT is not a transpose dense vector type or MT is not a column-major dense matrix type, the resulting data type Type is set to INVALID_TYPE.
Definition: TDVecTDMatMultExprTrait.h:79
Compile time check for complex types.This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
Header file for the IsRowMajorMatrix type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:55
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:299
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:733
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDMatDVecMultExprTrait class template.
Evaluation of the expression type of a transpose dense matrix/dense vector multiplication. Via this type trait it is possible to evaluate the resulting expression type of a transpose dense matrix/dense vector multiplication. Given the column-major dense matrix type MT and the non-transpose dense vector type VT, the nested type Type corresponds to the resulting expression type. In case either MT is not a column-major dense matrix type or VT is not a non-transpose dense vector type, the resulting data type Type is set to INVALID_TYPE.
Definition: TDMatDVecMultExprTrait.h:79
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:421
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:463
Header file for BLAS general matrix/matrix multiplication functions (gemm)
Header file for the IsComplexFloat type trait.
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix.The Columns type trait evaluates the num...
Definition: Columns.h:76
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:357
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
Compile time evaluation of the number of rows of a matrix.The Rows type trait evaluates the number of...
Definition: Rows.h:76
Header file for the IsComplex type trait.
Header file for the DeclHerm functor.
Header file for the complex data type.
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions.The RightOperand_ alias declaration provid...
Definition: Aliases.h:363
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions.The TransposeType_ alias declaration prov...
Definition: Aliases.h:403
Header file for the IsUpper type trait.
Header file for the IsColumnVector type trait.
System settings for the debugging policy of the Blaze library.
Constraint on the data type.
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:485
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:677
Header file for the IsResizable type trait.
const DMatDMatMultExpr< T1, T2, false, false, false, false > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:7505
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
DisableIf_< IsDiagonal< MT >, const DMatDeclDiagExpr< MT, SO > > decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given non-diagonal dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:841
DisableIf_< IsUpper< MT >, const DMatDeclUppExpr< MT, SO > > declupp(const DenseMatrix< MT, SO > &dm)
Declares the given non-upper dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:842
Evaluation of the expression type of a column operation. Via this type trait it is possible to ev...
Definition: ColumnExprTrait.h:78
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the TDVecTDMatMultExprTrait class template.
Header file for the DeclSym functor.
Header file for the TrueType type/value trait base class.
Header file for the IsExpression type trait class.
Header file for the function trace functionality.