DMatDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemm.h>
44 #include <blaze/math/blas/trmm.h>
45 #include <blaze/math/Aliases.h>
52 #include <blaze/math/dense/MMM.h>
53 #include <blaze/math/Exception.h>
61 #include <blaze/math/Functions.h>
69 #include <blaze/math/shims/Reset.h>
71 #include <blaze/math/SIMD.h>
116 #include <blaze/system/BLAS.h>
117 #include <blaze/system/Blocking.h>
118 #include <blaze/system/Debugging.h>
120 #include <blaze/system/Thresholds.h>
121 #include <blaze/util/Assert.h>
122 #include <blaze/util/Complex.h>
126 #include <blaze/util/DisableIf.h>
127 #include <blaze/util/EnableIf.h>
130 #include <blaze/util/InvalidType.h>
131 #include <blaze/util/mpl/And.h>
132 #include <blaze/util/mpl/Bool.h>
133 #include <blaze/util/mpl/If.h>
134 #include <blaze/util/mpl/Not.h>
135 #include <blaze/util/mpl/Or.h>
136 #include <blaze/util/TrueType.h>
137 #include <blaze/util/Types.h>
147 
148 
149 namespace blaze {
150 
151 //=================================================================================================
152 //
153 // CLASS DMATDMATMULTEXPR
154 //
155 //=================================================================================================
156 
157 //*************************************************************************************************
164 template< typename MT1 // Type of the left-hand side dense matrix
165  , typename MT2 // Type of the right-hand side dense matrix
166  , bool SF // Symmetry flag
167  , bool HF // Hermitian flag
168  , bool LF // Lower flag
169  , bool UF > // Upper flag
170 class DMatDMatMultExpr : public DenseMatrix< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, false >
171  , private MatMatMultExpr
172  , private Computation
173 {
174  private:
175  //**Type definitions****************************************************************************
182  //**********************************************************************************************
183 
184  //**********************************************************************************************
// Compile-time flag: true when the left operand type MT1 is itself a computation
// or reports that it requires an intermediate evaluation.
186  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
187  //**********************************************************************************************
188 
189  //**********************************************************************************************
// Compile-time flag: true when the right operand type MT2 is itself a computation
// or reports that it requires an intermediate evaluation.
191  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
192  //**********************************************************************************************
193 
194  //**********************************************************************************************
// Effective structure flags of the product, derived from the raw SF/HF/LF/UF
// template arguments:
//  - SYM  : symmetric, only when no Hermitian, lower or upper flag is set as well
//  - HERM : Hermitian, only when neither the lower nor the upper flag is set
//  - LOW  : lower, either requested directly (LF) or implied by (SF or HF) combined with UF
//  - UPP  : upper, either requested directly (UF) or implied by (SF or HF) combined with LF
// LOW and UPP may both be true at the same time (e.g. when LF and UF are both set).
196  enum : bool {
197  SYM = ( SF && !( HF || LF || UF ) ),
198  HERM = ( HF && !( LF || UF ) ),
199  LOW = ( LF || ( ( SF || HF ) && UF ) ),
200  UPP = ( UF || ( ( SF || HF ) && LF ) )
201  };
202  //**********************************************************************************************
203 
204  //**********************************************************************************************
206 
// Helper trait over three types; the visible part of 'value' starts with a test
// that T1 is a column-major matrix.
// NOTE(review): the listing jumps from line 214 to line 216, so the remainder of
// the condition (and the brace closing the enum) is not visible in this extract.
// Restore it from the original header before compiling.
212  template< typename T1, typename T2, typename T3 >
213  struct CanExploitSymmetry {
214  enum : bool { value = IsColumnMajorMatrix<T1>::value &&
216  };
218  //**********************************************************************************************
219 
220  //**********************************************************************************************
222 
// Helper trait: 'value' is true when at least one operand has to be evaluated
// (evaluateLeft or evaluateRight) and CanExploitSymmetry<T1,T2,T3> does not hold.
226  template< typename T1, typename T2, typename T3 >
227  struct IsEvaluationRequired {
228  enum : bool { value = ( evaluateLeft || evaluateRight ) &&
229  !CanExploitSymmetry<T1,T2,T3>::value };
230  };
232  //**********************************************************************************************
233 
234  //**********************************************************************************************
236 
// Helper trait over three types; judging by its name it presumably decides whether
// a BLAS kernel may be used — confirm against the original header.
// NOTE(review): listing lines 241, 243-246 and 248-251 are missing from this extract,
// including the line that opens the enum. Only these conditions are visible:
//  - none of the structure flags SYM/HERM/LOW/UPP is set
//  - T1, T2 and T3 are all SIMD-enabled
//  - T1 and T3 have the same element type
239  template< typename T1, typename T2, typename T3 >
240  struct UseBlasKernel {
242  !SYM && !HERM && !LOW && !UPP &&
247  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
252  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
253  };
255  //**********************************************************************************************
256 
257  //**********************************************************************************************
259 
// Helper trait over three types; judging by its name it presumably decides whether
// the vectorized default kernel may be used — confirm against the original header.
// NOTE(review): listing lines 265, 267-268 and 270-271 are missing from this extract,
// so the condition below is incomplete (it ends on a dangling '&&'). Visible parts:
// useOptimizedKernels must be set and T1, T2 and T3 must all be SIMD-enabled.
262  template< typename T1, typename T2, typename T3 >
263  struct UseVectorizedDefaultKernel {
264  enum : bool { value = useOptimizedKernels &&
266  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
269  , ElementType_<T3> >::value &&
272  };
274  //**********************************************************************************************
275 
276  //**********************************************************************************************
278 
// Selects a functor type from the effective structure flags, in this priority:
//   HERM            -> DeclHerm
//   SYM             -> DeclSym
//   LOW and UPP     -> DeclDiag
//   LOW only        -> DeclLow
//   UPP only        -> DeclUpp
//   none of these   -> Noop
281  typedef IfTrue_< HERM
282  , DeclHerm
283  , IfTrue_< SYM
284  , DeclSym
285  , IfTrue_< LOW
286  , IfTrue_< UPP
287  , DeclDiag
288  , DeclLow >
289  , IfTrue_< UPP
290  , DeclUpp
291  , Noop > > > > ForwardFunctor;
293  //**********************************************************************************************
294 
295  public:
296  //**Type definitions****************************************************************************
299 
// Return type of the element access functions: a const ElementType, returned by value.
// (ElementType and ResultType are declared on lines not shown in this listing.)
305  typedef const ElementType ReturnType;
// CompositeType is a const ResultType.
306  typedef const ResultType CompositeType;
307 
// Storage type of the left operand: held by value when MT1 is an expression,
// otherwise by reference-to-const.
309  typedef If_< IsExpression<MT1>, const MT1, const MT1& > LeftOperand;
310 
// Storage type of the right operand: held by value when MT2 is an expression,
// otherwise by reference-to-const.
312  typedef If_< IsExpression<MT2>, const MT2, const MT2& > RightOperand;
313 
316 
319  //**********************************************************************************************
320 
321  //**Compilation flags***************************************************************************
// Compile-time switch for SIMD evaluation. Visible conditions: the right operand is
// not diagonal and both operand types are SIMD-enabled.
// NOTE(review): listing lines 325-326 are missing from this extract, so the rest of
// the condition and the closing brace are not visible here.
323  enum : bool { simdEnabled = !IsDiagonal<MT2>::value &&
324  MT1::simdEnabled && MT2::simdEnabled &&
327 
// Compile-time switch for SMP assignment: true only when neither operand needs an
// intermediate evaluation and both operand types are themselves SMP-assignable.
329  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
330  !evaluateRight && MT2::smpAssignable };
331  //**********************************************************************************************
332 
333  //**SIMD properties*****************************************************************************
// Value reported by SIMDTrait<ElementType>::size; the kernels below use it as the
// column step when loading from B and storing into C.
335  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
336  //**********************************************************************************************
337 
338  //**Constructor*********************************************************************************
344  explicit inline DMatDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
345  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
346  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
347  {
348  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
349  }
350  //**********************************************************************************************
351 
352  //**Access operator*****************************************************************************
// Unchecked element access: computes element (i,j) of the product on demand.
// The indices are validated by internal assertions only.
359  inline ReturnType operator()( size_t i, size_t j ) const {
360  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
361  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
362 
// Diagonal left operand: only lhs_(i,i) contributes to row i.
363  if( IsDiagonal<MT1>::value ) {
364  return lhs_(i,i) * rhs_(i,j);
365  }
// Diagonal right operand: only rhs_(j,j) contributes to column j.
366  else if( IsDiagonal<MT2>::value ) {
367  return lhs_(i,j) * rhs_(j,j);
368  }
// NOTE(review): listing line 369 is missing from this extract. It must open the branch
// that is closed at listing line 392; since the code below narrows the summation
// range using the upper/lower traits of both operands, it is presumably a test for
// triangular operands — confirm against the original header.
// 'begin' is the first summation index allowed by an upper MT1 and/or a lower MT2.
370  const size_t begin( ( IsUpper<MT1>::value )
371  ?( ( IsLower<MT2>::value )
372  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
373  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
374  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
375  :( ( IsLower<MT2>::value )
376  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
377  :( 0UL ) ) );
// 'end' is one past the last summation index allowed by a lower MT1 and/or an upper MT2.
378  const size_t end( ( IsLower<MT1>::value )
379  ?( ( IsUpper<MT2>::value )
380  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
381  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
382  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
383  :( ( IsUpper<MT2>::value )
384  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
385  :( lhs_.columns() ) ) );
386 
// Empty summation range: the element is a default-constructed ElementType.
387  if( begin >= end ) return ElementType();
388 
389  const size_t n( end - begin );
390 
// Product of the matching sub-ranges of row i of lhs_ and column j of rhs_.
391  return subvector( row( lhs_, i ), begin, n ) * subvector( column( rhs_, j ), begin, n );
392  }
// General case: full row i of lhs_ times full column j of rhs_.
393  else {
394  return row( lhs_, i ) * column( rhs_, j );
395  }
396  }
397  //**********************************************************************************************
398 
399  //**At function*********************************************************************************
407  inline ReturnType at( size_t i, size_t j ) const {
408  if( i >= lhs_.rows() ) {
409  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
410  }
411  if( j >= rhs_.columns() ) {
412  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
413  }
414  return (*this)(i,j);
415  }
416  //**********************************************************************************************
417 
418  //**Rows function*******************************************************************************
423  inline size_t rows() const noexcept {
424  return lhs_.rows();
425  }
426  //**********************************************************************************************
427 
428  //**Columns function****************************************************************************
433  inline size_t columns() const noexcept {
434  return rhs_.columns();
435  }
436  //**********************************************************************************************
437 
438  //**Left operand access*************************************************************************
443  inline LeftOperand leftOperand() const noexcept {
444  return lhs_;
445  }
446  //**********************************************************************************************
447 
448  //**Right operand access************************************************************************
453  inline RightOperand rightOperand() const noexcept {
454  return rhs_;
455  }
456  //**********************************************************************************************
457 
458  //**********************************************************************************************
464  template< typename T >
465  inline bool canAlias( const T* alias ) const noexcept {
466  return ( lhs_.canAlias( alias ) || rhs_.canAlias( alias ) );
467  }
468  //**********************************************************************************************
469 
470  //**********************************************************************************************
476  template< typename T >
477  inline bool isAliased( const T* alias ) const noexcept {
478  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
479  }
480  //**********************************************************************************************
481 
482  //**********************************************************************************************
487  inline bool isAligned() const noexcept {
488  return lhs_.isAligned() && rhs_.isAligned();
489  }
490  //**********************************************************************************************
491 
492  //**********************************************************************************************
// Runtime check whether the expression may be used in an SMP assignment.
// Visible conditions: either BLAS is not parallel or the result has fewer elements
// than DMATDMATMULT_THRESHOLD, and the result has at least SMP_DMATDMATMULT_THRESHOLD
// elements.
// NOTE(review): listing line 501 is missing from this extract, so the final operand of
// the expression (after the trailing '&&') and the terminating ';' are not visible.
// Restore them from the original header before compiling.
497  inline bool canSMPAssign() const noexcept {
498  return ( !BLAZE_BLAS_IS_PARALLEL ||
499  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
500  ( rows() * columns() >= SMP_DMATDMATMULT_THRESHOLD ) &&
502  }
503  //**********************************************************************************************
504 
505  private:
506  //**Member variables****************************************************************************
// The two operands of the product. Whether each is stored by value or by
// reference-to-const is decided by the LeftOperand/RightOperand typedefs.
507  LeftOperand lhs_;
508  RightOperand rhs_;
509  //**********************************************************************************************
510 
511  //**Assignment to dense matrices****************************************************************
// Assignment of the product expression 'rhs' to the dense matrix 'lhs'.
// NOTE(review): listing lines 526 (between the template header and the function name,
// i.e. where the return type belongs) and 529 (first line of the body) are missing from
// this extract; LT and RT are declared on lines not shown here.
//   lhs  Target dense matrix; expected to already have the dimensions of the product.
//   rhs  The multiplication expression to evaluate.
524  template< typename MT // Type of the target dense matrix
525  , bool SO > // Storage order of the target dense matrix
527  assign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
528  {
530 
531  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
532  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
533 
// Nothing to do for an empty target; with an inner dimension of zero the target
// is reset instead of running a kernel.
534  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
535  return;
536  }
537  else if( rhs.lhs_.columns() == 0UL ) {
538  reset( ~lhs );
539  return;
540  }
541 
542  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
543  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
544 
545  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
546  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
547  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
548  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
549  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
550  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
551 
// Hand the evaluated operands to the kernel selection.
552  DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
553  }
555  //**********************************************************************************************
556 
557  //**Assignment to dense matrices (kernel selection)*********************************************
568  template< typename MT3 // Type of the left-hand side target matrix
569  , typename MT4 // Type of the left-hand side matrix operand
570  , typename MT5 > // Type of the right-hand side matrix operand
571  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
572  {
573  if( ( IsDiagonal<MT5>::value ) ||
574  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
575  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
576  selectSmallAssignKernel( C, A, B );
577  else
578  selectBlasAssignKernel( C, A, B );
579  }
581  //**********************************************************************************************
582 
583  //**Default assignment to dense matrices (general/general)**************************************
// Default (scalar) kernel computing C = A * B row by row (general/general overload).
// NOTE(review): listing line 600 (between the template header and the function name,
// i.e. where the return type / enabling condition belongs) is missing from this extract,
// as are lines 628, 633, 662 and 667 (one line inside each nested jbegin/jend
// conditional). Restore them from the original header before compiling.
//   C  Target matrix, written element by element.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
597  template< typename MT3 // Type of the left-hand side target matrix
598  , typename MT4 // Type of the left-hand side matrix operand
599  , typename MT5 > // Type of the right-hand side matrix operand
601  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
602  {
603  const size_t M( A.rows() );
604  const size_t N( B.columns() );
605  const size_t K( A.columns() );
606 
// Any structure flag on the result implies a square result.
607  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
608 
609  for( size_t i=0UL; i<M; ++i )
610  {
// [kbegin,kend) is the range of k used for row i, narrowed by the upper/lower
// traits of the left operand type.
611  const size_t kbegin( ( IsUpper<MT4>::value )
612  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
613  :( 0UL ) );
614  const size_t kend( ( IsLower<MT4>::value )
615  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
616  :( K ) );
617  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
618 
// Strictly triangular A with an empty k range: the whole row of C is reset.
619  if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
620  for( size_t j=0UL; j<N; ++j ) {
621  reset( C(i,j) );
622  }
623  continue;
624  }
625 
// First contribution (k == kbegin): elements in [jbegin,jend) are assigned rather
// than accumulated, and elements outside that range are reset where the structure
// of the operands or of the result requires it.
626  {
627  const size_t jbegin( ( IsUpper<MT5>::value )
629  ?( UPP ? max(i,kbegin+1UL) : kbegin+1UL )
630  :( UPP ? max(i,kbegin) : kbegin ) )
631  :( UPP ? i : 0UL ) );
632  const size_t jend( ( IsLower<MT5>::value )
634  ?( LOW ? min(i+1UL,kbegin) : kbegin )
635  :( LOW ? min(i,kbegin)+1UL : kbegin+1UL ) )
636  :( LOW ? i+1UL : N ) );
637 
638  if( ( IsUpper<MT4>::value && IsUpper<MT5>::value ) || UPP ) {
639  for( size_t j=0UL; j<jbegin; ++j ) {
640  reset( C(i,j) );
641  }
642  }
643  else if( IsStrictlyUpper<MT5>::value ) {
644  reset( C(i,0UL) );
645  }
646  for( size_t j=jbegin; j<jend; ++j ) {
647  C(i,j) = A(i,kbegin) * B(kbegin,j);
648  }
649  if( ( IsLower<MT4>::value && IsLower<MT5>::value ) || LOW ) {
650  for( size_t j=jend; j<N; ++j ) {
651  reset( C(i,j) );
652  }
653  }
654  else if( IsStrictlyLower<MT5>::value ) {
655  reset( C(i,N-1UL) );
656  }
657  }
658 
// Remaining contributions (k > kbegin) are accumulated into row i.
659  for( size_t k=kbegin+1UL; k<kend; ++k )
660  {
661  const size_t jbegin( ( IsUpper<MT5>::value )
663  ?( SYM || HERM || UPP ? max( i, k+1UL ) : k+1UL )
664  :( SYM || HERM || UPP ? max( i, k ) : k ) )
665  :( SYM || HERM || UPP ? i : 0UL ) );
666  const size_t jend( ( IsLower<MT5>::value )
668  ?( LOW ? min(i+1UL,k-1UL) : k-1UL )
669  :( LOW ? min(i+1UL,k) : k ) )
670  :( LOW ? i+1UL : N ) );
671 
// With a structured result the column range can be empty; skip this k then.
672  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) continue;
673  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
674 
675  for( size_t j=jbegin; j<jend; ++j ) {
676  C(i,j) += A(i,k) * B(k,j);
677  }
// For a lower right operand the element in column jend is assigned, not accumulated.
678  if( IsLower<MT5>::value ) {
679  C(i,jend) = A(i,k) * B(k,jend);
680  }
681  }
682  }
683 
// Symmetric/Hermitian result: fill the strictly lower part from the upper part
// (conjugated in the Hermitian case).
684  if( SYM || HERM ) {
685  for( size_t i=1UL; i<M; ++i ) {
686  for( size_t j=0UL; j<i; ++j ) {
687  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
688  }
689  }
690  }
691  }
693  //**********************************************************************************************
694 
695  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default kernel computing C = A * B for a non-diagonal left operand and a diagonal
// right operand (see the EnableIf_ condition): each element is A(i,j) * B(j,j).
// NOTE(review): listing line 715 (first line of the body) is missing from this extract;
// check the original header for a statement that belongs there.
//   C  Target matrix, written element by element.
//   A  Left-hand side operand.
//   B  Right-hand side (diagonal) operand.
709  template< typename MT3 // Type of the left-hand side target matrix
710  , typename MT4 // Type of the left-hand side matrix operand
711  , typename MT5 > // Type of the right-hand side matrix operand
712  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
713  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
714  {
716 
717  const size_t M( A.rows() );
718  const size_t N( B.columns() );
719 
720  for( size_t i=0UL; i<M; ++i )
721  {
// [jbegin,jend) is the column range of row i that is computed, narrowed by the
// upper/lower traits of the left operand type.
722  const size_t jbegin( ( IsUpper<MT4>::value )
723  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
724  :( 0UL ) );
725  const size_t jend( ( IsLower<MT4>::value )
726  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
727  :( N ) );
728  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
729 
// Columns before jbegin are reset for an upper left operand.
730  if( IsUpper<MT4>::value ) {
731  for( size_t j=0UL; j<jbegin; ++j ) {
732  reset( C(i,j) );
733  }
734  }
735  for( size_t j=jbegin; j<jend; ++j ) {
736  C(i,j) = A(i,j) * B(j,j);
737  }
// Columns from jend onwards are reset for a lower left operand.
738  if( IsLower<MT4>::value ) {
739  for( size_t j=jend; j<N; ++j ) {
740  reset( C(i,j) );
741  }
742  }
743  }
744  }
746  //**********************************************************************************************
747 
748  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default kernel computing C = A * B where each element is A(i,i) * B(i,j), i.e. only
// the diagonal of the left operand is read.
// NOTE(review): listing line 765 (between the template header and the function name,
// i.e. where the return type / enabling condition belongs) and line 768 (first line of
// the body) are missing from this extract. Restore them from the original header.
//   C  Target matrix, written element by element.
//   A  Left-hand side operand (only its diagonal is accessed).
//   B  Right-hand side operand.
762  template< typename MT3 // Type of the left-hand side target matrix
763  , typename MT4 // Type of the left-hand side matrix operand
764  , typename MT5 > // Type of the right-hand side matrix operand
766  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
767  {
769 
770  const size_t M( A.rows() );
771  const size_t N( B.columns() );
772 
773  for( size_t i=0UL; i<M; ++i )
774  {
// [jbegin,jend) is the column range of row i that is computed, narrowed by the
// upper/lower traits of the right operand type.
775  const size_t jbegin( ( IsUpper<MT5>::value )
776  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
777  :( 0UL ) );
778  const size_t jend( ( IsLower<MT5>::value )
779  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
780  :( N ) );
781  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
782 
// Columns before jbegin are reset for an upper right operand.
783  if( IsUpper<MT5>::value ) {
784  for( size_t j=0UL; j<jbegin; ++j ) {
785  reset( C(i,j) );
786  }
787  }
788  for( size_t j=jbegin; j<jend; ++j ) {
789  C(i,j) = A(i,i) * B(i,j);
790  }
// Columns from jend onwards are reset for a lower right operand.
791  if( IsLower<MT5>::value ) {
792  for( size_t j=jend; j<N; ++j ) {
793  reset( C(i,j) );
794  }
795  }
796  }
797  }
799  //**********************************************************************************************
800 
801  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default kernel computing C = A * B when both operands are diagonal (see the
// EnableIf_ condition): C is reset, then only its diagonal is written.
// NOTE(review): listing line 821 (first line of the body) is missing from this extract;
// check the original header for a statement that belongs there.
//   C  Target matrix.
//   A  Left-hand side (diagonal) operand.
//   B  Right-hand side (diagonal) operand.
815  template< typename MT3 // Type of the left-hand side target matrix
816  , typename MT4 // Type of the left-hand side matrix operand
817  , typename MT5 > // Type of the right-hand side matrix operand
818  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
819  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
820  {
822 
// Clear the whole target first; the loop below only touches the diagonal.
823  reset( C );
824 
825  for( size_t i=0UL; i<A.rows(); ++i ) {
826  C(i,i) = A(i,i) * B(i,i);
827  }
828  }
830  //**********************************************************************************************
831 
832  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix kernel overload that simply forwards to selectDefaultAssignKernel.
// NOTE(review): listing line 848 (between the template header and the function name,
// i.e. where the return type / enabling condition belongs) is missing from this
// extract. Restore it from the original header before compiling.
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
845  template< typename MT3 // Type of the left-hand side target matrix
846  , typename MT4 // Type of the left-hand side matrix operand
847  , typename MT5 > // Type of the right-hand side matrix operand
849  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
850  {
851  selectDefaultAssignKernel( C, A, B );
852  }
854  //**********************************************************************************************
855 
856  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
871  template< typename MT3 // Type of the left-hand side target matrix
872  , typename MT4 // Type of the left-hand side matrix operand
873  , typename MT5 > // Type of the right-hand side matrix operand
875  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
876  {
877  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
878 
879  const size_t M( A.rows() );
880  const size_t N( B.columns() );
881  const size_t K( A.columns() );
882 
883  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
884 
885  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
886  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
887 
888  if( LOW && UPP && N > SIMDSIZE*3UL ) {
889  reset( ~C );
890  }
891 
892  {
893  size_t j( 0UL );
894 
896  {
897  for( ; !SYM && !HERM && !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
898  for( size_t i=0UL; i<M; ++i )
899  {
900  const size_t kbegin( ( IsUpper<MT4>::value )
901  ?( ( IsLower<MT5>::value )
902  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
903  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
904  :( IsLower<MT5>::value ? j : 0UL ) );
905  const size_t kend( ( IsLower<MT4>::value )
906  ?( ( IsUpper<MT5>::value )
907  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
908  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
909  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
910 
911  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
912 
913  for( size_t k=kbegin; k<kend; ++k ) {
914  const SIMDType a1( set( A(i,k) ) );
915  xmm1 += a1 * B.load(k,j );
916  xmm2 += a1 * B.load(k,j+SIMDSIZE );
917  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
918  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
919  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
920  xmm6 += a1 * B.load(k,j+SIMDSIZE*5UL);
921  xmm7 += a1 * B.load(k,j+SIMDSIZE*6UL);
922  xmm8 += a1 * B.load(k,j+SIMDSIZE*7UL);
923  }
924 
925  (~C).store( i, j , xmm1 );
926  (~C).store( i, j+SIMDSIZE , xmm2 );
927  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
928  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
929  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
930  (~C).store( i, j+SIMDSIZE*5UL, xmm6 );
931  (~C).store( i, j+SIMDSIZE*6UL, xmm7 );
932  (~C).store( i, j+SIMDSIZE*7UL, xmm8 );
933  }
934  }
935  }
936 
937  for( ; !SYM && !HERM && !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
938  {
939  size_t i( 0UL );
940 
941  for( ; (i+2UL) <= M; i+=2UL )
942  {
943  const size_t kbegin( ( IsUpper<MT4>::value )
944  ?( ( IsLower<MT5>::value )
945  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
946  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
947  :( IsLower<MT5>::value ? j : 0UL ) );
948  const size_t kend( ( IsLower<MT4>::value )
949  ?( ( IsUpper<MT5>::value )
950  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
951  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
952  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
953 
954  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
955 
956  for( size_t k=kbegin; k<kend; ++k ) {
957  const SIMDType a1( set( A(i ,k) ) );
958  const SIMDType a2( set( A(i+1UL,k) ) );
959  const SIMDType b1( B.load(k,j ) );
960  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
961  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
962  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
963  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
964  xmm1 += a1 * b1;
965  xmm2 += a1 * b2;
966  xmm3 += a1 * b3;
967  xmm4 += a1 * b4;
968  xmm5 += a1 * b5;
969  xmm6 += a2 * b1;
970  xmm7 += a2 * b2;
971  xmm8 += a2 * b3;
972  xmm9 += a2 * b4;
973  xmm10 += a2 * b5;
974  }
975 
976  (~C).store( i , j , xmm1 );
977  (~C).store( i , j+SIMDSIZE , xmm2 );
978  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
979  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
980  (~C).store( i , j+SIMDSIZE*4UL, xmm5 );
981  (~C).store( i+1UL, j , xmm6 );
982  (~C).store( i+1UL, j+SIMDSIZE , xmm7 );
983  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm8 );
984  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm9 );
985  (~C).store( i+1UL, j+SIMDSIZE*4UL, xmm10 );
986  }
987 
988  if( i < M )
989  {
990  const size_t kbegin( ( IsUpper<MT4>::value )
991  ?( ( IsLower<MT5>::value )
992  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
993  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
994  :( IsLower<MT5>::value ? j : 0UL ) );
995  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
996 
997  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
998 
999  for( size_t k=kbegin; k<kend; ++k ) {
1000  const SIMDType a1( set( A(i,k) ) );
1001  xmm1 += a1 * B.load(k,j );
1002  xmm2 += a1 * B.load(k,j+SIMDSIZE );
1003  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
1004  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
1005  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
1006  }
1007 
1008  (~C).store( i, j , xmm1 );
1009  (~C).store( i, j+SIMDSIZE , xmm2 );
1010  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
1011  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
1012  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
1013  }
1014  }
1015 
1016  for( ; !( LOW && UPP ) && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
1017  {
1018  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*4UL,M) : M );
1019  size_t i( LOW ? j : 0UL );
1020 
1021  for( ; (i+2UL) <= iend; i+=2UL )
1022  {
1023  const size_t kbegin( ( IsUpper<MT4>::value )
1024  ?( ( IsLower<MT5>::value )
1025  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1026  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1027  :( IsLower<MT5>::value ? j : 0UL ) );
1028  const size_t kend( ( IsLower<MT4>::value )
1029  ?( ( IsUpper<MT5>::value )
1030  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
1031  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
1032  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
1033 
1034  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1035 
1036  for( size_t k=kbegin; k<kend; ++k ) {
1037  const SIMDType a1( set( A(i ,k) ) );
1038  const SIMDType a2( set( A(i+1UL,k) ) );
1039  const SIMDType b1( B.load(k,j ) );
1040  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
1041  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
1042  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
1043  xmm1 += a1 * b1;
1044  xmm2 += a1 * b2;
1045  xmm3 += a1 * b3;
1046  xmm4 += a1 * b4;
1047  xmm5 += a2 * b1;
1048  xmm6 += a2 * b2;
1049  xmm7 += a2 * b3;
1050  xmm8 += a2 * b4;
1051  }
1052 
1053  (~C).store( i , j , xmm1 );
1054  (~C).store( i , j+SIMDSIZE , xmm2 );
1055  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
1056  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
1057  (~C).store( i+1UL, j , xmm5 );
1058  (~C).store( i+1UL, j+SIMDSIZE , xmm6 );
1059  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm7 );
1060  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm8 );
1061  }
1062 
1063  if( i < iend )
1064  {
1065  const size_t kbegin( ( IsUpper<MT4>::value )
1066  ?( ( IsLower<MT5>::value )
1067  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1068  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1069  :( IsLower<MT5>::value ? j : 0UL ) );
1070  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
1071 
1072  SIMDType xmm1, xmm2, xmm3, xmm4;
1073 
1074  for( size_t k=kbegin; k<kend; ++k ) {
1075  const SIMDType a1( set( A(i,k) ) );
1076  xmm1 += a1 * B.load(k,j );
1077  xmm2 += a1 * B.load(k,j+SIMDSIZE );
1078  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
1079  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
1080  }
1081 
1082  (~C).store( i, j , xmm1 );
1083  (~C).store( i, j+SIMDSIZE , xmm2 );
1084  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
1085  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
1086  }
1087  }
1088 
1089  for( ; (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
1090  {
1091  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*3UL,M) : M );
1092  size_t i( LOW ? j : 0UL );
1093 
1094  for( ; (i+2UL) <= iend; i+=2UL )
1095  {
1096  const size_t kbegin( ( IsUpper<MT4>::value )
1097  ?( ( IsLower<MT5>::value )
1098  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1099  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1100  :( IsLower<MT5>::value ? j : 0UL ) );
1101  const size_t kend( ( IsLower<MT4>::value )
1102  ?( ( IsUpper<MT5>::value )
1103  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
1104  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
1105  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
1106 
1107  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
1108 
1109  for( size_t k=kbegin; k<kend; ++k ) {
1110  const SIMDType a1( set( A(i ,k) ) );
1111  const SIMDType a2( set( A(i+1UL,k) ) );
1112  const SIMDType b1( B.load(k,j ) );
1113  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
1114  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
1115  xmm1 += a1 * b1;
1116  xmm2 += a1 * b2;
1117  xmm3 += a1 * b3;
1118  xmm4 += a2 * b1;
1119  xmm5 += a2 * b2;
1120  xmm6 += a2 * b3;
1121  }
1122 
1123  (~C).store( i , j , xmm1 );
1124  (~C).store( i , j+SIMDSIZE , xmm2 );
1125  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
1126  (~C).store( i+1UL, j , xmm4 );
1127  (~C).store( i+1UL, j+SIMDSIZE , xmm5 );
1128  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm6 );
1129  }
1130 
1131  if( i < iend )
1132  {
1133  const size_t kbegin( ( IsUpper<MT4>::value )
1134  ?( ( IsLower<MT5>::value )
1135  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1136  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1137  :( IsLower<MT5>::value ? j : 0UL ) );
1138  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
1139 
1140  SIMDType xmm1, xmm2, xmm3;
1141 
1142  for( size_t k=kbegin; k<kend; ++k ) {
1143  const SIMDType a1( set( A(i,k) ) );
1144  xmm1 += a1 * B.load(k,j );
1145  xmm2 += a1 * B.load(k,j+SIMDSIZE );
1146  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
1147  }
1148 
1149  (~C).store( i, j , xmm1 );
1150  (~C).store( i, j+SIMDSIZE , xmm2 );
1151  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
1152  }
1153  }
1154 
1155  for( ; (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
1156  {
1157  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*2UL,M) : M );
1158  size_t i( LOW ? j : 0UL );
1159 
1160  for( ; (i+2UL) <= iend; i+=2UL )
1161  {
1162  const size_t kbegin( ( IsUpper<MT4>::value )
1163  ?( ( IsLower<MT5>::value )
1164  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1165  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1166  :( IsLower<MT5>::value ? j : 0UL ) );
1167  const size_t kend( ( IsLower<MT4>::value )
1168  ?( ( IsUpper<MT5>::value )
1169  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
1170  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
1171  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
1172 
1173  SIMDType xmm1, xmm2, xmm3, xmm4;
1174 
1175  for( size_t k=kbegin; k<kend; ++k ) {
1176  const SIMDType a1( set( A(i ,k) ) );
1177  const SIMDType a2( set( A(i+1UL,k) ) );
1178  const SIMDType b1( B.load(k,j ) );
1179  const SIMDType b2( B.load(k,j+SIMDSIZE) );
1180  xmm1 += a1 * b1;
1181  xmm2 += a1 * b2;
1182  xmm3 += a2 * b1;
1183  xmm4 += a2 * b2;
1184  }
1185 
1186  (~C).store( i , j , xmm1 );
1187  (~C).store( i , j+SIMDSIZE, xmm2 );
1188  (~C).store( i+1UL, j , xmm3 );
1189  (~C).store( i+1UL, j+SIMDSIZE, xmm4 );
1190  }
1191 
1192  if( i < iend )
1193  {
1194  const size_t kbegin( ( IsUpper<MT4>::value )
1195  ?( ( IsLower<MT5>::value )
1196  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1197  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1198  :( IsLower<MT5>::value ? j : 0UL ) );
1199  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
1200 
1201  SIMDType xmm1, xmm2;
1202 
1203  for( size_t k=kbegin; k<kend; ++k ) {
1204  const SIMDType a1( set( A(i,k) ) );
1205  xmm1 += a1 * B.load(k,j );
1206  xmm2 += a1 * B.load(k,j+SIMDSIZE);
1207  }
1208 
1209  (~C).store( i, j , xmm1 );
1210  (~C).store( i, j+SIMDSIZE, xmm2 );
1211  }
1212  }
1213 
1214  for( ; j<jpos; j+=SIMDSIZE )
1215  {
1216  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE,M) : M );
1217  size_t i( LOW ? j : 0UL );
1218 
1219  for( ; (i+2UL) <= iend; i+=2UL )
1220  {
1221  const size_t kbegin( ( IsUpper<MT4>::value )
1222  ?( ( IsLower<MT5>::value )
1223  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1224  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1225  :( IsLower<MT5>::value ? j : 0UL ) );
1226  const size_t kend( ( IsLower<MT4>::value )
1227  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
1228  :( K ) );
1229 
1230  SIMDType xmm1, xmm2;
1231 
1232  for( size_t k=kbegin; k<kend; ++k ) {
1233  const SIMDType b1( B.load(k,j) );
1234  xmm1 += set( A(i ,k) ) * b1;
1235  xmm2 += set( A(i+1UL,k) ) * b1;
1236  }
1237 
1238  (~C).store( i , j, xmm1 );
1239  (~C).store( i+1UL, j, xmm2 );
1240  }
1241 
1242  if( i < iend )
1243  {
1244  const size_t kbegin( ( IsUpper<MT4>::value )
1245  ?( ( IsLower<MT5>::value )
1246  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1247  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1248  :( IsLower<MT5>::value ? j : 0UL ) );
1249 
1250  SIMDType xmm1;
1251 
1252  for( size_t k=kbegin; k<K; ++k ) {
1253  xmm1 += set( A(i,k) ) * B.load(k,j);
1254  }
1255 
1256  (~C).store( i, j, xmm1 );
1257  }
1258  }
1259 
1260  for( ; remainder && j<N; ++j )
1261  {
1262  size_t i( LOW && UPP ? j : 0UL );
1263 
1264  for( ; (i+2UL) <= M; i+=2UL )
1265  {
1266  const size_t kbegin( ( IsUpper<MT4>::value )
1267  ?( ( IsLower<MT5>::value )
1268  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1269  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1270  :( IsLower<MT5>::value ? j : 0UL ) );
1271  const size_t kend( ( IsLower<MT4>::value )
1272  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
1273  :( K ) );
1274 
1275  ElementType value1 = ElementType();
1276  ElementType value2 = ElementType();
1277 
1278  for( size_t k=kbegin; k<kend; ++k ) {
1279  value1 += A(i ,k) * B(k,j);
1280  value2 += A(i+1UL,k) * B(k,j);
1281  }
1282 
1283  (~C)(i ,j) = value1;
1284  (~C)(i+1UL,j) = value2;
1285  }
1286 
1287  if( i < M )
1288  {
1289  const size_t kbegin( ( IsUpper<MT4>::value )
1290  ?( ( IsLower<MT5>::value )
1291  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1292  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1293  :( IsLower<MT5>::value ? j : 0UL ) );
1294 
1295  ElementType value = ElementType();
1296 
1297  for( size_t k=kbegin; k<K; ++k ) {
1298  value += A(i,k) * B(k,j);
1299  }
1300 
1301  (~C)(i,j) = value;
1302  }
1303  }
1304  }
1305 
1306  if( ( SYM || HERM ) && ( N > SIMDSIZE*4UL ) ) {
1307  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
1308  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
1309  for( size_t j=0UL; j<jend; ++j ) {
1310  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
1311  }
1312  }
1313  }
1314  else if( LOW && !UPP && N > SIMDSIZE*4UL ) {
1315  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
1316  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
1317  for( size_t i=0UL; i<iend; ++i ) {
1318  reset( (~C)(i,j) );
1319  }
1320  }
1321  }
1322  else if( !LOW && UPP && N > SIMDSIZE*4UL ) {
1323  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
1324  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
1325  for( size_t j=0UL; j<jend; ++j ) {
1326  reset( (~C)(i,j) );
1327  }
1328  }
1329  }
1330  }
1332  //**********************************************************************************************
1333 
1334  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Small-matrix assignment kernel for a column-major target (C is a DenseMatrix<MT3,true>).
// Every visible branch follows the same recipe: one operand is evaluated serially into a
// temporary of the opposite storage order (OppositeType_<MT4> from A, or OppositeType_<MT5>
// from B), the product is rebuilt with that temporary, and the result is passed through the
// forwarding functor back into assign().
// NOTE(review): the return type/enable-if line and the conditions of the first two branches
// are not part of this extract (the embedded listing numbers skip 1352, 1362 and 1366);
// confirm them against the upstream header before relying on the branch order.
1349 template< typename MT3 // Type of the left-hand side target matrix
1350 , typename MT4 // Type of the left-hand side matrix operand
1351 , typename MT5 > // Type of the right-hand side matrix operand
1353 selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1354 {
1359
1360 const ForwardFunctor fwd;
1361
// First branch (condition not visible here): convert A, multiply with the original B.
1363 const OppositeType_<MT4> tmp( serial( A ) );
1364 assign( ~C, fwd( tmp * B ) );
1365 }
// Second branch (condition not visible here): convert B, multiply with the original A.
1367 const OppositeType_<MT5> tmp( serial( B ) );
1368 assign( ~C, fwd( A * tmp ) );
1369 }
// Size-based choice: when A has no more elements than B, A is the operand that is converted ...
1370 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
1371 const OppositeType_<MT4> tmp( serial( A ) );
1372 assign( ~C, fwd( tmp * B ) );
1373 }
// ... otherwise B has fewer elements and is converted instead.
1374 else {
1375 const OppositeType_<MT5> tmp( serial( B ) );
1376 assign( ~C, fwd( A * tmp ) );
1377 }
1378 }
1380  //**********************************************************************************************
1381 
1382  //**Default assignment to dense matrices (large matrices)***************************************
// Fallback large-matrix assignment kernel: it does no work of its own and simply forwards
// C, A and B to selectDefaultAssignKernel().
// NOTE(review): the return type/enable-if line (listing line 1398) is not part of this
// extract, so the condition that selects this overload cannot be read off here.
1395 template< typename MT3 // Type of the left-hand side target matrix
1396 , typename MT4 // Type of the left-hand side matrix operand
1397 , typename MT5 > // Type of the right-hand side matrix operand
1399 selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1400 {
1401 selectDefaultAssignKernel( C, A, B );
1402 }
1404  //**********************************************************************************************
1405 
1406  //**Vectorized default assignment to dense matrices (large matrices)****************************
// Large-matrix assignment kernel that delegates to one of the *mmm() routines. The routine
// is picked from the SYM/HERM/LOW/UPP flags, tested in exactly this order:
//   SYM -> smmm, HERM -> hmmm, LOW -> lmmm, UPP -> ummm, none of them -> mmm.
// NOTE(review): ElementType(1) and ElementType(0) look like the scale factor of the product
// and the scale factor of the previous content of C (i.e. C = 1*A*B + 0*C); confirm against
// the declarations in <blaze/math/dense/MMM.h>. The return type/enable-if line (listing
// line 1423) is not part of this extract.
1420 template< typename MT3 // Type of the left-hand side target matrix
1421 , typename MT4 // Type of the left-hand side matrix operand
1422 , typename MT5 > // Type of the right-hand side matrix operand
1424 selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1425 {
1426 if( SYM )
1427 smmm( C, A, B, ElementType(1) );
1428 else if( HERM )
1429 hmmm( C, A, B, ElementType(1) );
1430 else if( LOW )
1431 lmmm( C, A, B, ElementType(1), ElementType(0) );
1432 else if( UPP )
1433 ummm( C, A, B, ElementType(1), ElementType(0) );
1434 else
1435 mmm( C, A, B, ElementType(1), ElementType(0) );
1436 }
1438  //**********************************************************************************************
1439 
1440  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback BLAS assignment kernel: no BLAS routine is called here, the arguments are simply
// forwarded to selectLargeAssignKernel().
// NOTE(review): the return type/enable-if line (listing line 1456) is not part of this
// extract, so the condition that selects this overload cannot be read off here.
1453 template< typename MT3 // Type of the left-hand side target matrix
1454 , typename MT4 // Type of the left-hand side matrix operand
1455 , typename MT5 > // Type of the right-hand side matrix operand
1457 selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1458 {
1459 selectLargeAssignKernel( C, A, B );
1460 }
1462  //**********************************************************************************************
1463 
1464  //**BLAS-based assignment to dense matrices*****************************************************
1465 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
1466 
// BLAS-backed assignment kernel. It is only compiled when the surrounding preprocessor
// condition (BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION) holds.
//   - A triangular: C is first assigned B, then trmm() is called with A on the left side.
//   - B triangular: C is first assigned A, then trmm() is called with B on the right side.
//   - otherwise:    one gemm() call with the scalars ET(1) and ET(0).
// The CblasLower/CblasUpper argument follows IsLower<> of the triangular operand.
// NOTE(review): presumably trmm() multiplies C in place by the triangular operand and the
// gemm() scalars are alpha=1, beta=0; confirm against <blaze/math/blas/trmm.h> and gemm.h.
// The return type/enable-if line (listing line 1481) is not part of this extract.
1478 template< typename MT3 // Type of the left-hand side target matrix
1479 , typename MT4 // Type of the left-hand side matrix operand
1480 , typename MT5 > // Type of the right-hand side matrix operand
1482 selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1483 {
// Element type of the target; used to build the scalar arguments below.
1484 typedef ElementType_<MT3> ET;
1485
1486 if( IsTriangular<MT4>::value ) {
1487 assign( C, B );
1488 trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1489 }
1490 else if( IsTriangular<MT5>::value ) {
1491 assign( C, A );
1492 trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1493 }
1494 else {
1495 gemm( C, A, B, ET(1), ET(0) );
1496 }
1497 }
1499 #endif
1500  //**********************************************************************************************
1501 
1502  //**Assignment to sparse matrices***************************************************************
// Assignment of the matrix product to a sparse target matrix.
// The expression is first evaluated serially into a temporary of type TmpType; that
// temporary is then assigned to the target through the forwarding functor.
// NOTE(review): the return type line, the definition of TmpType and further statements at
// the top of the body (listing lines 1517, 1520, 1522 and 1524-1529) are not part of this
// extract; confirm them against the upstream header.
1515 template< typename MT // Type of the target sparse matrix
1516 , bool SO > // Storage order of the target sparse matrix
1518 assign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1519 {
1521
1523
1530
// The target must already have the dimensions of the product.
1531 BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1532 BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1533
1534 const ForwardFunctor fwd;
1535
1536 const TmpType tmp( serial( rhs ) );
1537 assign( ~lhs, fwd( tmp ) );
1538 }
1540  //**********************************************************************************************
1541 
1542  //**Restructuring assignment to column-major matrices*******************************************
// Restructuring assignment to a column-major target matrix.
// Instead of evaluating lhs_ * rhs_ as written, the product is rebuilt with trans() applied
// to one or both operands and then assigned through the forwarding functor:
//   - first branch (condition not visible here): both operands are transposed;
//   - IsSymmetric<MT1>:                          only the left operand is transposed;
//   - otherwise:                                 only the right operand is transposed.
// NOTE(review): presumably each transposed operand is symmetric, which would make the
// rewritten product equal to the original one; the guarding return type and the leading
// `if` (listing lines 1558 and 1570, plus 1561/1563) are not part of this extract, so
// confirm against the upstream header.
1557 template< typename MT > // Type of the target matrix
1559 assign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
1560 {
1562
1564
// The target must already have the dimensions of the product.
1565 BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1566 BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1567
1568 const ForwardFunctor fwd;
1569
1571 assign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
1572 else if( IsSymmetric<MT1>::value )
1573 assign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
1574 else
1575 assign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
1576 }
1578  //**********************************************************************************************
1579 
1580  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of the matrix product to a dense matrix.
// Steps: check that the target has the dimensions of the product, return early when there
// is nothing to add, evaluate both operands serially, re-check all dimensions, and finally
// hand over to selectAddAssignKernel().
// NOTE(review): the return type line and one statement at the top of the body (listing
// lines 1595 and 1598) are not part of this extract.
1593 template< typename MT // Type of the target dense matrix
1594 , bool SO > // Storage order of the target dense matrix
1596 addAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
1597 {
1599
1600 BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1601 BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1602
// Nothing to do if the target has no rows or no columns, or if the inner dimension of the
// product (the number of columns of the left operand) is zero.
1603 if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1604 return;
1605 }
1606
1607 LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1608 RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1609
// The evaluated operands must keep the sizes of the original operands and fit the target.
1610 BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1611 BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1612 BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1613 BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1614 BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1615 BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1616
1617 DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1618 }
1620  //**********************************************************************************************
1621 
1622  //**Addition assignment to dense matrices (kernel selection)************************************
1633  template< typename MT3 // Type of the left-hand side target matrix
1634  , typename MT4 // Type of the left-hand side matrix operand
1635  , typename MT5 > // Type of the right-hand side matrix operand
1636  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1637  {
1638  if( ( IsDiagonal<MT5>::value ) ||
1639  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
1640  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
1641  selectSmallAddAssignKernel( C, A, B );
1642  else
1643  selectBlasAddAssignKernel( C, A, B );
1644  }
1646  //**********************************************************************************************
1647 
1648  //**Default addition assignment to dense matrices (general/general)*****************************
// Default addition assignment kernel for the case that neither operand type is diagonal.
// Loop order is i (rows of A), k (inner dimension), j (columns of B); for every (i,k) pair
// the products A(i,k) * B(k,j) are added to C(i,j) over a column window [jbegin,jend).
// NOTE(review): the nested conditionals that compute jbegin and jend are each missing one
// line in this extract (listing lines 1687 and 1692), so the expressions look unbalanced
// as shown; confirm the full conditions against the upstream header.
1662 template< typename MT3 // Type of the left-hand side target matrix
1663 , typename MT4 // Type of the left-hand side matrix operand
1664 , typename MT5 > // Type of the right-hand side matrix operand
1665 static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1666 selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1667 {
1668 const size_t M( A.rows() );
1669 const size_t N( B.columns() );
1670 const size_t K( A.columns() );
1671
// With LOW or UPP set the kernel expects a square result.
1672 BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1673
1674 for( size_t i=0UL; i<M; ++i )
1675 {
// Range of k for row i, narrowed by the triangular structure of A: an upper A starts at
// i (i+1 if strictly upper), a lower A ends after i (before i if strictly lower).
1676 const size_t kbegin( ( IsUpper<MT4>::value )
1677 ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1678 :( 0UL ) );
1679 const size_t kend( ( IsLower<MT4>::value )
1680 ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1681 :( K ) );
1682 BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
1683
1684 for( size_t k=kbegin; k<kend; ++k )
1685 {
// Column window for this (i,k): depends on the structure of B and on the UPP/LOW flags.
1686 const size_t jbegin( ( IsUpper<MT5>::value )
1688 ?( UPP ? max(i,k+1UL) : k+1UL )
1689 :( UPP ? max(i,k) : k ) )
1690 :( UPP ? i : 0UL ) );
1691 const size_t jend( ( IsLower<MT5>::value )
1693 ?( LOW ? min(i+1UL,k) : k )
1694 :( LOW ? min(i,k)+1UL : k+1UL ) )
1695 :( LOW ? i+1UL : N ) );
1696
// With LOW or UPP the window may be empty or inverted; skip it before the assertion below.
1697 if( ( LOW || UPP ) && ( jbegin >= jend ) ) continue;
1698 BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1699
// jpos marks the end of the part processed two columns at a time: masking with
// size_t(-2) clears the lowest bit, i.e. rounds the window length down to an even number.
1700 const size_t jnum( jend - jbegin );
1701 const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1702
1703 for( size_t j=jbegin; j<jpos; j+=2UL ) {
1704 C(i,j ) += A(i,k) * B(k,j );
1705 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1706 }
// Odd window length: one trailing column is left over.
1707 if( jpos < jend ) {
1708 C(i,jpos) += A(i,k) * B(k,jpos);
1709 }
1710 }
1711 }
1712 }
1714  //**********************************************************************************************
1715 
1716  //**Default addition assignment to dense matrices (general/diagonal)****************************
1730  template< typename MT3 // Type of the left-hand side target matrix
1731  , typename MT4 // Type of the left-hand side matrix operand
1732  , typename MT5 > // Type of the right-hand side matrix operand
1733  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
1734  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1735  {
1737 
1738  const size_t M( A.rows() );
1739  const size_t N( B.columns() );
1740 
1741  for( size_t i=0UL; i<M; ++i )
1742  {
1743  const size_t jbegin( ( IsUpper<MT4>::value )
1744  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1745  :( 0UL ) );
1746  const size_t jend( ( IsLower<MT4>::value )
1747  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1748  :( N ) );
1749  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1750 
1751  const size_t jnum( jend - jbegin );
1752  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1753 
1754  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1755  C(i,j ) += A(i,j ) * B(j ,j );
1756  C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
1757  }
1758  if( jpos < jend ) {
1759  C(i,jpos) += A(i,jpos) * B(jpos,jpos);
1760  }
1761  }
1762  }
1764  //**********************************************************************************************
1765 
1766  //**Default addition assignment to dense matrices (diagonal/general)****************************
1780  template< typename MT3 // Type of the left-hand side target matrix
1781  , typename MT4 // Type of the left-hand side matrix operand
1782  , typename MT5 > // Type of the right-hand side matrix operand
1783  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
1784  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1785  {
1787 
1788  const size_t M( A.rows() );
1789  const size_t N( B.columns() );
1790 
1791  for( size_t i=0UL; i<M; ++i )
1792  {
1793  const size_t jbegin( ( IsUpper<MT5>::value )
1794  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
1795  :( 0UL ) );
1796  const size_t jend( ( IsLower<MT5>::value )
1797  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
1798  :( N ) );
1799  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1800 
1801  const size_t jnum( jend - jbegin );
1802  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1803 
1804  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1805  C(i,j ) += A(i,i) * B(i,j );
1806  C(i,j+1UL) += A(i,i) * B(i,j+1UL);
1807  }
1808  if( jpos < jend ) {
1809  C(i,jpos) += A(i,i) * B(i,jpos);
1810  }
1811  }
1812  }
1814  //**********************************************************************************************
1815 
1816  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
1830  template< typename MT3 // Type of the left-hand side target matrix
1831  , typename MT4 // Type of the left-hand side matrix operand
1832  , typename MT5 > // Type of the right-hand side matrix operand
1833  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
1834  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1835  {
1837 
1838  for( size_t i=0UL; i<A.rows(); ++i ) {
1839  C(i,i) += A(i,i) * B(i,i);
1840  }
1841  }
1843  //**********************************************************************************************
1844 
1845  //**Default addition assignment to dense matrices (small matrices)******************************
// Fallback small-matrix addition assignment kernel: it does no work of its own and simply
// forwards C, A and B to selectDefaultAddAssignKernel().
// NOTE(review): the return type/enable-if line (listing line 1862) is not part of this
// extract, so the condition that selects this overload cannot be read off here.
1859 template< typename MT3 // Type of the left-hand side target matrix
1860 , typename MT4 // Type of the left-hand side matrix operand
1861 , typename MT5 > // Type of the right-hand side matrix operand
1863 selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1864 {
1865 selectDefaultAddAssignKernel( C, A, B );
1866 }
1868  //**********************************************************************************************
1869 
1870  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
1885  template< typename MT3 // Type of the left-hand side target matrix
1886  , typename MT4 // Type of the left-hand side matrix operand
1887  , typename MT5 > // Type of the right-hand side matrix operand
1889  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1890  {
1891  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
1892 
1893  const size_t M( A.rows() );
1894  const size_t N( B.columns() );
1895  const size_t K( A.columns() );
1896 
1897  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
1898 
1899  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
1900  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
1901 
1902  size_t j( 0UL );
1903 
1905  {
1906  for( ; !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
1907  for( size_t i=0UL; i<M; ++i )
1908  {
1909  const size_t kbegin( ( IsUpper<MT4>::value )
1910  ?( ( IsLower<MT5>::value )
1911  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1912  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1913  :( IsLower<MT5>::value ? j : 0UL ) );
1914  const size_t kend( ( IsLower<MT4>::value )
1915  ?( ( IsUpper<MT5>::value )
1916  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
1917  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1918  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
1919 
1920  SIMDType xmm1( (~C).load(i,j ) );
1921  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
1922  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
1923  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
1924  SIMDType xmm5( (~C).load(i,j+SIMDSIZE*4UL) );
1925  SIMDType xmm6( (~C).load(i,j+SIMDSIZE*5UL) );
1926  SIMDType xmm7( (~C).load(i,j+SIMDSIZE*6UL) );
1927  SIMDType xmm8( (~C).load(i,j+SIMDSIZE*7UL) );
1928 
1929  for( size_t k=kbegin; k<kend; ++k ) {
1930  const SIMDType a1( set( A(i,k) ) );
1931  xmm1 += a1 * B.load(k,j );
1932  xmm2 += a1 * B.load(k,j+SIMDSIZE );
1933  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
1934  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
1935  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
1936  xmm6 += a1 * B.load(k,j+SIMDSIZE*5UL);
1937  xmm7 += a1 * B.load(k,j+SIMDSIZE*6UL);
1938  xmm8 += a1 * B.load(k,j+SIMDSIZE*7UL);
1939  }
1940 
1941  (~C).store( i, j , xmm1 );
1942  (~C).store( i, j+SIMDSIZE , xmm2 );
1943  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
1944  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
1945  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
1946  (~C).store( i, j+SIMDSIZE*5UL, xmm6 );
1947  (~C).store( i, j+SIMDSIZE*6UL, xmm7 );
1948  (~C).store( i, j+SIMDSIZE*7UL, xmm8 );
1949  }
1950  }
1951  }
1952 
1953  for( ; !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
1954  {
1955  size_t i( 0UL );
1956 
1957  for( ; (i+2UL) <= M; i+=2UL )
1958  {
1959  const size_t kbegin( ( IsUpper<MT4>::value )
1960  ?( ( IsLower<MT5>::value )
1961  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
1962  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1963  :( IsLower<MT5>::value ? j : 0UL ) );
1964  const size_t kend( ( IsLower<MT4>::value )
1965  ?( ( IsUpper<MT5>::value )
1966  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
1967  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
1968  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
1969 
1970  SIMDType xmm1 ( (~C).load(i ,j ) );
1971  SIMDType xmm2 ( (~C).load(i ,j+SIMDSIZE ) );
1972  SIMDType xmm3 ( (~C).load(i ,j+SIMDSIZE*2UL) );
1973  SIMDType xmm4 ( (~C).load(i ,j+SIMDSIZE*3UL) );
1974  SIMDType xmm5 ( (~C).load(i ,j+SIMDSIZE*4UL) );
1975  SIMDType xmm6 ( (~C).load(i+1UL,j ) );
1976  SIMDType xmm7 ( (~C).load(i+1UL,j+SIMDSIZE ) );
1977  SIMDType xmm8 ( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
1978  SIMDType xmm9 ( (~C).load(i+1UL,j+SIMDSIZE*3UL) );
1979  SIMDType xmm10( (~C).load(i+1UL,j+SIMDSIZE*4UL) );
1980 
1981  for( size_t k=kbegin; k<kend; ++k ) {
1982  const SIMDType a1( set( A(i ,k) ) );
1983  const SIMDType a2( set( A(i+1UL,k) ) );
1984  const SIMDType b1( B.load(k,j ) );
1985  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
1986  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
1987  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
1988  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
1989  xmm1 += a1 * b1;
1990  xmm2 += a1 * b2;
1991  xmm3 += a1 * b3;
1992  xmm4 += a1 * b4;
1993  xmm5 += a1 * b5;
1994  xmm6 += a2 * b1;
1995  xmm7 += a2 * b2;
1996  xmm8 += a2 * b3;
1997  xmm9 += a2 * b4;
1998  xmm10 += a2 * b5;
1999  }
2000 
2001  (~C).store( i , j , xmm1 );
2002  (~C).store( i , j+SIMDSIZE , xmm2 );
2003  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
2004  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
2005  (~C).store( i , j+SIMDSIZE*4UL, xmm5 );
2006  (~C).store( i+1UL, j , xmm6 );
2007  (~C).store( i+1UL, j+SIMDSIZE , xmm7 );
2008  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm8 );
2009  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm9 );
2010  (~C).store( i+1UL, j+SIMDSIZE*4UL, xmm10 );
2011  }
2012 
2013  if( i < M )
2014  {
2015  const size_t kbegin( ( IsUpper<MT4>::value )
2016  ?( ( IsLower<MT5>::value )
2017  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2018  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2019  :( IsLower<MT5>::value ? j : 0UL ) );
2020  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
2021 
2022  SIMDType xmm1( (~C).load(i,j ) );
2023  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
2024  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
2025  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
2026  SIMDType xmm5( (~C).load(i,j+SIMDSIZE*4UL) );
2027 
2028  for( size_t k=kbegin; k<kend; ++k ) {
2029  const SIMDType a1( set( A(i,k) ) );
2030  xmm1 += a1 * B.load(k,j );
2031  xmm2 += a1 * B.load(k,j+SIMDSIZE );
2032  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
2033  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
2034  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
2035  }
2036 
2037  (~C).store( i, j , xmm1 );
2038  (~C).store( i, j+SIMDSIZE , xmm2 );
2039  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
2040  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
2041  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
2042  }
2043  }
2044 
2045  for( ; !LOW && !UPP && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
2046  {
2047  size_t i( 0UL );
2048 
2049  for( ; (i+2UL) <= M; i+=2UL )
2050  {
2051  const size_t kbegin( ( IsUpper<MT4>::value )
2052  ?( ( IsLower<MT5>::value )
2053  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2054  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2055  :( IsLower<MT5>::value ? j : 0UL ) );
2056  const size_t kend( ( IsLower<MT4>::value )
2057  ?( ( IsUpper<MT5>::value )
2058  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
2059  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2060  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
2061 
2062  SIMDType xmm1( (~C).load(i ,j ) );
2063  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE ) );
2064  SIMDType xmm3( (~C).load(i ,j+SIMDSIZE*2UL) );
2065  SIMDType xmm4( (~C).load(i ,j+SIMDSIZE*3UL) );
2066  SIMDType xmm5( (~C).load(i+1UL,j ) );
2067  SIMDType xmm6( (~C).load(i+1UL,j+SIMDSIZE ) );
2068  SIMDType xmm7( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
2069  SIMDType xmm8( (~C).load(i+1UL,j+SIMDSIZE*3UL) );
2070 
2071  for( size_t k=kbegin; k<kend; ++k ) {
2072  const SIMDType a1( set( A(i ,k) ) );
2073  const SIMDType a2( set( A(i+1UL,k) ) );
2074  const SIMDType b1( B.load(k,j ) );
2075  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
2076  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
2077  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
2078  xmm1 += a1 * b1;
2079  xmm2 += a1 * b2;
2080  xmm3 += a1 * b3;
2081  xmm4 += a1 * b4;
2082  xmm5 += a2 * b1;
2083  xmm6 += a2 * b2;
2084  xmm7 += a2 * b3;
2085  xmm8 += a2 * b4;
2086  }
2087 
2088  (~C).store( i , j , xmm1 );
2089  (~C).store( i , j+SIMDSIZE , xmm2 );
2090  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
2091  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
2092  (~C).store( i+1UL, j , xmm5 );
2093  (~C).store( i+1UL, j+SIMDSIZE , xmm6 );
2094  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm7 );
2095  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm8 );
2096  }
2097 
2098  if( i < M )
2099  {
2100  const size_t kbegin( ( IsUpper<MT4>::value )
2101  ?( ( IsLower<MT5>::value )
2102  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2103  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2104  :( IsLower<MT5>::value ? j : 0UL ) );
2105  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
2106 
2107  SIMDType xmm1( (~C).load(i,j ) );
2108  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
2109  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
2110  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
2111 
2112  for( size_t k=kbegin; k<kend; ++k ) {
2113  const SIMDType a1( set( A(i,k) ) );
2114  xmm1 += a1 * B.load(k,j );
2115  xmm2 += a1 * B.load(k,j+SIMDSIZE );
2116  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
2117  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
2118  }
2119 
2120  (~C).store( i, j , xmm1 );
2121  (~C).store( i, j+SIMDSIZE , xmm2 );
2122  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
2123  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
2124  }
2125  }
2126 
2127  for( ; !LOW && !UPP && (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
2128  {
2129  size_t i( 0UL );
2130 
2131  for( ; (i+2UL) <= M; i+=2UL )
2132  {
2133  const size_t kbegin( ( IsUpper<MT4>::value )
2134  ?( ( IsLower<MT5>::value )
2135  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2136  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2137  :( IsLower<MT5>::value ? j : 0UL ) );
2138  const size_t kend( ( IsLower<MT4>::value )
2139  ?( ( IsUpper<MT5>::value )
2140  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
2141  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2142  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
2143 
2144  SIMDType xmm1( (~C).load(i ,j ) );
2145  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE ) );
2146  SIMDType xmm3( (~C).load(i ,j+SIMDSIZE*2UL) );
2147  SIMDType xmm4( (~C).load(i+1UL,j ) );
2148  SIMDType xmm5( (~C).load(i+1UL,j+SIMDSIZE ) );
2149  SIMDType xmm6( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
2150 
2151  for( size_t k=kbegin; k<kend; ++k ) {
2152  const SIMDType a1( set( A(i ,k) ) );
2153  const SIMDType a2( set( A(i+1UL,k) ) );
2154  const SIMDType b1( B.load(k,j ) );
2155  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
2156  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
2157  xmm1 += a1 * b1;
2158  xmm2 += a1 * b2;
2159  xmm3 += a1 * b3;
2160  xmm4 += a2 * b1;
2161  xmm5 += a2 * b2;
2162  xmm6 += a2 * b3;
2163  }
2164 
2165  (~C).store( i , j , xmm1 );
2166  (~C).store( i , j+SIMDSIZE , xmm2 );
2167  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
2168  (~C).store( i+1UL, j , xmm4 );
2169  (~C).store( i+1UL, j+SIMDSIZE , xmm5 );
2170  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm6 );
2171  }
2172 
2173  if( i < M )
2174  {
2175  const size_t kbegin( ( IsUpper<MT4>::value )
2176  ?( ( IsLower<MT5>::value )
2177  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2178  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2179  :( IsLower<MT5>::value ? j : 0UL ) );
2180  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
2181 
2182  SIMDType xmm1( (~C).load(i,j ) );
2183  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
2184  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
2185 
2186  for( size_t k=kbegin; k<kend; ++k ) {
2187  const SIMDType a1( set( A(i,k) ) );
2188  xmm1 += a1 * B.load(k,j );
2189  xmm2 += a1 * B.load(k,j+SIMDSIZE );
2190  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
2191  }
2192 
2193  (~C).store( i, j , xmm1 );
2194  (~C).store( i, j+SIMDSIZE , xmm2 );
2195  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
2196  }
2197  }
2198 
2199  for( ; !( LOW && UPP ) && (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
2200  {
2201  const size_t iend( UPP ? min(j+SIMDSIZE*2UL,M) : M );
2202  size_t i( LOW ? j : 0UL );
2203 
2204  for( ; (i+2UL) <= iend; i+=2UL )
2205  {
2206  const size_t kbegin( ( IsUpper<MT4>::value )
2207  ?( ( IsLower<MT5>::value )
2208  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2209  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2210  :( IsLower<MT5>::value ? j : 0UL ) );
2211  const size_t kend( ( IsLower<MT4>::value )
2212  ?( ( IsUpper<MT5>::value )
2213  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
2214  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2215  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
2216 
2217  SIMDType xmm1( (~C).load(i ,j ) );
2218  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE) );
2219  SIMDType xmm3( (~C).load(i+1UL,j ) );
2220  SIMDType xmm4( (~C).load(i+1UL,j+SIMDSIZE) );
2221 
2222  for( size_t k=kbegin; k<kend; ++k ) {
2223  const SIMDType a1( set( A(i ,k) ) );
2224  const SIMDType a2( set( A(i+1UL,k) ) );
2225  const SIMDType b1( B.load(k,j ) );
2226  const SIMDType b2( B.load(k,j+SIMDSIZE) );
2227  xmm1 += a1 * b1;
2228  xmm2 += a1 * b2;
2229  xmm3 += a2 * b1;
2230  xmm4 += a2 * b2;
2231  }
2232 
2233  (~C).store( i , j , xmm1 );
2234  (~C).store( i , j+SIMDSIZE, xmm2 );
2235  (~C).store( i+1UL, j , xmm3 );
2236  (~C).store( i+1UL, j+SIMDSIZE, xmm4 );
2237  }
2238 
2239  if( i < iend )
2240  {
2241  const size_t kbegin( ( IsUpper<MT4>::value )
2242  ?( ( IsLower<MT5>::value )
2243  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2244  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2245  :( IsLower<MT5>::value ? j : 0UL ) );
2246  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
2247 
2248  SIMDType xmm1( (~C).load(i,j ) );
2249  SIMDType xmm2( (~C).load(i,j+SIMDSIZE) );
2250 
2251  for( size_t k=kbegin; k<kend; ++k ) {
2252  const SIMDType a1( set( A(i,k) ) );
2253  xmm1 += a1 * B.load(k,j );
2254  xmm2 += a1 * B.load(k,j+SIMDSIZE);
2255  }
2256 
2257  (~C).store( i, j , xmm1 );
2258  (~C).store( i, j+SIMDSIZE, xmm2 );
2259  }
2260  }
2261 
2262  for( ; j<jpos; j+=SIMDSIZE )
2263  {
2264  const size_t iend( LOW && UPP ? min(j+SIMDSIZE,M) : M );
2265  size_t i( LOW ? j : 0UL );
2266 
2267  for( ; (i+2UL) <= iend; i+=2UL )
2268  {
2269  const size_t kbegin( ( IsUpper<MT4>::value )
2270  ?( ( IsLower<MT5>::value )
2271  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2272  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2273  :( IsLower<MT5>::value ? j : 0UL ) );
2274  const size_t kend( ( IsLower<MT4>::value )
2275  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
2276  :( K ) );
2277 
2278  SIMDType xmm1( (~C).load(i ,j) );
2279  SIMDType xmm2( (~C).load(i+1UL,j) );
2280 
2281  for( size_t k=kbegin; k<kend; ++k ) {
2282  const SIMDType b1( B.load(k,j) );
2283  xmm1 += set( A(i ,k) ) * b1;
2284  xmm2 += set( A(i+1UL,k) ) * b1;
2285  }
2286 
2287  (~C).store( i , j, xmm1 );
2288  (~C).store( i+1UL, j, xmm2 );
2289  }
2290 
2291  if( i < iend )
2292  {
2293  const size_t kbegin( ( IsUpper<MT4>::value )
2294  ?( ( IsLower<MT5>::value )
2295  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2296  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2297  :( IsLower<MT5>::value ? j : 0UL ) );
2298 
2299  SIMDType xmm1( (~C).load(i,j) );
2300 
2301  for( size_t k=kbegin; k<K; ++k ) {
2302  xmm1 += set( A(i,k) ) * B.load(k,j);
2303  }
2304 
2305  (~C).store( i, j, xmm1 );
2306  }
2307  }
2308 
2309  for( ; remainder && j<N; ++j )
2310  {
2311  const size_t iend( UPP ? j+1UL : M );
2312  size_t i( LOW ? j : 0UL );
2313 
2314  for( ; (i+2UL) <= iend; i+=2UL )
2315  {
2316  const size_t kbegin( ( IsUpper<MT4>::value )
2317  ?( ( IsLower<MT5>::value )
2318  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2319  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2320  :( IsLower<MT5>::value ? j : 0UL ) );
2321  const size_t kend( ( IsLower<MT4>::value )
2322  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
2323  :( K ) );
2324 
2325  ElementType value1( (~C)(i ,j) );
2326  ElementType value2( (~C)(i+1UL,j) );;
2327 
2328  for( size_t k=kbegin; k<kend; ++k ) {
2329  value1 += A(i ,k) * B(k,j);
2330  value2 += A(i+1UL,k) * B(k,j);
2331  }
2332 
2333  (~C)(i ,j) = value1;
2334  (~C)(i+1UL,j) = value2;
2335  }
2336 
2337  if( i < iend )
2338  {
2339  const size_t kbegin( ( IsUpper<MT4>::value )
2340  ?( ( IsLower<MT5>::value )
2341  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2342  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2343  :( IsLower<MT5>::value ? j : 0UL ) );
2344 
2345  ElementType value( (~C)(i,j) );
2346 
2347  for( size_t k=kbegin; k<K; ++k ) {
2348  value += A(i,k) * B(k,j);
2349  }
2350 
2351  (~C)(i,j) = value;
2352  }
2353  }
2354  }
2356  //**********************************************************************************************
2357 
2358  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Small-matrix kernel for the addition assignment of the product A*B to a column-major
// target (C is taken as DenseMatrix<MT3,true>). The kernel does not multiply anything
// itself: it evaluates ONE of the two operands into its opposite storage order
// (OppositeType_) and hands the resulting product expression back to addAssign().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): listing lines 2376, 2379-2382, 2386 and 2390 are not part of this excerpt.
// The return type / enabling condition and the conditions of the first two branches are
// therefore not visible here -- confirm them against the original header.
2373  template< typename MT3 // Type of the left-hand side target matrix
2374  , typename MT4 // Type of the left-hand side matrix operand
2375  , typename MT5 > // Type of the right-hand side matrix operand
2377  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2378  {
2383 
      // Functor that wraps the restructured product before it is passed on to addAssign().
2384  const ForwardFunctor fwd;
2385 
      // First branch (condition not visible): convert A, keep B as it is.
2387  const OppositeType_<MT4> tmp( serial( A ) );
2388  addAssign( ~C, fwd( tmp * B ) );
2389  }
      // Second branch (condition not visible): convert B, keep A as it is.
2391  const OppositeType_<MT5> tmp( serial( B ) );
2392  addAssign( ~C, fwd( A * tmp ) );
2393  }
      // Otherwise convert whichever operand has fewer elements; on a tie A is converted.
2394  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2395  const OppositeType_<MT4> tmp( serial( A ) );
2396  addAssign( ~C, fwd( tmp * B ) );
2397  }
2398  else {
2399  const OppositeType_<MT5> tmp( serial( B ) );
2400  addAssign( ~C, fwd( A * tmp ) );
2401  }
2402  }
2404  //**********************************************************************************************
2405 
2406  //**Default addition assignment to dense matrices (large matrices)******************************
// Fallback large-matrix kernel for the addition assignment: it performs no work of its
// own and simply forwards all three arguments to selectDefaultAddAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): listing line 2423 (the return type / enabling condition that separates
// this overload from the vectorized one below) is not part of this excerpt.
2420  template< typename MT3 // Type of the left-hand side target matrix
2421  , typename MT4 // Type of the left-hand side matrix operand
2422  , typename MT5 > // Type of the right-hand side matrix operand
2424  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2425  {
2426  selectDefaultAddAssignKernel( C, A, B );
2427  }
2429  //**********************************************************************************************
2430 
2431  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Vectorized large-matrix kernel for the addition assignment. Depending on the flags LOW
// and UPP (declared outside this excerpt) it calls lmmm(), ummm() or mmm(); LOW is tested
// first, so lmmm() is used whenever LOW is set. All three calls receive ElementType(1)
// for both trailing scalar arguments.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): presumably the two scalars are the scaling factors of the product and of
// the existing content of C (i.e. C = 1*A*B + 1*C) -- confirm in <blaze/math/dense/MMM.h>.
// NOTE(review): listing line 2449 (return type / enabling condition) is not part of this
// excerpt.
2446  template< typename MT3 // Type of the left-hand side target matrix
2447  , typename MT4 // Type of the left-hand side matrix operand
2448  , typename MT5 > // Type of the right-hand side matrix operand
2450  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2451  {
2452  if( LOW )
2453  lmmm( C, A, B, ElementType(1), ElementType(1) );
2454  else if( UPP )
2455  ummm( C, A, B, ElementType(1), ElementType(1) );
2456  else
2457  mmm( C, A, B, ElementType(1), ElementType(1) );
2458  }
2460  //**********************************************************************************************
2461 
2462  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback for the BLAS-based addition assignment: no BLAS routine is called here, the
// arguments are forwarded unchanged to selectLargeAddAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): listing line 2479 (return type / enabling condition) is not part of this
// excerpt.
2476  template< typename MT3 // Type of the left-hand side target matrix
2477  , typename MT4 // Type of the left-hand side matrix operand
2478  , typename MT5 > // Type of the right-hand side matrix operand
2480  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2481  {
2482  selectLargeAddAssignKernel( C, A, B );
2483  }
2485  //**********************************************************************************************
2486 
2487  //**BLAS-based addition assignment to dense matrices********************************************
2488 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
2489 
// BLAS-based kernel for the addition assignment (only compiled when both BLAZE_BLAS_MODE
// and BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled, see the surrounding #if).
//
// Three cases are distinguished:
//  - A is triangular: B is copied into a temporary of the target's result type, trmm()
//    is applied to that temporary with A on the left, and the temporary is added to C.
//  - B is triangular (and A is not): same scheme with A copied and B applied from the right.
//  - otherwise: a single gemm() call on C, A and B with both scalars set to ET(1).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): presumably trmm() overwrites its first argument with the triangular
// product and gemm() computes C = 1*A*B + 1*C, as the BLAS routines of the same name do --
// confirm in <blaze/math/blas/trmm.h> and <blaze/math/blas/gemm.h>.
// NOTE(review): listing line 2505 (return type / enabling condition) is not part of this
// excerpt.
2502  template< typename MT3 // Type of the left-hand side target matrix
2503  , typename MT4 // Type of the left-hand side matrix operand
2504  , typename MT5 > // Type of the right-hand side matrix operand
2506  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2507  {
2508  typedef ElementType_<MT3> ET;
2509 
2510  if( IsTriangular<MT4>::value ) {
      // The product is built in a temporary so that C itself is only touched by the final addition.
2511  ResultType_<MT3> tmp( serial( B ) );
2512  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2513  addAssign( C, tmp );
2514  }
2515  else if( IsTriangular<MT5>::value ) {
2516  ResultType_<MT3> tmp( serial( A ) );
2517  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2518  addAssign( C, tmp );
2519  }
2520  else {
2521  gemm( C, A, B, ET(1), ET(1) );
2522  }
2523  }
2525 #endif
2526  //**********************************************************************************************
2527 
2528  //**Restructuring addition assignment to column-major matrices**********************************
// Restructuring addition assignment of a dense matrix-dense matrix product to a
// column-major matrix (the target is taken as Matrix<MT,true>). Instead of evaluating
// the product directly, one or both operands are wrapped in trans() and the rewritten
// product is passed to addAssign() again.
//
// \param lhs The target left-hand side matrix.
// \param rhs The right-hand side multiplication expression to be added.
//
// NOTE(review): listing lines 2544 (return type / enabling condition), 2547, 2549 and
// 2556 (the condition of the first branch) are not part of this excerpt -- confirm them
// against the original header.
2543  template< typename MT > // Type of the target matrix
2545  addAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
2546  {
2548 
2550 
2551  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2552  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2553 
2554  const ForwardFunctor fwd;
2555 
      // First branch (condition not visible): both operands are transposed.
2557  addAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
      // Symmetric left operand type: only the left operand is transposed.
2558  else if( IsSymmetric<MT1>::value )
2559  addAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
      // Remaining case: only the right operand is transposed.
2560  else
2561  addAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
2562  }
2564  //**********************************************************************************************
2565 
2566  //**Addition assignment to sparse matrices******************************************************
2567  // No special implementation for the addition assignment to sparse matrices.
2568  //**********************************************************************************************
2569 
2570  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a dense matrix-dense matrix product to a dense matrix of
// either storage order. The function checks the dimensions, returns early when there is
// nothing to compute, evaluates both operands and then delegates to
// selectSubAssignKernel().
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
//
// NOTE(review): listing lines 2585 (the declaration specifiers / return type) and 2588
// are not part of this excerpt.
2583  template< typename MT // Type of the target dense matrix
2584  , bool SO > // Storage order of the target dense matrix
2586  subAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
2587  {
2589 
2590  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2591  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2592 
      // Nothing to subtract if the target is empty or the inner dimension of the product is zero.
2593  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2594  return;
2595  }
2596 
2597  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2598  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2599 
      // The evaluated operands must have the same sizes as the original operands and fit the target.
2600  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2601  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2602  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2603  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2604  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2605  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2606 
2607  DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2608  }
2610  //**********************************************************************************************
2611 
2612  //**Subtraction assignment to dense matrices (kernel selection)*********************************
2623  template< typename MT3 // Type of the left-hand side target matrix
2624  , typename MT4 // Type of the left-hand side matrix operand
2625  , typename MT5 > // Type of the right-hand side matrix operand
2626  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2627  {
2628  if( ( IsDiagonal<MT5>::value ) ||
2629  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
2630  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
2631  selectSmallSubAssignKernel( C, A, B );
2632  else
2633  selectBlasSubAssignKernel( C, A, B );
2634  }
2636  //**********************************************************************************************
2637 
2638  //**Default subtraction assignment to dense matrices (general/general)**************************
// Default (non-vectorized) kernel for the subtraction assignment C -= A*B, selected when
// neither operand type is diagonal. For every row i and every inner index k the scalar
// A(i,k) is combined with row k of B and subtracted from row i of C. The k and j ranges
// are narrowed according to the triangular structure of A and B and, via the flags LOW
// and UPP (declared outside this excerpt), to the part of C that has to be updated.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): listing lines 2677 and 2682 are not part of this excerpt; they belong to
// the nested conditionals that initialize jbegin and jend, so both expressions are
// incomplete as shown (presumably a test for a strictly upper / strictly lower B is
// missing) -- confirm against the original header.
2652  template< typename MT3 // Type of the left-hand side target matrix
2653  , typename MT4 // Type of the left-hand side matrix operand
2654  , typename MT5 > // Type of the right-hand side matrix operand
2655  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2656  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2657  {
2658  const size_t M( A.rows() );
2659  const size_t N( B.columns() );
2660  const size_t K( A.columns() );
2661 
      // With LOW or UPP set, the kernel relies on a square result.
2662  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2663 
2664  for( size_t i=0UL; i<M; ++i )
2665  {
      // Inner index range for row i of A: an upper A starts at the diagonal (one past it
      // if strictly upper), a lower A ends at the diagonal (before it if strictly lower).
2666  const size_t kbegin( ( IsUpper<MT4>::value )
2667  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2668  :( 0UL ) );
2669  const size_t kend( ( IsLower<MT4>::value )
2670  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2671  :( K ) );
2672  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
2673 
2674  for( size_t k=kbegin; k<kend; ++k )
2675  {
      // Column range for row k of B; with UPP set no column below i is visited, with LOW
      // set no column above i is visited.
2676  const size_t jbegin( ( IsUpper<MT5>::value )
2678  ?( UPP ? max(i,k+1UL) : k+1UL )
2679  :( UPP ? max(i,k) : k ) )
2680  :( UPP ? i : 0UL ) );
2681  const size_t jend( ( IsLower<MT5>::value )
2683  ?( LOW ? min(i+1UL,k) : k )
2684  :( LOW ? min(i,k)+1UL : k+1UL ) )
2685  :( LOW ? i+1UL : N ) );
2686 
      // The LOW/UPP restriction can leave an empty (or inverted) range; skip it.
2687  if( ( LOW || UPP ) && ( jbegin >= jend ) ) continue;
2688  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2689 
      // Clearing the lowest bit of jnum rounds it down to an even count: jpos is the end
      // of the part that can be processed two columns at a time.
2690  const size_t jnum( jend - jbegin );
2691  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2692 
2693  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2694  C(i,j ) -= A(i,k) * B(k,j );
2695  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
2696  }
      // At most one column is left over after the pairwise loop.
2697  if( jpos < jend ) {
2698  C(i,jpos) -= A(i,k) * B(k,jpos);
2699  }
2700  }
2701  }
2702  }
2704  //**********************************************************************************************
2705 
2706  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
2720  template< typename MT3 // Type of the left-hand side target matrix
2721  , typename MT4 // Type of the left-hand side matrix operand
2722  , typename MT5 > // Type of the right-hand side matrix operand
2723  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
2724  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2725  {
2727 
2728  const size_t M( A.rows() );
2729  const size_t N( B.columns() );
2730 
2731  for( size_t i=0UL; i<M; ++i )
2732  {
2733  const size_t jbegin( ( IsUpper<MT4>::value )
2734  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2735  :( 0UL ) );
2736  const size_t jend( ( IsLower<MT4>::value )
2737  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
2738  :( N ) );
2739  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2740 
2741  const size_t jnum( jend - jbegin );
2742  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2743 
2744  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2745  C(i,j ) -= A(i,j ) * B(j ,j );
2746  C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
2747  }
2748  if( jpos < jend ) {
2749  C(i,jpos) -= A(i,jpos) * B(jpos,jpos);
2750  }
2751  }
2752  }
2754  //**********************************************************************************************
2755 
2756  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
2770  template< typename MT3 // Type of the left-hand side target matrix
2771  , typename MT4 // Type of the left-hand side matrix operand
2772  , typename MT5 > // Type of the right-hand side matrix operand
2773  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2774  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2775  {
2777 
2778  const size_t M( A.rows() );
2779  const size_t N( B.columns() );
2780 
2781  for( size_t i=0UL; i<M; ++i )
2782  {
2783  const size_t jbegin( ( IsUpper<MT5>::value )
2784  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
2785  :( 0UL ) );
2786  const size_t jend( ( IsLower<MT5>::value )
2787  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
2788  :( N ) );
2789  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2790 
2791  const size_t jnum( jend - jbegin );
2792  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
2793 
2794  for( size_t j=jbegin; j<jpos; j+=2UL ) {
2795  C(i,j ) -= A(i,i) * B(i,j );
2796  C(i,j+1UL) -= A(i,i) * B(i,j+1UL);
2797  }
2798  if( jpos < jend ) {
2799  C(i,jpos) -= A(i,i) * B(i,jpos);
2800  }
2801  }
2802  }
2804  //**********************************************************************************************
2805 
2806  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
2820  template< typename MT3 // Type of the left-hand side target matrix
2821  , typename MT4 // Type of the left-hand side matrix operand
2822  , typename MT5 > // Type of the right-hand side matrix operand
2823  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
2824  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2825  {
2827 
2828  for( size_t i=0UL; i<A.rows(); ++i ) {
2829  C(i,i) -= A(i,i) * B(i,i);
2830  }
2831  }
2833  //**********************************************************************************************
2834 
2835  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Fallback small-matrix kernel for the subtraction assignment: it performs no work of
// its own and forwards all three arguments to selectDefaultSubAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// NOTE(review): listing line 2852 (the return type / enabling condition that separates
// this overload from the vectorized small-matrix kernels) is not part of this excerpt.
2849  template< typename MT3 // Type of the left-hand side target matrix
2850  , typename MT4 // Type of the left-hand side matrix operand
2851  , typename MT5 > // Type of the right-hand side matrix operand
2853  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2854  {
2855  selectDefaultSubAssignKernel( C, A, B );
2856  }
2858  //**********************************************************************************************
2859 
2860  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
2875  template< typename MT3 // Type of the left-hand side target matrix
2876  , typename MT4 // Type of the left-hand side matrix operand
2877  , typename MT5 > // Type of the right-hand side matrix operand
2879  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2880  {
2881  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
2882 
2883  const size_t M( A.rows() );
2884  const size_t N( B.columns() );
2885  const size_t K( A.columns() );
2886 
2887  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
2888 
2889  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
2890  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
2891 
2892  size_t j( 0UL );
2893 
2895  {
2896  for( ; !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
2897  for( size_t i=0UL; i<M; ++i )
2898  {
2899  const size_t kbegin( ( IsUpper<MT4>::value )
2900  ?( ( IsLower<MT5>::value )
2901  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2902  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2903  :( IsLower<MT5>::value ? j : 0UL ) );
2904  const size_t kend( ( IsLower<MT4>::value )
2905  ?( ( IsUpper<MT5>::value )
2906  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
2907  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
2908  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
2909 
2910  SIMDType xmm1( (~C).load(i,j ) );
2911  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
2912  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
2913  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
2914  SIMDType xmm5( (~C).load(i,j+SIMDSIZE*4UL) );
2915  SIMDType xmm6( (~C).load(i,j+SIMDSIZE*5UL) );
2916  SIMDType xmm7( (~C).load(i,j+SIMDSIZE*6UL) );
2917  SIMDType xmm8( (~C).load(i,j+SIMDSIZE*7UL) );
2918 
2919  for( size_t k=kbegin; k<kend; ++k ) {
2920  const SIMDType a1( set( A(i,k) ) );
2921  xmm1 -= a1 * B.load(k,j );
2922  xmm2 -= a1 * B.load(k,j+SIMDSIZE );
2923  xmm3 -= a1 * B.load(k,j+SIMDSIZE*2UL);
2924  xmm4 -= a1 * B.load(k,j+SIMDSIZE*3UL);
2925  xmm5 -= a1 * B.load(k,j+SIMDSIZE*4UL);
2926  xmm6 -= a1 * B.load(k,j+SIMDSIZE*5UL);
2927  xmm7 -= a1 * B.load(k,j+SIMDSIZE*6UL);
2928  xmm8 -= a1 * B.load(k,j+SIMDSIZE*7UL);
2929  }
2930 
2931  (~C).store( i, j , xmm1 );
2932  (~C).store( i, j+SIMDSIZE , xmm2 );
2933  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
2934  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
2935  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
2936  (~C).store( i, j+SIMDSIZE*5UL, xmm6 );
2937  (~C).store( i, j+SIMDSIZE*6UL, xmm7 );
2938  (~C).store( i, j+SIMDSIZE*7UL, xmm8 );
2939  }
2940  }
2941  }
2942 
2943  for( ; !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
2944  {
2945  size_t i( 0UL );
2946 
2947  for( ; (i+2UL) <= M; i+=2UL )
2948  {
2949  const size_t kbegin( ( IsUpper<MT4>::value )
2950  ?( ( IsLower<MT5>::value )
2951  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
2952  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2953  :( IsLower<MT5>::value ? j : 0UL ) );
2954  const size_t kend( ( IsLower<MT4>::value )
2955  ?( ( IsUpper<MT5>::value )
2956  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
2957  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
2958  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
2959 
2960  SIMDType xmm1 ( (~C).load(i ,j ) );
2961  SIMDType xmm2 ( (~C).load(i ,j+SIMDSIZE ) );
2962  SIMDType xmm3 ( (~C).load(i ,j+SIMDSIZE*2UL) );
2963  SIMDType xmm4 ( (~C).load(i ,j+SIMDSIZE*3UL) );
2964  SIMDType xmm5 ( (~C).load(i ,j+SIMDSIZE*4UL) );
2965  SIMDType xmm6 ( (~C).load(i+1UL,j ) );
2966  SIMDType xmm7 ( (~C).load(i+1UL,j+SIMDSIZE ) );
2967  SIMDType xmm8 ( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
2968  SIMDType xmm9 ( (~C).load(i+1UL,j+SIMDSIZE*3UL) );
2969  SIMDType xmm10( (~C).load(i+1UL,j+SIMDSIZE*4UL) );
2970 
2971  for( size_t k=kbegin; k<kend; ++k ) {
2972  const SIMDType a1( set( A(i ,k) ) );
2973  const SIMDType a2( set( A(i+1UL,k) ) );
2974  const SIMDType b1( B.load(k,j ) );
2975  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
2976  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
2977  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
2978  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
2979  xmm1 -= a1 * b1;
2980  xmm2 -= a1 * b2;
2981  xmm3 -= a1 * b3;
2982  xmm4 -= a1 * b4;
2983  xmm5 -= a1 * b5;
2984  xmm6 -= a2 * b1;
2985  xmm7 -= a2 * b2;
2986  xmm8 -= a2 * b3;
2987  xmm9 -= a2 * b4;
2988  xmm10 -= a2 * b5;
2989  }
2990 
2991  (~C).store( i , j , xmm1 );
2992  (~C).store( i , j+SIMDSIZE , xmm2 );
2993  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
2994  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
2995  (~C).store( i , j+SIMDSIZE*4UL, xmm5 );
2996  (~C).store( i+1UL, j , xmm6 );
2997  (~C).store( i+1UL, j+SIMDSIZE , xmm7 );
2998  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm8 );
2999  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm9 );
3000  (~C).store( i+1UL, j+SIMDSIZE*4UL, xmm10 );
3001  }
3002 
3003  if( i < M )
3004  {
3005  const size_t kbegin( ( IsUpper<MT4>::value )
3006  ?( ( IsLower<MT5>::value )
3007  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3008  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3009  :( IsLower<MT5>::value ? j : 0UL ) );
3010  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
3011 
3012  SIMDType xmm1( (~C).load(i,j ) );
3013  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
3014  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
3015  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
3016  SIMDType xmm5( (~C).load(i,j+SIMDSIZE*4UL) );
3017 
3018  for( size_t k=kbegin; k<kend; ++k ) {
3019  const SIMDType a1( set( A(i,k) ) );
3020  xmm1 -= a1 * B.load(k,j );
3021  xmm2 -= a1 * B.load(k,j+SIMDSIZE );
3022  xmm3 -= a1 * B.load(k,j+SIMDSIZE*2UL);
3023  xmm4 -= a1 * B.load(k,j+SIMDSIZE*3UL);
3024  xmm5 -= a1 * B.load(k,j+SIMDSIZE*4UL);
3025  }
3026 
3027  (~C).store( i, j , xmm1 );
3028  (~C).store( i, j+SIMDSIZE , xmm2 );
3029  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
3030  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
3031  (~C).store( i, j+SIMDSIZE*4UL, xmm5 );
3032  }
3033  }
3034 
3035  for( ; !LOW && !UPP && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
3036  {
3037  size_t i( 0UL );
3038 
3039  for( ; (i+2UL) <= M; i+=2UL )
3040  {
3041  const size_t kbegin( ( IsUpper<MT4>::value )
3042  ?( ( IsLower<MT5>::value )
3043  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3044  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3045  :( IsLower<MT5>::value ? j : 0UL ) );
3046  const size_t kend( ( IsLower<MT4>::value )
3047  ?( ( IsUpper<MT5>::value )
3048  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
3049  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3050  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
3051 
3052  SIMDType xmm1( (~C).load(i ,j ) );
3053  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE ) );
3054  SIMDType xmm3( (~C).load(i ,j+SIMDSIZE*2UL) );
3055  SIMDType xmm4( (~C).load(i ,j+SIMDSIZE*3UL) );
3056  SIMDType xmm5( (~C).load(i+1UL,j ) );
3057  SIMDType xmm6( (~C).load(i+1UL,j+SIMDSIZE ) );
3058  SIMDType xmm7( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
3059  SIMDType xmm8( (~C).load(i+1UL,j+SIMDSIZE*3UL) );
3060 
3061  for( size_t k=kbegin; k<kend; ++k ) {
3062  const SIMDType a1( set( A(i ,k) ) );
3063  const SIMDType a2( set( A(i+1UL,k) ) );
3064  const SIMDType b1( B.load(k,j ) );
3065  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
3066  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
3067  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
3068  xmm1 -= a1 * b1;
3069  xmm2 -= a1 * b2;
3070  xmm3 -= a1 * b3;
3071  xmm4 -= a1 * b4;
3072  xmm5 -= a2 * b1;
3073  xmm6 -= a2 * b2;
3074  xmm7 -= a2 * b3;
3075  xmm8 -= a2 * b4;
3076  }
3077 
3078  (~C).store( i , j , xmm1 );
3079  (~C).store( i , j+SIMDSIZE , xmm2 );
3080  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
3081  (~C).store( i , j+SIMDSIZE*3UL, xmm4 );
3082  (~C).store( i+1UL, j , xmm5 );
3083  (~C).store( i+1UL, j+SIMDSIZE , xmm6 );
3084  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm7 );
3085  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm8 );
3086  }
3087 
3088  if( i < M )
3089  {
3090  const size_t kbegin( ( IsUpper<MT4>::value )
3091  ?( ( IsLower<MT5>::value )
3092  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3093  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3094  :( IsLower<MT5>::value ? j : 0UL ) );
3095  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
3096 
3097  SIMDType xmm1( (~C).load(i,j ) );
3098  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
3099  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
3100  SIMDType xmm4( (~C).load(i,j+SIMDSIZE*3UL) );
3101 
3102  for( size_t k=kbegin; k<kend; ++k ) {
3103  const SIMDType a1( set( A(i,k) ) );
3104  xmm1 -= a1 * B.load(k,j );
3105  xmm2 -= a1 * B.load(k,j+SIMDSIZE );
3106  xmm3 -= a1 * B.load(k,j+SIMDSIZE*2UL);
3107  xmm4 -= a1 * B.load(k,j+SIMDSIZE*3UL);
3108  }
3109 
3110  (~C).store( i, j , xmm1 );
3111  (~C).store( i, j+SIMDSIZE , xmm2 );
3112  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
3113  (~C).store( i, j+SIMDSIZE*3UL, xmm4 );
3114  }
3115  }
3116 
3117  for( ; !LOW && !UPP && (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
3118  {
3119  size_t i( 0UL );
3120 
3121  for( ; (i+2UL) <= M; i+=2UL )
3122  {
3123  const size_t kbegin( ( IsUpper<MT4>::value )
3124  ?( ( IsLower<MT5>::value )
3125  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3126  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3127  :( IsLower<MT5>::value ? j : 0UL ) );
3128  const size_t kend( ( IsLower<MT4>::value )
3129  ?( ( IsUpper<MT5>::value )
3130  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
3131  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3132  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
3133 
3134  SIMDType xmm1( (~C).load(i ,j ) );
3135  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE ) );
3136  SIMDType xmm3( (~C).load(i ,j+SIMDSIZE*2UL) );
3137  SIMDType xmm4( (~C).load(i+1UL,j ) );
3138  SIMDType xmm5( (~C).load(i+1UL,j+SIMDSIZE ) );
3139  SIMDType xmm6( (~C).load(i+1UL,j+SIMDSIZE*2UL) );
3140 
3141  for( size_t k=kbegin; k<kend; ++k ) {
3142  const SIMDType a1( set( A(i ,k) ) );
3143  const SIMDType a2( set( A(i+1UL,k) ) );
3144  const SIMDType b1( B.load(k,j ) );
3145  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
3146  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
3147  xmm1 -= a1 * b1;
3148  xmm2 -= a1 * b2;
3149  xmm3 -= a1 * b3;
3150  xmm4 -= a2 * b1;
3151  xmm5 -= a2 * b2;
3152  xmm6 -= a2 * b3;
3153  }
3154 
3155  (~C).store( i , j , xmm1 );
3156  (~C).store( i , j+SIMDSIZE , xmm2 );
3157  (~C).store( i , j+SIMDSIZE*2UL, xmm3 );
3158  (~C).store( i+1UL, j , xmm4 );
3159  (~C).store( i+1UL, j+SIMDSIZE , xmm5 );
3160  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm6 );
3161  }
3162 
3163  if( i < M )
3164  {
3165  const size_t kbegin( ( IsUpper<MT4>::value )
3166  ?( ( IsLower<MT5>::value )
3167  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3168  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3169  :( IsLower<MT5>::value ? j : 0UL ) );
3170  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
3171 
3172  SIMDType xmm1( (~C).load(i,j ) );
3173  SIMDType xmm2( (~C).load(i,j+SIMDSIZE ) );
3174  SIMDType xmm3( (~C).load(i,j+SIMDSIZE*2UL) );
3175 
3176  for( size_t k=kbegin; k<kend; ++k ) {
3177  const SIMDType a1( set( A(i,k) ) );
3178  xmm1 -= a1 * B.load(k,j );
3179  xmm2 -= a1 * B.load(k,j+SIMDSIZE );
3180  xmm3 -= a1 * B.load(k,j+SIMDSIZE*2UL);
3181  }
3182 
3183  (~C).store( i, j , xmm1 );
3184  (~C).store( i, j+SIMDSIZE , xmm2 );
3185  (~C).store( i, j+SIMDSIZE*2UL, xmm3 );
3186  }
3187  }
3188 
3189  for( ; !( LOW && UPP ) && (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
3190  {
3191  const size_t iend( UPP ? min(j+SIMDSIZE*2UL,M) : M );
3192  size_t i( LOW ? j : 0UL );
3193 
3194  for( ; (i+2UL) <= iend; i+=2UL )
3195  {
3196  const size_t kbegin( ( IsUpper<MT4>::value )
3197  ?( ( IsLower<MT5>::value )
3198  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3199  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3200  :( IsLower<MT5>::value ? j : 0UL ) );
3201  const size_t kend( ( IsLower<MT4>::value )
3202  ?( ( IsUpper<MT5>::value )
3203  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
3204  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
3205  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
3206 
3207  SIMDType xmm1( (~C).load(i ,j ) );
3208  SIMDType xmm2( (~C).load(i ,j+SIMDSIZE) );
3209  SIMDType xmm3( (~C).load(i+1UL,j ) );
3210  SIMDType xmm4( (~C).load(i+1UL,j+SIMDSIZE) );
3211 
3212  for( size_t k=kbegin; k<kend; ++k ) {
3213  const SIMDType a1( set( A(i ,k) ) );
3214  const SIMDType a2( set( A(i+1UL,k) ) );
3215  const SIMDType b1( B.load(k,j ) );
3216  const SIMDType b2( B.load(k,j+SIMDSIZE) );
3217  xmm1 -= a1 * b1;
3218  xmm2 -= a1 * b2;
3219  xmm3 -= a2 * b1;
3220  xmm4 -= a2 * b2;
3221  }
3222 
3223  (~C).store( i , j , xmm1 );
3224  (~C).store( i , j+SIMDSIZE, xmm2 );
3225  (~C).store( i+1UL, j , xmm3 );
3226  (~C).store( i+1UL, j+SIMDSIZE, xmm4 );
3227  }
3228 
3229  if( i < iend )
3230  {
3231  const size_t kbegin( ( IsUpper<MT4>::value )
3232  ?( ( IsLower<MT5>::value )
3233  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3234  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3235  :( IsLower<MT5>::value ? j : 0UL ) );
3236  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
3237 
3238  SIMDType xmm1( (~C).load(i,j ) );
3239  SIMDType xmm2( (~C).load(i,j+SIMDSIZE) );
3240 
3241  for( size_t k=kbegin; k<kend; ++k ) {
3242  const SIMDType a1( set( A(i,k) ) );
3243  xmm1 -= a1 * B.load(k,j );
3244  xmm2 -= a1 * B.load(k,j+SIMDSIZE);
3245  }
3246 
3247  (~C).store( i, j , xmm1 );
3248  (~C).store( i, j+SIMDSIZE, xmm2 );
3249  }
3250  }
3251 
3252  for( ; j<jpos; j+=SIMDSIZE )
3253  {
3254  const size_t iend( LOW && UPP ? min(j+SIMDSIZE,M) : M );
3255  size_t i( LOW ? j : 0UL );
3256 
3257  for( ; (i+2UL) <= iend; i+=2UL )
3258  {
3259  const size_t kbegin( ( IsUpper<MT4>::value )
3260  ?( ( IsLower<MT5>::value )
3261  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3262  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3263  :( IsLower<MT5>::value ? j : 0UL ) );
3264  const size_t kend( ( IsLower<MT4>::value )
3265  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
3266  :( K ) );
3267 
3268  SIMDType xmm1( (~C).load(i ,j) );
3269  SIMDType xmm2( (~C).load(i+1UL,j) );
3270 
3271  for( size_t k=kbegin; k<kend; ++k ) {
3272  const SIMDType b1( B.load(k,j) );
3273  xmm1 -= set( A(i ,k) ) * b1;
3274  xmm2 -= set( A(i+1UL,k) ) * b1;
3275  }
3276 
3277  (~C).store( i , j, xmm1 );
3278  (~C).store( i+1UL, j, xmm2 );
3279  }
3280 
3281  if( i < iend )
3282  {
3283  const size_t kbegin( ( IsUpper<MT4>::value )
3284  ?( ( IsLower<MT5>::value )
3285  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3286  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3287  :( IsLower<MT5>::value ? j : 0UL ) );
3288 
3289  SIMDType xmm1( (~C).load(i,j) );
3290 
3291  for( size_t k=kbegin; k<K; ++k ) {
3292  xmm1 -= set( A(i,k) ) * B.load(k,j);
3293  }
3294 
3295  (~C).store( i, j, xmm1 );
3296  }
3297  }
3298 
3299  for( ; remainder && j<N; ++j )
3300  {
3301  const size_t iend( UPP ? j+1UL : M );
3302  size_t i( LOW ? j : 0UL );
3303 
3304  for( ; (i+2UL) <= iend; i+=2UL )
3305  {
3306  const size_t kbegin( ( IsUpper<MT4>::value )
3307  ?( ( IsLower<MT5>::value )
3308  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3309  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3310  :( IsLower<MT5>::value ? j : 0UL ) );
3311  const size_t kend( ( IsLower<MT4>::value )
3312  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
3313  :( K ) );
3314 
3315  ElementType value1( (~C)(i ,j) );
3316  ElementType value2( (~C)(i+1UL,j) );
3317 
3318  for( size_t k=kbegin; k<kend; ++k ) {
3319  value1 -= A(i ,k) * B(k,j);
3320  value2 -= A(i+1UL,k) * B(k,j);
3321  }
3322 
3323  (~C)(i ,j) = value1;
3324  (~C)(i+1UL,j) = value2;
3325  }
3326 
3327  if( i < iend )
3328  {
3329  const size_t kbegin( ( IsUpper<MT4>::value )
3330  ?( ( IsLower<MT5>::value )
3331  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
3332  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
3333  :( IsLower<MT5>::value ? j : 0UL ) );
3334 
3335  ElementType value( (~C)(i,j) );
3336 
3337  for( size_t k=kbegin; k<K; ++k ) {
3338  value -= A(i,k) * B(k,j);
3339  }
3340 
3341  (~C)(i,j) = value;
3342  }
3343  }
3344  }
3346  //**********************************************************************************************
3347 
3348  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Small-matrix subtraction-assignment kernel for a column-major dense target
// (the target is taken as DenseMatrix<MT3,true>).
// Strategy visible here: copy ONE of the two operands into its OppositeType_
// (presumably the opposite storage order -- confirm against OppositeType_),
// then re-dispatch through subAssign() on the resulting product expression.
// NOTE(review): the return-type line and the conditions of the first two
// branches are not present in this listing.
3363  template< typename MT3 // Type of the left-hand side target matrix
3364  , typename MT4 // Type of the left-hand side matrix operand
3365  , typename MT5 > // Type of the right-hand side matrix operand
3367  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3368  {
3373 
// Functor applied to every forwarded product expression below.
3374  const ForwardFunctor fwd;
3375 
// Branch 1 (condition elided in this listing): convert the left operand A.
3377  const OppositeType_<MT4> tmp( serial( A ) );
3378  subAssign( ~C, fwd( tmp * B ) );
3379  }
// Branch 2 (condition elided in this listing): convert the right operand B.
3381  const OppositeType_<MT5> tmp( serial( B ) );
3382  subAssign( ~C, fwd( A * tmp ) );
3383  }
// Otherwise convert whichever operand has fewer elements (A wins ties).
3384  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3385  const OppositeType_<MT4> tmp( serial( A ) );
3386  subAssign( ~C, fwd( tmp * B ) );
3387  }
3388  else {
3389  const OppositeType_<MT5> tmp( serial( B ) );
3390  subAssign( ~C, fwd( A * tmp ) );
3391  }
3392  }
3394  //**********************************************************************************************
3395 
3396  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Large-matrix subtraction-assignment kernel, default variant: performs no
// work of its own and simply forwards to selectDefaultSubAssignKernel().
// NOTE(review): the return-type/selection line is not present in this listing,
// so the condition under which this overload is chosen cannot be seen here.
3410  template< typename MT3 // Type of the left-hand side target matrix
3411  , typename MT4 // Type of the left-hand side matrix operand
3412  , typename MT5 > // Type of the right-hand side matrix operand
3414  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3415  {
3416  selectDefaultSubAssignKernel( C, A, B );
3417  }
3419  //**********************************************************************************************
3420 
3421  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Large-matrix subtraction-assignment kernel, vectorized variant.
// Dispatches on the compile-time LOW/UPP flags to lmmm(), ummm() or mmm(),
// always passing the scalar pair ElementType(-1), ElementType(1).
// Presumably these are (alpha, beta) so that C = -A*B + C -- confirm against
// the declarations in <blaze/math/dense/MMM.h>.
// NOTE(review): the return-type/selection line is not present in this listing.
3436  template< typename MT3 // Type of the left-hand side target matrix
3437  , typename MT4 // Type of the left-hand side matrix operand
3438  , typename MT5 > // Type of the right-hand side matrix operand
3440  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3441  {
// LOW is tested first, so a result flagged both LOW and UPP uses lmmm().
3442  if( LOW )
3443  lmmm( C, A, B, ElementType(-1), ElementType(1) );
3444  else if( UPP )
3445  ummm( C, A, B, ElementType(-1), ElementType(1) );
3446  else
3447  mmm( C, A, B, ElementType(-1), ElementType(1) );
3448  }
3450  //**********************************************************************************************
3451 
3452  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// BLAS subtraction-assignment kernel, default (fallback) variant: forwards
// unchanged to selectLargeSubAssignKernel().
// NOTE(review): the return-type/selection line is not present in this listing.
3466  template< typename MT3 // Type of the left-hand side target matrix
3467  , typename MT4 // Type of the left-hand side matrix operand
3468  , typename MT5 > // Type of the right-hand side matrix operand
3470  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3471  {
3472  selectLargeSubAssignKernel( C, A, B );
3473  }
3475  //**********************************************************************************************
3476 
3477  //**BLAS-based subtraction assignment to dense matrices*****************************************
3478 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
3479 
// BLAS subtraction-assignment kernel (only compiled when both
// BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are set,
// per the surrounding #if).
// Triangular operands go through trmm() on a temporary followed by
// subAssign(); all other cases call gemm() directly on C.
// NOTE(review): the return-type/selection line is not present in this listing.
3492  template< typename MT3 // Type of the left-hand side target matrix
3493  , typename MT4 // Type of the left-hand side matrix operand
3494  , typename MT5 > // Type of the right-hand side matrix operand
3496  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3497  {
3498  typedef ElementType_<MT3> ET;
3499 
// A triangular: start from a copy of B, multiply it by A from the left
// (CblasLeft) with scalar 1, then subtract the temporary from C.
3500  if( IsTriangular<MT4>::value ) {
3501  ResultType_<MT3> tmp( serial( B ) );
3502  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3503  subAssign( C, tmp );
3504  }
// B triangular: start from a copy of A, multiply it by B from the right
// (CblasRight) with scalar 1, then subtract the temporary from C.
3505  else if( IsTriangular<MT5>::value ) {
3506  ResultType_<MT3> tmp( serial( A ) );
3507  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3508  subAssign( C, tmp );
3509  }
// General case: scalars ET(-1) and ET(1); presumably (alpha, beta) giving
// C = -A*B + C -- confirm against the gemm() wrapper signature.
3510  else {
3511  gemm( C, A, B, ET(-1), ET(1) );
3512  }
3513  }
3515 #endif
3516  //**********************************************************************************************
3517 
3518  //**Restructuring subtraction assignment to column-major matrices*******************************
// Restructuring subtraction assignment of the product expression to a
// column-major target (Matrix<MT,true>).
// Rewrites the product with one or both operands wrapped in trans() and
// re-dispatches through subAssign().
// NOTE(review): the return-type line, two macro/check lines and the first
// `if` condition are not present in this listing; the first branch presumably
// covers the case where both operands are symmetric -- confirm upstream.
3533  template< typename MT > // Type of the target matrix
3535  subAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
3536  {
3538 
3540 
3541  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3542  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3543 
3544  const ForwardFunctor fwd;
3545 
// Branch 1 (condition elided in this listing): transpose both operands.
3547  subAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
// Left operand symmetric: transpose only the left operand.
3548  else if( IsSymmetric<MT1>::value )
3549  subAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
// Otherwise: transpose only the right operand.
3550  else
3551  subAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3552  }
3554  //**********************************************************************************************
3555 
3556  //**Subtraction assignment to sparse matrices***************************************************
3557  // No special implementation for the subtraction assignment to sparse matrices.
3558  //**********************************************************************************************
3559 
3560  //**Multiplication assignment to dense matrices*************************************************
3561  // No special implementation for the multiplication assignment to dense matrices.
3562  //**********************************************************************************************
3563 
3564  //**Multiplication assignment to sparse matrices************************************************
3565  // No special implementation for the multiplication assignment to sparse matrices.
3566  //**********************************************************************************************
3567 
3568  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the product expression to a dense matrix.
// Handles the degenerate sizes, evaluates both operands into LT/RT objects
// and re-dispatches smpAssign() on the product of the evaluated operands.
// NOTE(review): the return-type line and one macro line are not present in
// this listing; LT and RT are declared outside this chunk.
3583  template< typename MT // Type of the target dense matrix
3584  , bool SO > // Storage order of the target dense matrix
3586  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
3587  {
3589 
3590  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3591  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3592 
// Empty target: nothing to write.
3593  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3594  return;
3595  }
// Zero inner dimension: the product has no terms, so the target is reset.
3596  else if( rhs.lhs_.columns() == 0UL ) {
3597  reset( ~lhs );
3598  return;
3599  }
3600 
3601  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3602  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3603 
// Sanity checks: evaluation must not change any dimension.
3604  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3605  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3606  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3607  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3608  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3609  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3610 
3611  smpAssign( ~lhs, A * B );
3612  }
3614  //**********************************************************************************************
3615 
3616  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the product expression to a sparse matrix.
// The expression is first materialized into a temporary of type TmpType and
// that temporary is then SMP-assigned (through the forward functor).
// NOTE(review): the return-type line, the TmpType definition and several
// macro/check lines are not present in this listing.
3631  template< typename MT // Type of the target sparse matrix
3632  , bool SO > // Storage order of the target sparse matrix
3634  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatDMatMultExpr& rhs )
3635  {
3637 
3639 
3646 
3647  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3648  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3649 
3650  const ForwardFunctor fwd;
3651 
// Evaluate the whole product once, then assign the evaluated result.
3652  const TmpType tmp( rhs );
3653  smpAssign( ~lhs, fwd( tmp ) );
3654  }
3656  //**********************************************************************************************
3657 
3658  //**Restructuring SMP assignment to column-major matrices***************************************
// Restructuring SMP assignment of the product expression to a column-major
// target (Matrix<MT,true>).
// Rewrites the product with one or both operands wrapped in trans() and
// re-dispatches through smpAssign().
// NOTE(review): the return-type line, two macro/check lines and the first
// `if` condition are not present in this listing; the first branch presumably
// covers the case where both operands are symmetric -- confirm upstream.
3673  template< typename MT > // Type of the target matrix
3675  smpAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
3676  {
3678 
3680 
3681  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3682  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3683 
3684  const ForwardFunctor fwd;
3685 
// Branch 1 (condition elided in this listing): transpose both operands.
3687  smpAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
// Left operand symmetric: transpose only the left operand.
3688  else if( IsSymmetric<MT1>::value )
3689  smpAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
// Otherwise: transpose only the right operand.
3690  else
3691  smpAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3692  }
3694  //**********************************************************************************************
3695 
3696  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the product expression to a dense matrix.
// NOTE(review): the function signature lines are not present in this
// listing; the name is taken from the section banner and from the
// smpAddAssign() call at the end of the body. LT and RT are declared outside
// this chunk.
3712  template< typename MT // Type of the target dense matrix
3713  , bool SO > // Storage order of the target dense matrix
3716  {
3718 
3719  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3720  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3721 
// Empty target or zero inner dimension: the target is left untouched
// (unlike plain assignment, no reset is performed here).
3722  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
3723  return;
3724  }
3725 
3726  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3727  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3728 
// Sanity checks: evaluation must not change any dimension.
3729  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3730  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3731  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3732  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3733  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3734  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3735 
3736  smpAddAssign( ~lhs, A * B );
3737  }
3739  //**********************************************************************************************
3740 
3741  //**Restructuring SMP addition assignment to column-major matrices******************************
// Restructuring SMP addition assignment of the product expression to a
// column-major target (Matrix<MT,true>).
// Rewrites the product with one or both operands wrapped in trans() and
// re-dispatches through smpAddAssign().
// NOTE(review): the return-type line, two macro/check lines and the first
// `if` condition are not present in this listing; the first branch presumably
// covers the case where both operands are symmetric -- confirm upstream.
3756  template< typename MT > // Type of the target matrix
3758  smpAddAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
3759  {
3761 
3763 
3764  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3765  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3766 
3767  const ForwardFunctor fwd;
3768 
// Branch 1 (condition elided in this listing): transpose both operands.
3770  smpAddAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
// Left operand symmetric: transpose only the left operand.
3771  else if( IsSymmetric<MT1>::value )
3772  smpAddAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
// Otherwise: transpose only the right operand.
3773  else
3774  smpAddAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3775  }
3777  //**********************************************************************************************
3778 
3779  //**SMP addition assignment to sparse matrices**************************************************
3780  // No special implementation for the SMP addition assignment to sparse matrices.
3781  //**********************************************************************************************
3782 
3783  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the product expression to a dense matrix.
// NOTE(review): the function signature lines are not present in this
// listing; the name is taken from the section banner and from the
// smpSubAssign() call at the end of the body. LT and RT are declared outside
// this chunk.
3799  template< typename MT // Type of the target dense matrix
3800  , bool SO > // Storage order of the target dense matrix
3803  {
3805 
3806  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3807  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3808 
// Empty target or zero inner dimension: the target is left untouched.
3809  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
3810  return;
3811  }
3812 
3813  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3814  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3815 
// Sanity checks: evaluation must not change any dimension.
3816  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3817  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3818  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3819  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3820  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3821  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3822 
3823  smpSubAssign( ~lhs, A * B );
3824  }
3826  //**********************************************************************************************
3827 
3828  //**Restructuring SMP subtraction assignment to column-major matrices***************************
// Restructuring SMP subtraction assignment of the product expression to a
// column-major target (Matrix<MT,true>).
// Rewrites the product with one or both operands wrapped in trans() and
// re-dispatches through smpSubAssign().
// NOTE(review): the return-type line, two macro/check lines and the first
// `if` condition are not present in this listing; the first branch presumably
// covers the case where both operands are symmetric -- confirm upstream.
3843  template< typename MT > // Type of the target matrix
3845  smpSubAssign( Matrix<MT,true>& lhs, const DMatDMatMultExpr& rhs )
3846  {
3848 
3850 
3851  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3852  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3853 
3854  const ForwardFunctor fwd;
3855 
// Branch 1 (condition elided in this listing): transpose both operands.
3857  smpSubAssign( ~lhs, fwd( trans( rhs.lhs_ ) * trans( rhs.rhs_ ) ) );
// Left operand symmetric: transpose only the left operand.
3858  else if( IsSymmetric<MT1>::value )
3859  smpSubAssign( ~lhs, fwd( trans( rhs.lhs_ ) * rhs.rhs_ ) );
// Otherwise: transpose only the right operand.
3860  else
3861  smpSubAssign( ~lhs, fwd( rhs.lhs_ * trans( rhs.rhs_ ) ) );
3862  }
3864  //**********************************************************************************************
3865 
3866  //**SMP subtraction assignment to sparse matrices***********************************************
3867  // No special implementation for the SMP subtraction assignment to sparse matrices.
3868  //**********************************************************************************************
3869 
3870  //**SMP multiplication assignment to dense matrices*********************************************
3871  // No special implementation for the SMP multiplication assignment to dense matrices.
3872  //**********************************************************************************************
3873 
3874  //**SMP multiplication assignment to sparse matrices********************************************
3875  // No special implementation for the SMP multiplication assignment to sparse matrices.
3876  //**********************************************************************************************
3877 
3878  //**Compile time checks*************************************************************************
3886  //**********************************************************************************************
3887 };
3888 //*************************************************************************************************
3889 
3890 
3891 
3892 
3893 //=================================================================================================
3894 //
3895 // DMATSCALARMULTEXPR SPECIALIZATION
3896 //
3897 //=================================================================================================
3898 
3899 //*************************************************************************************************
3907 template< typename MT1 // Type of the left-hand side dense matrix
3908  , typename MT2 // Type of the right-hand side dense matrix
3909  , bool SF // Symmetry flag
3910  , bool HF // Hermitian flag
3911  , bool LF // Lower flag
3912  , bool UF // Upper flag
3913  , typename ST > // Type of the right-hand side scalar value
3914 class DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >
3915  : public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, ST, false >, false >
3916  , private MatScalarMultExpr
3917  , private Computation
3918 {
3919  private:
3920  //**Type definitions****************************************************************************
3923 
3924  typedef ResultType_<MMM> RES;
3925  typedef ResultType_<MT1> RT1;
3926  typedef ResultType_<MT2> RT2;
3927  typedef ElementType_<RT1> ET1;
3928  typedef ElementType_<RT2> ET2;
3929  typedef CompositeType_<MT1> CT1;
3930  typedef CompositeType_<MT2> CT2;
3931  //**********************************************************************************************
3932 
3933  //**********************************************************************************************
3935  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
3936  //**********************************************************************************************
3937 
3938  //**********************************************************************************************
3940  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
3941  //**********************************************************************************************
3942 
3943  //**********************************************************************************************
// Effective structural flags of the product, derived from the template flags
// SF/HF/LF/UF so that the more specific property takes precedence:
//  - SYM  : symmetric flag set and none of HF/LF/UF set
//  - HERM : Hermitian flag set and neither LF nor UF set
//  - LOW  : lower flag set, or (symmetric or Hermitian) combined with upper
//  - UPP  : upper flag set, or (symmetric or Hermitian) combined with lower
// Consequently a symmetric/Hermitian result that is also flagged triangular
// ends up with both LOW and UPP set.
3945  enum : bool {
3946  SYM = ( SF && !( HF || LF || UF ) ),
3947  HERM = ( HF && !( LF || UF ) ),
3948  LOW = ( LF || ( ( SF || HF ) && UF ) ),
3949  UPP = ( UF || ( ( SF || HF ) && LF ) )
3950  };
3951  //**********************************************************************************************
3952 
3953  //**********************************************************************************************
3955 
// Compile-time predicate over (T1, T2, T3). The visible part of the condition
// requires T1 to be a column-major matrix.
// NOTE(review): the remainder of the condition and the closing brace of the
// enum are not present in this listing -- consult the upstream header for the
// full expression.
3960  template< typename T1, typename T2, typename T3 >
3961  struct CanExploitSymmetry {
3962  enum : bool { value = IsColumnMajorMatrix<T1>::value &&
3964  };
3965  //**********************************************************************************************
3966 
3967  //**********************************************************************************************
3969 
// Compile-time predicate over (T1, T2, T3): true when at least one operand
// has to be evaluated (evaluateLeft or evaluateRight) and the
// CanExploitSymmetry predicate does not hold for the same type triple.
3972  template< typename T1, typename T2, typename T3 >
3973  struct IsEvaluationRequired {
3974  enum : bool { value = ( evaluateLeft || evaluateRight ) &&
3975  !CanExploitSymmetry<T1,T2,T3>::value };
3976  };
3977  //**********************************************************************************************
3978 
3979  //**********************************************************************************************
3981 
// Compile-time predicate over (T1, T2, T3, T4) deciding whether a BLAS kernel
// may be used. Conditions visible in this listing:
//  - none of the structural flags SYM/HERM/LOW/UPP is set,
//  - all of T1, T2, T3 report simdEnabled,
//  - T1 and T3 have the same element type.
// NOTE(review): several lines of the condition (including the start of the
// enum and its closing brace) are not present in this listing.
3983  template< typename T1, typename T2, typename T3, typename T4 >
3984  struct UseBlasKernel {
3986  !SYM && !HERM && !LOW && !UPP &&
3991  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
3996  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
3998  };
3999  //**********************************************************************************************
4000 
4001  //**********************************************************************************************
4003 
// Compile-time predicate over (T1, T2, T3, T4) deciding whether the
// vectorized default kernel may be used. Conditions visible in this listing:
//  - useOptimizedKernels is set,
//  - all of T1, T2, T3 report simdEnabled,
//  - a type relation involving T4 holds (its first operands are elided),
//  - SIMD addition and SIMD multiplication are available for the element
//    types of T2 and T3.
// NOTE(review): several lines of the condition are not present in this
// listing.
4005  template< typename T1, typename T2, typename T3, typename T4 >
4006  struct UseVectorizedDefaultKernel {
4007  enum : bool { value = useOptimizedKernels &&
4009  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4013  , T4 >::value &&
4014  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
4015  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
4016  };
4017  //**********************************************************************************************
4018 
4019  //**********************************************************************************************
4021 
// Selects the functor type used when forwarding (sub-)expressions, based on
// the effective structural flags, tested in this priority order:
//   HERM         -> DeclHerm
//   SYM          -> DeclSym
//   LOW and UPP  -> DeclDiag
//   LOW only     -> DeclLow
//   UPP only     -> DeclUpp
//   none         -> Noop
4023  typedef IfTrue_< HERM
4024  , DeclHerm
4025  , IfTrue_< SYM
4026  , DeclSym
4027  , IfTrue_< LOW
4028  , IfTrue_< UPP
4029  , DeclDiag
4030  , DeclLow >
4031  , IfTrue_< UPP
4032  , DeclUpp
4033  , Noop > > > > ForwardFunctor;
4034  //**********************************************************************************************
4035 
4036  public:
4037  //**Type definitions****************************************************************************
4040 
4041  typedef MultTrait_<RES,ST> ResultType;
4046  typedef const ElementType ReturnType;
4047  typedef const ResultType CompositeType;
4048 
4051 
4053  typedef ST RightOperand;
4054 
4057 
4060  //**********************************************************************************************
4061 
4062  //**Compilation flags***************************************************************************
4064  enum : bool { simdEnabled = !IsDiagonal<MT2>::value &&
4065  MT1::simdEnabled && MT2::simdEnabled &&
4069 
4071  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4072  !evaluateRight && MT2::smpAssignable };
4073  //**********************************************************************************************
4074 
4075  //**SIMD properties*****************************************************************************
4077  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
4078  //**********************************************************************************************
4079 
4080  //**Constructor*********************************************************************************
// Constructs the scaled-product expression from a matrix product expression
// and a scalar; both are stored in the members matrix_ and scalar_.
//   matrix : the matrix/matrix multiplication expression (type MMM)
//   scalar : the scalar factor
4086  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4087  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4088  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4089  {}
4090  //**********************************************************************************************
4091 
4092  //**Access operator*****************************************************************************
// Unchecked 2D element access: returns element (i,j) of the wrapped product
// expression multiplied by the scalar.
// Indices are validated only via BLAZE_INTERNAL_ASSERT; use at() for a
// checked access that throws.
4099  inline ReturnType operator()( size_t i, size_t j ) const {
4100  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4101  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4102  return matrix_(i,j) * scalar_;
4103  }
4104  //**********************************************************************************************
4105 
4106  //**At function*********************************************************************************
// Checked 2D element access.
// Raises an out-of-range error via BLAZE_THROW_OUT_OF_RANGE when i is not a
// valid row index or j is not a valid column index; otherwise delegates to
// operator()(i,j). The row index is checked first.
4114  inline ReturnType at( size_t i, size_t j ) const {
4115  if( i >= matrix_.rows() ) {
4116  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
4117  }
4118  if( j >= matrix_.columns() ) {
4119  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
4120  }
4121  return (*this)(i,j);
4122  }
4123  //**********************************************************************************************
4124 
4125  //**Rows function*******************************************************************************
// Returns the number of rows, taken from the wrapped product expression.
4130  inline size_t rows() const {
4131  return matrix_.rows();
4132  }
4133  //**********************************************************************************************
4134 
4135  //**Columns function****************************************************************************
// Returns the number of columns, taken from the wrapped product expression.
4140  inline size_t columns() const {
4141  return matrix_.columns();
4142  }
4143  //**********************************************************************************************
4144 
4145  //**Left operand access*************************************************************************
// Returns the left-hand side operand of the scaling, i.e. the stored
// matrix product expression (member matrix_).
4150  inline LeftOperand leftOperand() const {
4151  return matrix_;
4152  }
4153  //**********************************************************************************************
4154 
4155  //**Right operand access************************************************************************
// Returns the right-hand side operand of the scaling, i.e. the stored scalar
// (member scalar_).
4160  inline RightOperand rightOperand() const {
4161  return scalar_;
4162  }
4163  //**********************************************************************************************
4164 
4165  //**********************************************************************************************
// Alias query: delegates entirely to canAlias() of the wrapped product
// expression; the scalar takes no part in the check.
//   alias : address to test against
4171  template< typename T >
4172  inline bool canAlias( const T* alias ) const {
4173  return matrix_.canAlias( alias );
4174  }
4175  //**********************************************************************************************
4176 
4177  //**********************************************************************************************
// Alias query: delegates entirely to isAliased() of the wrapped product
// expression; the scalar takes no part in the check.
//   alias : address to test against
4183  template< typename T >
4184  inline bool isAliased( const T* alias ) const {
4185  return matrix_.isAliased( alias );
4186  }
4187  //**********************************************************************************************
4188 
4189  //**********************************************************************************************
// Alignment query: forwards the answer of the wrapped product expression.
4194  inline bool isAligned() const {
4195  return matrix_.isAligned();
4196  }
4197  //**********************************************************************************************
4198 
4199  //**********************************************************************************************
// Reports whether the expression may be used in an SMP assignment.
// Both of the following must hold, with size = rows() * columns():
//  1. BLAZE_BLAS_IS_PARALLEL is not set, OR size is below
//     DMATDMATMULT_THRESHOLD (the same threshold selectAssignKernel() uses to
//     stay on the non-BLAS kernels);
//  2. size is at least SMP_DMATDMATMULT_THRESHOLD.
4204  inline bool canSMPAssign() const noexcept {
4205  return ( !BLAZE_BLAS_IS_PARALLEL ||
4206  ( rows() * columns() < DMATDMATMULT_THRESHOLD ) ) &&
4207  ( rows() * columns() >= SMP_DMATDMATMULT_THRESHOLD );
4208  }
4209  //**********************************************************************************************
4210 
4211  private:
4212  //**Member variables****************************************************************************
4213  LeftOperand matrix_;
4214  RightOperand scalar_;
4215  //**********************************************************************************************
4216 
4217  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled product expression (A*B)*s to a dense matrix.
// Fetches both operands of the inner product, handles degenerate sizes,
// evaluates the operands and hands over to selectAssignKernel() together
// with the scalar.
// NOTE(review): the return-type line and one macro line are not present in
// this listing; LT and RT are declared outside this chunk.
4229  template< typename MT // Type of the target dense matrix
4230  , bool SO > // Storage order of the target dense matrix
4232  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4233  {
4235 
4236  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4237  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4238 
// Operands of the inner matrix/matrix product.
4239  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
4240  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
4241 
// Empty target: nothing to write.
4242  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4243  return;
4244  }
// Zero inner dimension: the product has no terms, so the target is reset.
4245  else if( left.columns() == 0UL ) {
4246  reset( ~lhs );
4247  return;
4248  }
4249 
4250  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4251  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4252 
// Sanity checks: evaluation must not change any dimension.
4253  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4254  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4255  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4256  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4257  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4258  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4259 
4260  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4261  }
4262  //**********************************************************************************************
4263 
4264  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment C = (A*B)*scalar.
// The small-matrix kernel is chosen when ANY of these holds:
//  - B is a diagonal matrix type,
//  - not in debug mode and B has at most SIMDSIZE*10 columns,
//  - the target has fewer than DMATDMATMULT_THRESHOLD elements.
// Otherwise the BLAS kernel selection is used.
4275  template< typename MT3 // Type of the left-hand side target matrix
4276  , typename MT4 // Type of the left-hand side matrix operand
4277  , typename MT5 // Type of the right-hand side matrix operand
4278  , typename ST2 > // Type of the scalar value
4279  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4280  {
4281  if( ( IsDiagonal<MT5>::value ) ||
4282  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
4283  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
4284  selectSmallAssignKernel( C, A, B, scalar );
4285  else
4286  selectBlasAssignKernel( C, A, B, scalar );
4287  }
4288  //**********************************************************************************************
4289 
4290  //**Default assignment to dense matrices (general/general)**************************************
// Default (non-vectorized) kernel for C = (A*B)*scalar, general/general case.
// For every row i of C:
//   1. the first contributing k (= kbegin) initializes the row by assignment,
//      with elements outside the active column range reset,
//   2. the remaining k values accumulate into the row,
//   3. the active column range is multiplied by the scalar.
// The k and j ranges are narrowed at compile time from the triangular traits
// of MT4/MT5 and from the SYM/HERM/LOW/UPP flags of the result.
// For SYM/HERM results the part of C below the diagonal is filled at the end
// by mirroring the part above it.
// NOTE(review): the return-type line and several inner lines of the
// jbegin/jend conditionals are not present in this listing.
4304  template< typename MT3 // Type of the left-hand side target matrix
4305  , typename MT4 // Type of the left-hand side matrix operand
4306  , typename MT5 // Type of the right-hand side matrix operand
4307  , typename ST2 > // Type of the scalar value
4309  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4310  {
4311  const size_t M( A.rows() );
4312  const size_t N( B.columns() );
4313  const size_t K( A.columns() );
4314 
// Any structural flag on the result requires a square result.
4315  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4316 
4317  for( size_t i=0UL; i<M; ++i )
4318  {
// Range of k for row i of A, narrowed by A's triangular traits.
4319  const size_t kbegin( ( IsUpper<MT4>::value )
4320  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4321  :( 0UL ) );
4322  const size_t kend( ( IsLower<MT4>::value )
4323  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4324  :( K ) );
4325  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
4326 
// Strictly triangular A with an empty k range: the whole row of C is reset.
4327  if( IsStrictlyTriangular<MT4>::value && kbegin == kend ) {
4328  for( size_t j=0UL; j<N; ++j ) {
4329  reset( C(i,j) );
4330  }
4331  continue;
4332  }
4333 
// Step 1: initialize row i from the first k (kbegin) by plain assignment and
// reset the columns in front of jbegin / behind jend where applicable.
4334  {
4335  const size_t jbegin( ( IsUpper<MT5>::value )
4337  ?( UPP ? max(i,kbegin+1UL) : kbegin+1UL )
4338  :( UPP ? max(i,kbegin) : kbegin ) )
4339  :( UPP ? i : 0UL ) );
4340  const size_t jend( ( IsLower<MT5>::value )
4342  ?( LOW ? min(i+1UL,kbegin) : kbegin )
4343  :( LOW ? min(i,kbegin)+1UL : kbegin+1UL ) )
4344  :( LOW ? i+1UL : N ) );
4345 
4346  if( ( IsUpper<MT4>::value && IsUpper<MT5>::value ) || UPP ) {
4347  for( size_t j=0UL; j<jbegin; ++j ) {
4348  reset( C(i,j) );
4349  }
4350  }
4351  else if( IsStrictlyUpper<MT5>::value ) {
4352  reset( C(i,0UL) );
4353  }
4354  for( size_t j=jbegin; j<jend; ++j ) {
4355  C(i,j) = A(i,kbegin) * B(kbegin,j);
4356  }
4357  if( ( IsLower<MT4>::value && IsLower<MT5>::value ) || LOW ) {
4358  for( size_t j=jend; j<N; ++j ) {
4359  reset( C(i,j) );
4360  }
4361  }
4362  else if( IsStrictlyLower<MT5>::value ) {
4363  reset( C(i,N-1UL) );
4364  }
4365  }
4366 
// Step 2: accumulate the contributions of the remaining k values.
4367  for( size_t k=kbegin+1UL; k<kend; ++k )
4368  {
4369  const size_t jbegin( ( IsUpper<MT5>::value )
4371  ?( SYM || HERM || UPP ? max( i, k+1UL ) : k+1UL )
4372  :( SYM || HERM || UPP ? max( i, k ) : k ) )
4373  :( SYM || HERM || UPP ? i : 0UL ) );
4374  const size_t jend( ( IsLower<MT5>::value )
4376  ?( LOW ? min(i+1UL,k-1UL) : k-1UL )
4377  :( LOW ? min(i+1UL,k) : k ) )
4378  :( LOW ? i+1UL : N ) );
4379 
// With structural flags the range can be empty; skip this k in that case.
4380  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) continue;
4381  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4382 
4383  for( size_t j=jbegin; j<jend; ++j ) {
4384  C(i,j) += A(i,k) * B(k,j);
4385  }
// For lower B, column jend is written by assignment rather than accumulation.
4386  if( IsLower<MT5>::value ) {
4387  C(i,jend) = A(i,k) * B(k,jend);
4388  }
4389  }
4390 
// Step 3: apply the scalar factor to the active column range of row i.
4391  {
4392  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4394  :( SYM || HERM || UPP ? i : 0UL ) );
4395  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4397  :( LOW ? i+1UL : N ) );
4398 
4399  if( ( SYM || HERM || LOW || UPP ) && ( jbegin > jend ) ) continue;
4400  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4401 
4402  for( size_t j=jbegin; j<jend; ++j ) {
4403  C(i,j) *= scalar;
4404  }
4405  }
4406  }
4407 
// Symmetric/Hermitian result: fill the elements below the diagonal from the
// ones above it (conjugated in the Hermitian case).
4408  if( SYM || HERM ) {
4409  for( size_t i=1UL; i<M; ++i ) {
4410  for( size_t j=0UL; j<i; ++j ) {
4411  C(i,j) = HERM ? conj( C(j,i) ) : C(j,i);
4412  }
4413  }
4414  }
4415  }
4416  //**********************************************************************************************
4417 
4418  //**Default assignment to dense matrices (general/diagonal)*************************************
      // Default kernel for a non-diagonal A and a diagonal B (selected by the EnableIf_ condition
      // below): every element reduces to C(i,j) = A(i,j) * B(j,j) * scalar. The j-range of each
      // row follows A's triangular traits; elements outside that range are reset.
      // NOTE(review): listing line 4439 (first statement of the body) is missing from this view.
4432  template< typename MT3 // Type of the left-hand side target matrix
4433  , typename MT4 // Type of the left-hand side matrix operand
4434  , typename MT5 // Type of the right-hand side matrix operand
4435  , typename ST2 > // Type of the scalar value
4436  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4437  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4438  {
4440 
4441  const size_t M( A.rows() );
4442  const size_t N( B.columns() );
4443 
4444  for( size_t i=0UL; i<M; ++i )
4445  {
      // Column range of row i of A as implied by A's triangular traits.
4446  const size_t jbegin( ( IsUpper<MT4>::value )
4447  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4448  :( 0UL ) );
4449  const size_t jend( ( IsLower<MT4>::value )
4450  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4451  :( N ) );
4452  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4453 
      // Leading part of the row (left of jbegin) is reset for an upper A.
4454  if( IsUpper<MT4>::value ) {
4455  for( size_t j=0UL; j<jbegin; ++j ) {
4456  reset( C(i,j) );
4457  }
4458  }
4459  for( size_t j=jbegin; j<jend; ++j ) {
4460  C(i,j) = A(i,j) * B(j,j) * scalar;
4461  }
      // Trailing part of the row (from jend on) is reset for a lower A.
4462  if( IsLower<MT4>::value ) {
4463  for( size_t j=jend; j<N; ++j ) {
4464  reset( C(i,j) );
4465  }
4466  }
4467  }
4468  }
4469  //**********************************************************************************************
4470 
4471  //**Default assignment to dense matrices (diagonal/general)*************************************
      // Default kernel for the diagonal/general case named in the header above: every element
      // reduces to C(i,j) = A(i,i) * B(i,j) * scalar. The j-range of each row follows B's
      // triangular traits; elements outside that range are reset.
      // NOTE(review): listing lines 4489 (presumably the return type / enable condition) and
      // 4492 (first statement of the body) are missing from this view.
4485  template< typename MT3 // Type of the left-hand side target matrix
4486  , typename MT4 // Type of the left-hand side matrix operand
4487  , typename MT5 // Type of the right-hand side matrix operand
4488  , typename ST2 > // Type of the scalar value
4490  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4491  {
4493 
4494  const size_t M( A.rows() );
4495  const size_t N( B.columns() );
4496 
4497  for( size_t i=0UL; i<M; ++i )
4498  {
      // Column range of row i of B as implied by B's triangular traits.
4499  const size_t jbegin( ( IsUpper<MT5>::value )
4500  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
4501  :( 0UL ) );
4502  const size_t jend( ( IsLower<MT5>::value )
4503  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
4504  :( N ) );
4505  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4506 
      // Leading part of the row (left of jbegin) is reset for an upper B.
4507  if( IsUpper<MT5>::value ) {
4508  for( size_t j=0UL; j<jbegin; ++j ) {
4509  reset( C(i,j) );
4510  }
4511  }
4512  for( size_t j=jbegin; j<jend; ++j ) {
4513  C(i,j) = A(i,i) * B(i,j) * scalar;
4514  }
      // Trailing part of the row (from jend on) is reset for a lower B.
4515  if( IsLower<MT5>::value ) {
4516  for( size_t j=jend; j<N; ++j ) {
4517  reset( C(i,j) );
4518  }
4519  }
4520  }
4521  }
4522  //**********************************************************************************************
4523 
4524  //**Default assignment to dense matrices (diagonal/diagonal)************************************
      // Default kernel for two diagonal operands (selected by the EnableIf_ condition below):
      // C is reset as a whole and only its diagonal is written, with
      // C(i,i) = A(i,i) * B(i,i) * scalar for every row of A.
      // NOTE(review): listing line 4545 (first statement of the body) is missing from this view.
4538  template< typename MT3 // Type of the left-hand side target matrix
4539  , typename MT4 // Type of the left-hand side matrix operand
4540  , typename MT5 // Type of the right-hand side matrix operand
4541  , typename ST2 > // Type of the scalar value
4542  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
4543  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4544  {
4546 
      // Clear everything first so that the off-diagonal elements need no further handling.
4547  reset( C );
4548 
4549  for( size_t i=0UL; i<A.rows(); ++i ) {
4550  C(i,i) = A(i,i) * B(i,i) * scalar;
4551  }
4552  }
4553  //**********************************************************************************************
4554 
4555  //**Default assignment to dense matrices (small matrices)***************************************
      // Small-matrix kernel without a specialized implementation: simply forwards all arguments
      // to selectDefaultAssignKernel().
      // NOTE(review): listing line 4573 (presumably the return type / enable condition that
      // separates this overload from the vectorized ones) is missing from this view.
4569  template< typename MT3 // Type of the left-hand side target matrix
4570  , typename MT4 // Type of the left-hand side matrix operand
4571  , typename MT5 // Type of the right-hand side matrix operand
4572  , typename ST2 > // Type of the scalar value
4574  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4575  {
4576  selectDefaultAssignKernel( C, A, B, scalar );
4577  }
4578  //**********************************************************************************************
4579 
4580  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
      // Vectorized small-matrix kernel for a row-major target: computes C = ( A * B ) * scalar by
      // combining single elements of A (via set()) with SIMD-wide loads from rows of B and
      // storing SIMD-wide chunks of C. The columns of C are processed in panels of 8, 5, 4, 3, 2
      // and 1 SIMD vectors (each loop continues at the column index j where the previous one
      // stopped), followed by a scalar loop over the columns from jpos on.
      // SYM, HERM, LOW and UPP are flags defined outside this view; they restrict which rows of
      // a panel are computed and drive the post-processing at the end of the function.
      // NOTE(review): listing lines 4599 (presumably the return type / enable condition) and
      // 4622 (the statement guarding the 8-vector scope) are missing from this view.
4595  template< typename MT3 // Type of the left-hand side target matrix
4596  , typename MT4 // Type of the left-hand side matrix operand
4597  , typename MT5 // Type of the right-hand side matrix operand
4598  , typename ST2 > // Type of the scalar value
4600  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4601  {
      // A scalar tail loop is only needed if C or B is not padded.
4602  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
4603 
4604  const size_t M( A.rows() );
4605  const size_t N( B.columns() );
4606  const size_t K( A.columns() );
4607 
4608  BLAZE_INTERNAL_ASSERT( !( SYM || HERM || LOW || UPP ) || ( M == N ), "Broken invariant detected" );
4609 
      // End of the SIMD-processed column range: N rounded down to a multiple of SIMDSIZE when a
      // remainder loop exists, N itself otherwise.
4610  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
4611  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
4612 
      // SIMD vector built from the scalar; every accumulator is multiplied by it before the store.
4613  const SIMDType factor( set( scalar ) );
4614 
      // With both LOW and UPP set and more than three SIMD vectors per row, C is cleared up
      // front (the 4-vector panel loop below is disabled for LOW && UPP).
4615  if( LOW && UPP && N > SIMDSIZE*3UL ) {
4616  reset( ~C );
4617  }
4618 
4619  {
4620  size_t j( 0UL );
4621 
      // Panels of 8 SIMD vectors, one row of C per iteration; only entered when none of
      // SYM/HERM/LOW/UPP is set.
4623  {
4624  for( ; !SYM && !HERM && !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
4625  for( size_t i=0UL; i<M; ++i )
4626  {
      // k-range narrowed by the triangular traits of A (row i) and B (columns of the panel).
4627  const size_t kbegin( ( IsUpper<MT4>::value )
4628  ?( ( IsLower<MT5>::value )
4629  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4630  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4631  :( IsLower<MT5>::value ? j : 0UL ) );
4632  const size_t kend( ( IsLower<MT4>::value )
4633  ?( ( IsUpper<MT5>::value )
4634  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
4635  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4636  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
4637 
      // Accumulators start from SIMDType's default-constructed value (presumably zero --
      // confirm against the SIMDType definition).
4638  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4639 
4640  for( size_t k=kbegin; k<kend; ++k ) {
4641  const SIMDType a1( set( A(i,k) ) );
4642  xmm1 += a1 * B.load(k,j );
4643  xmm2 += a1 * B.load(k,j+SIMDSIZE );
4644  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
4645  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
4646  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
4647  xmm6 += a1 * B.load(k,j+SIMDSIZE*5UL);
4648  xmm7 += a1 * B.load(k,j+SIMDSIZE*6UL);
4649  xmm8 += a1 * B.load(k,j+SIMDSIZE*7UL);
4650  }
4651 
4652  (~C).store( i, j , xmm1 * factor );
4653  (~C).store( i, j+SIMDSIZE , xmm2 * factor );
4654  (~C).store( i, j+SIMDSIZE*2UL, xmm3 * factor );
4655  (~C).store( i, j+SIMDSIZE*3UL, xmm4 * factor );
4656  (~C).store( i, j+SIMDSIZE*4UL, xmm5 * factor );
4657  (~C).store( i, j+SIMDSIZE*5UL, xmm6 * factor );
4658  (~C).store( i, j+SIMDSIZE*6UL, xmm7 * factor );
4659  (~C).store( i, j+SIMDSIZE*7UL, xmm8 * factor );
4660  }
4661  }
4662  }
4663 
      // Panels of 5 SIMD vectors, two rows of C per iteration plus a single-row tail; only
      // entered when none of SYM/HERM/LOW/UPP is set.
4664  for( ; !SYM && !HERM && !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
4665  {
4666  size_t i( 0UL );
4667 
4668  for( ; (i+2UL) <= M; i+=2UL )
4669  {
4670  const size_t kbegin( ( IsUpper<MT4>::value )
4671  ?( ( IsLower<MT5>::value )
4672  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4673  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4674  :( IsLower<MT5>::value ? j : 0UL ) );
4675  const size_t kend( ( IsLower<MT4>::value )
4676  ?( ( IsUpper<MT5>::value )
4677  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
4678  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
4679  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
4680 
4681  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
4682 
4683  for( size_t k=kbegin; k<kend; ++k ) {
4684  const SIMDType a1( set( A(i ,k) ) );
4685  const SIMDType a2( set( A(i+1UL,k) ) );
4686  const SIMDType b1( B.load(k,j ) );
4687  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
4688  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
4689  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
4690  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
4691  xmm1 += a1 * b1;
4692  xmm2 += a1 * b2;
4693  xmm3 += a1 * b3;
4694  xmm4 += a1 * b4;
4695  xmm5 += a1 * b5;
4696  xmm6 += a2 * b1;
4697  xmm7 += a2 * b2;
4698  xmm8 += a2 * b3;
4699  xmm9 += a2 * b4;
4700  xmm10 += a2 * b5;
4701  }
4702 
4703  (~C).store( i , j , xmm1 * factor );
4704  (~C).store( i , j+SIMDSIZE , xmm2 * factor );
4705  (~C).store( i , j+SIMDSIZE*2UL, xmm3 * factor );
4706  (~C).store( i , j+SIMDSIZE*3UL, xmm4 * factor );
4707  (~C).store( i , j+SIMDSIZE*4UL, xmm5 * factor );
4708  (~C).store( i+1UL, j , xmm6 * factor );
4709  (~C).store( i+1UL, j+SIMDSIZE , xmm7 * factor );
4710  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm8 * factor );
4711  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm9 * factor );
4712  (~C).store( i+1UL, j+SIMDSIZE*4UL, xmm10 * factor );
4713  }
4714 
      // Single remaining row when M is odd.
4715  if( i < M )
4716  {
4717  const size_t kbegin( ( IsUpper<MT4>::value )
4718  ?( ( IsLower<MT5>::value )
4719  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4720  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4721  :( IsLower<MT5>::value ? j : 0UL ) );
4722  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
4723 
4724  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
4725 
4726  for( size_t k=kbegin; k<kend; ++k ) {
4727  const SIMDType a1( set( A(i,k) ) );
4728  xmm1 += a1 * B.load(k,j );
4729  xmm2 += a1 * B.load(k,j+SIMDSIZE );
4730  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
4731  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
4732  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
4733  }
4734 
4735  (~C).store( i, j , xmm1 * factor );
4736  (~C).store( i, j+SIMDSIZE , xmm2 * factor );
4737  (~C).store( i, j+SIMDSIZE*2UL, xmm3 * factor );
4738  (~C).store( i, j+SIMDSIZE*3UL, xmm4 * factor );
4739  (~C).store( i, j+SIMDSIZE*4UL, xmm5 * factor );
4740  }
4741  }
4742 
      // Panels of 4 SIMD vectors (skipped when both LOW and UPP are set). From here on the
      // row range depends on the flags: for SYM/HERM/UPP it ends at the end of the panel
      // (iend), for LOW it starts at row j.
4743  for( ; !( LOW && UPP ) && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
4744  {
4745  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*4UL,M) : M );
4746  size_t i( LOW ? j : 0UL );
4747 
4748  for( ; (i+2UL) <= iend; i+=2UL )
4749  {
4750  const size_t kbegin( ( IsUpper<MT4>::value )
4751  ?( ( IsLower<MT5>::value )
4752  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4753  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4754  :( IsLower<MT5>::value ? j : 0UL ) );
4755  const size_t kend( ( IsLower<MT4>::value )
4756  ?( ( IsUpper<MT5>::value )
4757  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
4758  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
4759  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
4760 
4761  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4762 
4763  for( size_t k=kbegin; k<kend; ++k ) {
4764  const SIMDType a1( set( A(i ,k) ) );
4765  const SIMDType a2( set( A(i+1UL,k) ) );
4766  const SIMDType b1( B.load(k,j ) );
4767  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
4768  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
4769  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
4770  xmm1 += a1 * b1;
4771  xmm2 += a1 * b2;
4772  xmm3 += a1 * b3;
4773  xmm4 += a1 * b4;
4774  xmm5 += a2 * b1;
4775  xmm6 += a2 * b2;
4776  xmm7 += a2 * b3;
4777  xmm8 += a2 * b4;
4778  }
4779 
4780  (~C).store( i , j , xmm1 * factor );
4781  (~C).store( i , j+SIMDSIZE , xmm2 * factor );
4782  (~C).store( i , j+SIMDSIZE*2UL, xmm3 * factor );
4783  (~C).store( i , j+SIMDSIZE*3UL, xmm4 * factor );
4784  (~C).store( i+1UL, j , xmm5 * factor );
4785  (~C).store( i+1UL, j+SIMDSIZE , xmm6 * factor );
4786  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm7 * factor );
4787  (~C).store( i+1UL, j+SIMDSIZE*3UL, xmm8 * factor );
4788  }
4789 
      // Single remaining row of the panel.
4790  if( i < iend )
4791  {
4792  const size_t kbegin( ( IsUpper<MT4>::value )
4793  ?( ( IsLower<MT5>::value )
4794  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4795  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4796  :( IsLower<MT5>::value ? j : 0UL ) );
4797  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
4798 
4799  SIMDType xmm1, xmm2, xmm3, xmm4;
4800 
4801  for( size_t k=kbegin; k<kend; ++k ) {
4802  const SIMDType a1( set( A(i,k) ) );
4803  xmm1 += a1 * B.load(k,j );
4804  xmm2 += a1 * B.load(k,j+SIMDSIZE );
4805  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
4806  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
4807  }
4808 
4809  (~C).store( i, j , xmm1 * factor );
4810  (~C).store( i, j+SIMDSIZE , xmm2 * factor );
4811  (~C).store( i, j+SIMDSIZE*2UL, xmm3 * factor );
4812  (~C).store( i, j+SIMDSIZE*3UL, xmm4 * factor );
4813  }
4814  }
4815 
      // Panels of 3 SIMD vectors, same row handling as the 4-vector panels.
4816  for( ; (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
4817  {
4818  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*3UL,M) : M );
4819  size_t i( LOW ? j : 0UL );
4820 
4821  for( ; (i+2UL) <= iend; i+=2UL )
4822  {
4823  const size_t kbegin( ( IsUpper<MT4>::value )
4824  ?( ( IsLower<MT5>::value )
4825  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4826  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4827  :( IsLower<MT5>::value ? j : 0UL ) );
4828  const size_t kend( ( IsLower<MT4>::value )
4829  ?( ( IsUpper<MT5>::value )
4830  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
4831  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
4832  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
4833 
4834  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
4835 
4836  for( size_t k=kbegin; k<kend; ++k ) {
4837  const SIMDType a1( set( A(i ,k) ) );
4838  const SIMDType a2( set( A(i+1UL,k) ) );
4839  const SIMDType b1( B.load(k,j ) );
4840  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
4841  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
4842  xmm1 += a1 * b1;
4843  xmm2 += a1 * b2;
4844  xmm3 += a1 * b3;
4845  xmm4 += a2 * b1;
4846  xmm5 += a2 * b2;
4847  xmm6 += a2 * b3;
4848  }
4849 
4850  (~C).store( i , j , xmm1 * factor );
4851  (~C).store( i , j+SIMDSIZE , xmm2 * factor );
4852  (~C).store( i , j+SIMDSIZE*2UL, xmm3 * factor );
4853  (~C).store( i+1UL, j , xmm4 * factor );
4854  (~C).store( i+1UL, j+SIMDSIZE , xmm5 * factor );
4855  (~C).store( i+1UL, j+SIMDSIZE*2UL, xmm6 * factor );
4856  }
4857 
4858  if( i < iend )
4859  {
4860  const size_t kbegin( ( IsUpper<MT4>::value )
4861  ?( ( IsLower<MT5>::value )
4862  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4863  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4864  :( IsLower<MT5>::value ? j : 0UL ) );
4865  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
4866 
4867  SIMDType xmm1, xmm2, xmm3;
4868 
4869  for( size_t k=kbegin; k<kend; ++k ) {
4870  const SIMDType a1( set( A(i,k) ) );
4871  xmm1 += a1 * B.load(k,j );
4872  xmm2 += a1 * B.load(k,j+SIMDSIZE );
4873  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
4874  }
4875 
4876  (~C).store( i, j , xmm1 * factor );
4877  (~C).store( i, j+SIMDSIZE , xmm2 * factor );
4878  (~C).store( i, j+SIMDSIZE*2UL, xmm3 * factor );
4879  }
4880  }
4881 
      // Panels of 2 SIMD vectors.
4882  for( ; (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
4883  {
4884  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE*2UL,M) : M );
4885  size_t i( LOW ? j : 0UL );
4886 
4887  for( ; (i+2UL) <= iend; i+=2UL )
4888  {
4889  const size_t kbegin( ( IsUpper<MT4>::value )
4890  ?( ( IsLower<MT5>::value )
4891  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4892  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4893  :( IsLower<MT5>::value ? j : 0UL ) );
4894  const size_t kend( ( IsLower<MT4>::value )
4895  ?( ( IsUpper<MT5>::value )
4896  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
4897  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
4898  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
4899 
4900  SIMDType xmm1, xmm2, xmm3, xmm4;
4901 
4902  for( size_t k=kbegin; k<kend; ++k ) {
4903  const SIMDType a1( set( A(i ,k) ) );
4904  const SIMDType a2( set( A(i+1UL,k) ) );
4905  const SIMDType b1( B.load(k,j ) );
4906  const SIMDType b2( B.load(k,j+SIMDSIZE) );
4907  xmm1 += a1 * b1;
4908  xmm2 += a1 * b2;
4909  xmm3 += a2 * b1;
4910  xmm4 += a2 * b2;
4911  }
4912 
4913  (~C).store( i , j , xmm1 * factor );
4914  (~C).store( i , j+SIMDSIZE, xmm2 * factor );
4915  (~C).store( i+1UL, j , xmm3 * factor );
4916  (~C).store( i+1UL, j+SIMDSIZE, xmm4 * factor );
4917  }
4918 
4919  if( i < iend )
4920  {
4921  const size_t kbegin( ( IsUpper<MT4>::value )
4922  ?( ( IsLower<MT5>::value )
4923  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4924  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4925  :( IsLower<MT5>::value ? j : 0UL ) );
4926  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
4927 
4928  SIMDType xmm1, xmm2;
4929 
4930  for( size_t k=kbegin; k<kend; ++k ) {
4931  const SIMDType a1( set( A(i,k) ) );
4932  xmm1 += a1 * B.load(k,j );
4933  xmm2 += a1 * B.load(k,j+SIMDSIZE);
4934  }
4935 
4936  (~C).store( i, j , xmm1 * factor );
4937  (~C).store( i, j+SIMDSIZE, xmm2 * factor );
4938  }
4939  }
4940 
      // Single SIMD vectors for the remaining columns below jpos.
4941  for( ; j<jpos; j+=SIMDSIZE )
4942  {
4943  const size_t iend( SYM || HERM || UPP ? min(j+SIMDSIZE,M) : M );
4944  size_t i( LOW ? j : 0UL );
4945 
4946  for( ; (i+2UL) <= iend; i+=2UL )
4947  {
4948  const size_t kbegin( ( IsUpper<MT4>::value )
4949  ?( ( IsLower<MT5>::value )
4950  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4951  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4952  :( IsLower<MT5>::value ? j : 0UL ) );
4953  const size_t kend( ( IsLower<MT4>::value )
4954  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
4955  :( K ) );
4956 
4957  SIMDType xmm1, xmm2;
4958 
4959  for( size_t k=kbegin; k<kend; ++k ) {
4960  const SIMDType b1( B.load(k,j) );
4961  xmm1 += set( A(i ,k) ) * b1;
4962  xmm2 += set( A(i+1UL,k) ) * b1;
4963  }
4964 
4965  (~C).store( i , j, xmm1 * factor );
4966  (~C).store( i+1UL, j, xmm2 * factor );
4967  }
4968 
4969  if( i < iend )
4970  {
4971  const size_t kbegin( ( IsUpper<MT4>::value )
4972  ?( ( IsLower<MT5>::value )
4973  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4974  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4975  :( IsLower<MT5>::value ? j : 0UL ) );
4976 
4977  SIMDType xmm1;
4978 
4979  for( size_t k=kbegin; k<K; ++k ) {
4980  xmm1 += set( A(i,k) ) * B.load(k,j);
4981  }
4982 
4983  (~C).store( i, j, xmm1 * factor );
4984  }
4985  }
4986 
      // Scalar tail for the columns from jpos to N (only when a remainder is possible);
      // these elements are multiplied by the plain scalar instead of the SIMD factor.
4987  for( ; remainder && j<N; ++j )
4988  {
4989  size_t i( LOW && UPP ? j : 0UL );
4990 
4991  for( ; (i+2UL) <= M; i+=2UL )
4992  {
4993  const size_t kbegin( ( IsUpper<MT4>::value )
4994  ?( ( IsLower<MT5>::value )
4995  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
4996  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4997  :( IsLower<MT5>::value ? j : 0UL ) );
4998  const size_t kend( ( IsLower<MT4>::value )
4999  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
5000  :( K ) );
5001 
5002  ElementType value1 = ElementType();
5003  ElementType value2 = ElementType();
5004 
5005  for( size_t k=kbegin; k<kend; ++k ) {
5006  value1 += A(i ,k) * B(k,j);
5007  value2 += A(i+1UL,k) * B(k,j);
5008  }
5009 
5010  (~C)(i ,j) = value1 * scalar;
5011  (~C)(i+1UL,j) = value2 * scalar;
5012  }
5013 
5014  if( i < M )
5015  {
5016  const size_t kbegin( ( IsUpper<MT4>::value )
5017  ?( ( IsLower<MT5>::value )
5018  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5019  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5020  :( IsLower<MT5>::value ? j : 0UL ) );
5021 
5022  ElementType value = ElementType();
5023 
5024  for( size_t k=kbegin; k<K; ++k ) {
5025  value += A(i,k) * B(k,j);
5026  }
5027 
5028  (~C)(i,j) = value * scalar;
5029  }
5030  }
5031  }
5032 
      // Post-processing for results wider than 4 SIMD vectors. For each row/column index the
      // bound is rounded down to a multiple of 4*SIMDSIZE, which presumably matches the blocks
      // the restricted panel loops above did not write (confirm).
      //  - SYM/HERM: C(i,j) is copied from C(j,i) (conjugated for HERM).
      //  - LOW only: elements (i,j) above those block boundaries are reset.
      //  - UPP only: elements (i,j) left of those block boundaries are reset.
5033  if( ( SYM || HERM ) && ( N > SIMDSIZE*4UL ) ) {
5034  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
5035  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
5036  for( size_t j=0UL; j<jend; ++j ) {
5037  (~C)(i,j) = HERM ? conj( (~C)(j,i) ) : (~C)(j,i);
5038  }
5039  }
5040  }
5041  else if( LOW && !UPP && N > SIMDSIZE*4UL ) {
5042  for( size_t j=SIMDSIZE*4UL; j<N; ++j ) {
5043  const size_t iend( ( SIMDSIZE*4UL ) * ( j / (SIMDSIZE*4UL) ) );
5044  for( size_t i=0UL; i<iend; ++i ) {
5045  reset( (~C)(i,j) );
5046  }
5047  }
5048  }
5049  else if( !LOW && UPP && N > SIMDSIZE*4UL ) {
5050  for( size_t i=SIMDSIZE*4UL; i<M; ++i ) {
5051  const size_t jend( ( SIMDSIZE*4UL ) * ( i / (SIMDSIZE*4UL) ) );
5052  for( size_t j=0UL; j<jend; ++j ) {
5053  reset( (~C)(i,j) );
5054  }
5055  }
5056  }
5057  }
5058  //**********************************************************************************************
5059 
5060  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
      // Small-matrix kernel for a column-major target: instead of computing the product
      // directly, one operand is evaluated (serially) into a temporary of its opposite storage
      // order and the scaled product of that temporary with the other operand is handed back
      // to assign().
      // NOTE(review): this listing is missing lines 5079 (presumably the return type / enable
      // condition), 5082-5085 (start of the body) and 5089 / 5093 (the conditions that open
      // the first two branches) -- confirm against the real header.
5075  template< typename MT3 // Type of the left-hand side target matrix
5076  , typename MT4 // Type of the left-hand side matrix operand
5077  , typename MT5 // Type of the right-hand side matrix operand
5078  , typename ST2 > // Type of the scalar value
5080  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5081  {
5086 
      // Functor wrapped around every product expression below; its type is defined outside
      // this view.
5087  const ForwardFunctor fwd;
5088 
      // First branch (condition not visible): convert A.
5090  const OppositeType_<MT4> tmp( serial( A ) );
5091  assign( ~C, fwd( tmp * B ) * scalar );
5092  }
      // Second branch (condition not visible): convert B.
5094  const OppositeType_<MT5> tmp( serial( B ) );
5095  assign( ~C, fwd( A * tmp ) * scalar );
5096  }
      // Otherwise convert the operand with fewer elements (A when both have the same count).
5097  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
5098  const OppositeType_<MT4> tmp( serial( A ) );
5099  assign( ~C, fwd( tmp * B ) * scalar );
5100  }
5101  else {
5102  const OppositeType_<MT5> tmp( serial( B ) );
5103  assign( ~C, fwd( A * tmp ) * scalar );
5104  }
5105  }
5106  //**********************************************************************************************
5107 
5108  //**Default assignment to dense matrices (large matrices)***************************************
      // Large-matrix kernel without a specialized implementation: simply forwards all arguments
      // to selectDefaultAssignKernel().
      // NOTE(review): listing line 5126 (presumably the return type / enable condition) is
      // missing from this view.
5122  template< typename MT3 // Type of the left-hand side target matrix
5123  , typename MT4 // Type of the left-hand side matrix operand
5124  , typename MT5 // Type of the right-hand side matrix operand
5125  , typename ST2 > // Type of the scalar value
5127  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5128  {
5129  selectDefaultAssignKernel( C, A, B, scalar );
5130  }
5131  //**********************************************************************************************
5132 
5133  //**Vectorized default assignment to dense matrices (large matrices)****************************
      // Vectorized large-matrix kernel: dispatches to one of the *mmm() routines depending on the
      // flags SYM, HERM, LOW and UPP (defined outside this view), checked in that order.
      // lmmm(), ummm() and mmm() additionally receive ST2(0) as a trailing argument (presumably
      // the factor for the existing content of C -- confirm against the MMM header).
      // NOTE(review): listing line 5152 (presumably the return type / enable condition) is
      // missing from this view.
5148  template< typename MT3 // Type of the left-hand side target matrix
5149  , typename MT4 // Type of the left-hand side matrix operand
5150  , typename MT5 // Type of the right-hand side matrix operand
5151  , typename ST2 > // Type of the scalar value
5153  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5154  {
5155  if( SYM )
5156  smmm( C, A, B, scalar );
5157  else if( HERM )
5158  hmmm( C, A, B, scalar );
5159  else if( LOW )
5160  lmmm( C, A, B, scalar, ST2(0) );
5161  else if( UPP )
5162  ummm( C, A, B, scalar, ST2(0) );
5163  else
5164  mmm( C, A, B, scalar, ST2(0) );
5165  }
5166  //**********************************************************************************************
5167 
5168  //**BLAS-based assignment to dense matrices (default)*******************************************
      // Fallback of the BLAS kernel: forwards all arguments to selectLargeAssignKernel().
      // NOTE(review): listing line 5186 (presumably the return type / enable condition that
      // separates this overload from the BLAS-backed one) is missing from this view.
5182  template< typename MT3 // Type of the left-hand side target matrix
5183  , typename MT4 // Type of the left-hand side matrix operand
5184  , typename MT5 // Type of the right-hand side matrix operand
5185  , typename ST2 > // Type of the scalar value
5187  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5188  {
5189  selectLargeAssignKernel( C, A, B, scalar );
5190  }
5191  //**********************************************************************************************
5192 
5193  //**BLAS-based assignment to dense matrices*****************************************************
      // BLAS-backed kernel, compiled only when both BLAZE_BLAS_MODE and
      // BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled.
      //  - A triangular: B is copied into C, then trmm() is called with A on the left side.
      //  - B triangular: A is copied into C, then trmm() is called with B on the right side.
      //  - otherwise   : gemm() is called with the scalar and a zero of the target element type.
      // The scalar is converted to the element type of the target (ET) in all three cases.
      // NOTE(review): listing line 5212 (presumably the return type / enable condition) is
      // missing from this view.
5194 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
5195 
5208  template< typename MT3 // Type of the left-hand side target matrix
5209  , typename MT4 // Type of the left-hand side matrix operand
5210  , typename MT5 // Type of the right-hand side matrix operand
5211  , typename ST2 > // Type of the scalar value
5213  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5214  {
5215  typedef ElementType_<MT3> ET;
5216 
5217  if( IsTriangular<MT4>::value ) {
5218  assign( C, B );
5219  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5220  }
5221  else if( IsTriangular<MT5>::value ) {
5222  assign( C, A );
5223  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5224  }
5225  else {
5226  gemm( C, A, B, ET(scalar), ET(0) );
5227  }
5228  }
5229 #endif
5230  //**********************************************************************************************
5231 
5232  //**Assignment to sparse matrices***************************************************************
      // Assignment of the scaled product to a sparse matrix: the expression is first evaluated
      // serially into a temporary of type TmpType and that temporary is then assigned to the
      // target through the forwarding functor.
      // NOTE(review): listing lines 5246 (presumably the return type), 5249, 5251 and
      // 5253-5258 are missing from this view; TmpType is presumably declared on one of them.
5244  template< typename MT // Type of the target sparse matrix
5245  , bool SO > // Storage order of the target sparse matrix
5247  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5248  {
5250 
5252 
5259 
5260  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5261  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5262 
5263  const ForwardFunctor fwd;
5264 
5265  const TmpType tmp( serial( rhs ) );
5266  assign( ~lhs, fwd( tmp ) );
5267  }
5268  //**********************************************************************************************
5269 
5270  //**Restructuring assignment to column-major matrices*******************************************
      // Restructuring assignment to a column-major target: the product is rewritten with one or
      // both operands transposed before it is scaled and assigned again.
      //  - first branch : both operands transposed (its condition is not visible here)
      //  - MT1 symmetric: only the left operand is transposed
      //  - otherwise    : only the right operand is transposed
      // NOTE(review): listing lines 5285 (presumably the return type / enable condition),
      // 5288, 5290 and 5300 (the condition of the first branch) are missing from this view.
5284  template< typename MT > // Type of the target matrix
5286  assign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
5287  {
5289 
5291 
5292  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5293  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5294 
5295  const ForwardFunctor fwd;
5296 
      // Operands of the wrapped matrix product.
5297  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5298  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5299 
5301  assign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
5302  else if( IsSymmetric<MT1>::value )
5303  assign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
5304  else
5305  assign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
5306  }
5307  //**********************************************************************************************
5308 
5309  //**Addition assignment to dense matrices*******************************************************
      // Addition assignment of the scaled product to a dense matrix: returns early when the
      // target or the inner dimension is empty, otherwise evaluates both operands serially
      // (into objects of type LT and RT, declared outside this view) and passes them together
      // with the scalar to selectAddAssignKernel().
      // NOTE(review): listing lines 5323 (presumably the return type / enable condition) and
      // 5326 (first statement of the body) are missing from this view.
5321  template< typename MT // Type of the target dense matrix
5322  , bool SO > // Storage order of the target dense matrix
5324  addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5325  {
5327 
5328  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5329  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5330 
5331  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5332  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5333 
      // Nothing to add if the target has no elements or the products have no terms.
5334  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5335  return;
5336  }
5337 
5338  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5339  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5340 
5341  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5342  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5343  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5344  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5345  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5346  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5347 
5348  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5349  }
5350  //**********************************************************************************************
5351 
5352  //**Addition assignment to dense matrices (kernel selection)************************************
      // Chooses the kernel for the addition assignment C += ( A * B ) * scalar.
      // The small kernel is used if any of the following holds:
      //  - B is a diagonal matrix type,
      //  - debug mode is off and B has at most SIMDSIZE*10 columns,
      //  - the number of elements of C is below DMATDMATMULT_THRESHOLD.
      // In every other case the BLAS kernel is selected.
5363  template< typename MT3 // Type of the left-hand side target matrix
5364  , typename MT4 // Type of the left-hand side matrix operand
5365  , typename MT5 // Type of the right-hand side matrix operand
5366  , typename ST2 > // Type of the scalar value
5367  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5368  {
5369  if( ( IsDiagonal<MT5>::value ) ||
5370  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
5371  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
5372  selectSmallAddAssignKernel( C, A, B, scalar );
5373  else
5374  selectBlasAddAssignKernel( C, A, B, scalar );
5375  }
5376  //**********************************************************************************************
5377 
5378  //**Default addition assignment to dense matrices (general/general)*****************************
      // Default addition-assignment kernel for two non-diagonal operands (selected by the
      // EnableIf_ condition below): the scaled product A * B * scalar is evaluated serially into
      // a temporary of type ResultType (declared outside this view), which is then added to C.
5392  template< typename MT3 // Type of the left-hand side target matrix
5393  , typename MT4 // Type of the left-hand side matrix operand
5394  , typename MT5 // Type of the right-hand side matrix operand
5395  , typename ST2 > // Type of the scalar value
5396  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
5397  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5398  {
5399  const ResultType tmp( serial( A * B * scalar ) );
5400  addAssign( C, tmp );
5401  }
5402  //**********************************************************************************************
5403 
5404  //**Default addition assignment to dense matrices (general/diagonal)****************************
      // Default addition-assignment kernel for a non-diagonal A and a diagonal B (selected by
      // the EnableIf_ condition below): adds A(i,j) * B(j,j) * scalar to every C(i,j) within the
      // column range implied by A's triangular traits. Elements outside that range are left
      // untouched. The inner loop handles two columns per iteration.
      // NOTE(review): listing line 5425 (first statement of the body) is missing from this view.
5418  template< typename MT3 // Type of the left-hand side target matrix
5419  , typename MT4 // Type of the left-hand side matrix operand
5420  , typename MT5 // Type of the right-hand side matrix operand
5421  , typename ST2 > // Type of the scalar value
5422  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
5423  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5424  {
5426 
5427  const size_t M( A.rows() );
5428  const size_t N( B.columns() );
5429 
5430  for( size_t i=0UL; i<M; ++i )
5431  {
      // Column range of row i of A as implied by A's triangular traits.
5432  const size_t jbegin( ( IsUpper<MT4>::value )
5433  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5434  :( 0UL ) );
5435  const size_t jend( ( IsLower<MT4>::value )
5436  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
5437  :( N ) );
5438  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5439 
      // jpos marks the end of the part of the range that can be processed in pairs
      // (jbegin plus the column count rounded down to an even number).
5440  const size_t jnum( jend - jbegin );
5441  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5442 
5443  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5444  C(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5445  C(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5446  }
      // Odd column count: one element is left over at jpos.
5447  if( jpos < jend ) {
5448  C(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
5449  }
5450  }
5451  }
5452  //**********************************************************************************************
5453 
5454  //**Default addition assignment to dense matrices (diagonal/general)****************************
// Default addition assignment for a diagonal left operand and a non-diagonal right operand.
// Since only the diagonal of A is read, each target element is updated as
//    C(i,j) += A(i,i) * B(i,j) * scalar
// NOTE(review): listing line 5475 is absent from this extract (presumably a compile-time
// constraint on ST2) - confirm against the original header.
5468  template< typename MT3 // Type of the left-hand side target matrix
5469  , typename MT4 // Type of the left-hand side matrix operand
5470  , typename MT5 // Type of the right-hand side matrix operand
5471  , typename ST2 > // Type of the scalar value
5472  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5473  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5474  {
5476 
5477  const size_t M( A.rows() );
5478  const size_t N( B.columns() );
5479 
5480  for( size_t i=0UL; i<M; ++i )
5481  {
      // Column range of row i, narrowed by the upper/lower traits of B's type (MT5):
      // starts at i (or i+1 if strictly upper), ends at i+1 (or i if strictly lower).
5482  const size_t jbegin( ( IsUpper<MT5>::value )
5483  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
5484  :( 0UL ) );
5485  const size_t jend( ( IsLower<MT5>::value )
5486  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
5487  :( N ) );
5488  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5489 
      // jpos marks the end of the part of [jbegin,jend) that holds an even number of
      // columns; the loop below handles two columns per iteration.
5490  const size_t jnum( jend - jbegin );
5491  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5492 
5493  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5494  C(i,j ) += A(i,i) * B(i,j ) * scalar;
5495  C(i,j+1UL) += A(i,i) * B(i,j+1UL) * scalar;
5496  }
      // Single leftover column in case of an odd column count
5497  if( jpos < jend ) {
5498  C(i,jpos) += A(i,i) * B(i,jpos) * scalar;
5499  }
5500  }
5501  }
5502  //**********************************************************************************************
5503 
5504  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
// Default addition assignment for two diagonal operands. Only the diagonal of the target
// is touched: C(i,i) += A(i,i) * B(i,i) * scalar for every row index of A.
// NOTE(review): listing line 5525 is absent from this extract (presumably a compile-time
// constraint on ST2) - confirm against the original header.
5518  template< typename MT3 // Type of the left-hand side target matrix
5519  , typename MT4 // Type of the left-hand side matrix operand
5520  , typename MT5 // Type of the right-hand side matrix operand
5521  , typename ST2 > // Type of the scalar value
5522  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
5523  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5524  {
5526 
5527  for( size_t i=0UL; i<A.rows(); ++i ) {
5528  C(i,i) += A(i,i) * B(i,i) * scalar;
5529  }
5530  }
5531  //**********************************************************************************************
5532 
5533  //**Default addition assignment to dense matrices (small matrices)******************************
// Small-matrix addition assignment, non-vectorized variant: forwards all arguments
// unchanged to selectDefaultAddAssignKernel().
// NOTE(review): listing line 5551 (the return type, presumably including the enabling
// condition that separates this overload from the vectorized ones) is absent from this
// extract - confirm against the original header.
5547  template< typename MT3 // Type of the left-hand side target matrix
5548  , typename MT4 // Type of the left-hand side matrix operand
5549  , typename MT5 // Type of the right-hand side matrix operand
5550  , typename ST2 > // Type of the scalar value
5552  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5553  {
5554  selectDefaultAddAssignKernel( C, A, B, scalar );
5555  }
5556  //**********************************************************************************************
5557 
5558  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized small-matrix addition assignment to a row-major dense target matrix.
//
// For every processed element the kernel accumulates the inner product of a row of A with
// a column of B, multiplies the accumulated value by the scalar and adds it to C. The
// columns of C are processed in blocks of 8, 5, 4, 3, 2 and 1 SIMD vectors (in this
// order); columns that do not fill a complete SIMD vector are handled by a scalar loop at
// the end. Within the 5-, 4-, 3-, 2- and 1-vector blocks two rows of C are computed per
// iteration, followed by a single-row step if a row is left over.
//
// The k ranges [kbegin,kend) of the inner products are narrowed by means of the
// IsUpper/IsLower/IsStrictlyUpper/IsStrictlyLower traits of MT4 and MT5.
//
// LOW and UPP are declared outside of this extract. In this function they restrict the
// processed rows of each column block (see iend and i below) and require a square result
// if either of them is set (see the assertion on M == N).
// NOTE(review): presumably they flag a lower/upper result structure - confirm.
//
// NOTE(review): listing line 5577 (the return type, presumably including the enabling
// condition for the vectorized kernel) is absent from this extract.
5573  template< typename MT3 // Type of the left-hand side target matrix
5574  , typename MT4 // Type of the left-hand side matrix operand
5575  , typename MT5 // Type of the right-hand side matrix operand
5576  , typename ST2 > // Type of the scalar value
5578  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5579  {
      // A scalar remainder loop is required unless both the type of C and the type of B
      // are padded.
5580  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
5581 
5582  const size_t M( A.rows() );
5583  const size_t N( B.columns() );
5584  const size_t K( A.columns() );
5585 
5586  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
5587 
      // End of the SIMD-processed columns: N rounded down to a multiple of SIMDSIZE if a
      // remainder loop is used (see the assertion below), N otherwise.
5588  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
5589  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
5590 
      // SIMD value created from the scalar; every accumulated vector is multiplied by it
      // before it is added to C.
5591  const SIMDType factor( set( scalar ) );
5592 
5593  size_t j( 0UL );
5594 
      // Blocks of 8 SIMD vectors, one row of C per iteration.
      // NOTE(review): listing line 5595, the statement that governs the following braced
      // block, is absent from this extract - confirm against the original header.
5596  {
5597  for( ; !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
5598  for( size_t i=0UL; i<M; ++i )
5599  {
5600  const size_t kbegin( ( IsUpper<MT4>::value )
5601  ?( ( IsLower<MT5>::value )
5602  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5603  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5604  :( IsLower<MT5>::value ? j : 0UL ) );
5605  const size_t kend( ( IsLower<MT4>::value )
5606  ?( ( IsUpper<MT5>::value )
5607  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
5608  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
5609  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
5610 
      // Accumulators for the 8 vectors of row i.
      // NOTE(review): relies on default-constructed SIMDType values being zero - confirm.
5611  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5612 
5613  for( size_t k=kbegin; k<kend; ++k ) {
5614  const SIMDType a1( set( A(i,k) ) );
5615  xmm1 += a1 * B.load(k,j );
5616  xmm2 += a1 * B.load(k,j+SIMDSIZE );
5617  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
5618  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
5619  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
5620  xmm6 += a1 * B.load(k,j+SIMDSIZE*5UL);
5621  xmm7 += a1 * B.load(k,j+SIMDSIZE*6UL);
5622  xmm8 += a1 * B.load(k,j+SIMDSIZE*7UL);
5623  }
5624 
      // C(i,j..) += accumulated product * scalar
5625  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5626  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) + xmm2 * factor );
5627  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) + xmm3 * factor );
5628  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) + xmm4 * factor );
5629  (~C).store( i, j+SIMDSIZE*4UL, (~C).load(i,j+SIMDSIZE*4UL) + xmm5 * factor );
5630  (~C).store( i, j+SIMDSIZE*5UL, (~C).load(i,j+SIMDSIZE*5UL) + xmm6 * factor );
5631  (~C).store( i, j+SIMDSIZE*6UL, (~C).load(i,j+SIMDSIZE*6UL) + xmm7 * factor );
5632  (~C).store( i, j+SIMDSIZE*7UL, (~C).load(i,j+SIMDSIZE*7UL) + xmm8 * factor );
5633  }
5634  }
5635  }
5636 
      // Blocks of 5 SIMD vectors (only if neither LOW nor UPP is set)
5637  for( ; !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
5638  {
5639  size_t i( 0UL );
5640 
      // Two rows of C per iteration
5641  for( ; (i+2UL) <= M; i+=2UL )
5642  {
5643  const size_t kbegin( ( IsUpper<MT4>::value )
5644  ?( ( IsLower<MT5>::value )
5645  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5646  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5647  :( IsLower<MT5>::value ? j : 0UL ) );
5648  const size_t kend( ( IsLower<MT4>::value )
5649  ?( ( IsUpper<MT5>::value )
5650  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
5651  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5652  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
5653 
      // xmm1-xmm5 accumulate row i, xmm6-xmm10 accumulate row i+1
5654  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
5655 
5656  for( size_t k=kbegin; k<kend; ++k ) {
5657  const SIMDType a1( set( A(i ,k) ) );
5658  const SIMDType a2( set( A(i+1UL,k) ) );
5659  const SIMDType b1( B.load(k,j ) );
5660  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
5661  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
5662  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
5663  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
5664  xmm1 += a1 * b1;
5665  xmm2 += a1 * b2;
5666  xmm3 += a1 * b3;
5667  xmm4 += a1 * b4;
5668  xmm5 += a1 * b5;
5669  xmm6 += a2 * b1;
5670  xmm7 += a2 * b2;
5671  xmm8 += a2 * b3;
5672  xmm9 += a2 * b4;
5673  xmm10 += a2 * b5;
5674  }
5675 
5676  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5677  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) + xmm2 * factor );
5678  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) + xmm3 * factor );
5679  (~C).store( i , j+SIMDSIZE*3UL, (~C).load(i ,j+SIMDSIZE*3UL) + xmm4 * factor );
5680  (~C).store( i , j+SIMDSIZE*4UL, (~C).load(i ,j+SIMDSIZE*4UL) + xmm5 * factor );
5681  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm6 * factor );
5682  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) + xmm7 * factor );
5683  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) + xmm8 * factor );
5684  (~C).store( i+1UL, j+SIMDSIZE*3UL, (~C).load(i+1UL,j+SIMDSIZE*3UL) + xmm9 * factor );
5685  (~C).store( i+1UL, j+SIMDSIZE*4UL, (~C).load(i+1UL,j+SIMDSIZE*4UL) + xmm10 * factor );
5686  }
5687 
      // Leftover single row (odd M)
5688  if( i < M )
5689  {
5690  const size_t kbegin( ( IsUpper<MT4>::value )
5691  ?( ( IsLower<MT5>::value )
5692  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5693  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5694  :( IsLower<MT5>::value ? j : 0UL ) );
5695  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
5696 
5697  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
5698 
5699  for( size_t k=kbegin; k<kend; ++k ) {
5700  const SIMDType a1( set( A(i,k) ) );
5701  xmm1 += a1 * B.load(k,j );
5702  xmm2 += a1 * B.load(k,j+SIMDSIZE );
5703  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
5704  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
5705  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
5706  }
5707 
5708  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5709  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) + xmm2 * factor );
5710  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) + xmm3 * factor );
5711  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) + xmm4 * factor );
5712  (~C).store( i, j+SIMDSIZE*4UL, (~C).load(i,j+SIMDSIZE*4UL) + xmm5 * factor );
5713  }
5714  }
5715 
      // Blocks of 4 SIMD vectors (only if neither LOW nor UPP is set)
5716  for( ; !LOW && !UPP && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
5717  {
5718  size_t i( 0UL );
5719 
      // Two rows of C per iteration
5720  for( ; (i+2UL) <= M; i+=2UL )
5721  {
5722  const size_t kbegin( ( IsUpper<MT4>::value )
5723  ?( ( IsLower<MT5>::value )
5724  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5725  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5726  :( IsLower<MT5>::value ? j : 0UL ) );
5727  const size_t kend( ( IsLower<MT4>::value )
5728  ?( ( IsUpper<MT5>::value )
5729  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
5730  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5731  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
5732 
      // xmm1-xmm4 accumulate row i, xmm5-xmm8 accumulate row i+1
5733  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5734 
5735  for( size_t k=kbegin; k<kend; ++k ) {
5736  const SIMDType a1( set( A(i ,k) ) );
5737  const SIMDType a2( set( A(i+1UL,k) ) );
5738  const SIMDType b1( B.load(k,j ) );
5739  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
5740  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
5741  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
5742  xmm1 += a1 * b1;
5743  xmm2 += a1 * b2;
5744  xmm3 += a1 * b3;
5745  xmm4 += a1 * b4;
5746  xmm5 += a2 * b1;
5747  xmm6 += a2 * b2;
5748  xmm7 += a2 * b3;
5749  xmm8 += a2 * b4;
5750  }
5751 
5752  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5753  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) + xmm2 * factor );
5754  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) + xmm3 * factor );
5755  (~C).store( i , j+SIMDSIZE*3UL, (~C).load(i ,j+SIMDSIZE*3UL) + xmm4 * factor );
5756  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
5757  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) + xmm6 * factor );
5758  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) + xmm7 * factor );
5759  (~C).store( i+1UL, j+SIMDSIZE*3UL, (~C).load(i+1UL,j+SIMDSIZE*3UL) + xmm8 * factor );
5760  }
5761 
      // Leftover single row (odd M)
5762  if( i < M )
5763  {
5764  const size_t kbegin( ( IsUpper<MT4>::value )
5765  ?( ( IsLower<MT5>::value )
5766  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5767  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5768  :( IsLower<MT5>::value ? j : 0UL ) );
5769  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
5770 
5771  SIMDType xmm1, xmm2, xmm3, xmm4;
5772 
5773  for( size_t k=kbegin; k<kend; ++k ) {
5774  const SIMDType a1( set( A(i,k) ) );
5775  xmm1 += a1 * B.load(k,j );
5776  xmm2 += a1 * B.load(k,j+SIMDSIZE );
5777  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
5778  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
5779  }
5780 
5781  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5782  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) + xmm2 * factor );
5783  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) + xmm3 * factor );
5784  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) + xmm4 * factor );
5785  }
5786  }
5787 
      // Blocks of 3 SIMD vectors (only if neither LOW nor UPP is set)
5788  for( ; !LOW && !UPP && (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
5789  {
5790  size_t i( 0UL );
5791 
      // Two rows of C per iteration
5792  for( ; (i+2UL) <= M; i+=2UL )
5793  {
5794  const size_t kbegin( ( IsUpper<MT4>::value )
5795  ?( ( IsLower<MT5>::value )
5796  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5797  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5798  :( IsLower<MT5>::value ? j : 0UL ) );
5799  const size_t kend( ( IsLower<MT4>::value )
5800  ?( ( IsUpper<MT5>::value )
5801  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
5802  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5803  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
5804 
      // xmm1-xmm3 accumulate row i, xmm4-xmm6 accumulate row i+1
5805  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
5806 
5807  for( size_t k=kbegin; k<kend; ++k ) {
5808  const SIMDType a1( set( A(i ,k) ) );
5809  const SIMDType a2( set( A(i+1UL,k) ) );
5810  const SIMDType b1( B.load(k,j ) );
5811  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
5812  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
5813  xmm1 += a1 * b1;
5814  xmm2 += a1 * b2;
5815  xmm3 += a1 * b3;
5816  xmm4 += a2 * b1;
5817  xmm5 += a2 * b2;
5818  xmm6 += a2 * b3;
5819  }
5820 
5821  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5822  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) + xmm2 * factor );
5823  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) + xmm3 * factor );
5824  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm4 * factor );
5825  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) + xmm5 * factor );
5826  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) + xmm6 * factor );
5827  }
5828 
      // Leftover single row (odd M)
5829  if( i < M )
5830  {
5831  const size_t kbegin( ( IsUpper<MT4>::value )
5832  ?( ( IsLower<MT5>::value )
5833  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5834  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5835  :( IsLower<MT5>::value ? j : 0UL ) );
5836  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
5837 
5838  SIMDType xmm1, xmm2, xmm3;
5839 
5840  for( size_t k=kbegin; k<kend; ++k ) {
5841  const SIMDType a1( set( A(i,k) ) );
5842  xmm1 += a1 * B.load(k,j );
5843  xmm2 += a1 * B.load(k,j+SIMDSIZE );
5844  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
5845  }
5846 
5847  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5848  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) + xmm2 * factor );
5849  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) + xmm3 * factor );
5850  }
5851  }
5852 
      // Blocks of 2 SIMD vectors (skipped only if both LOW and UPP are set)
5853  for( ; !( LOW && UPP ) && (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
5854  {
      // Row range of this column block: ends at min(j+SIMDSIZE*2,M) if UPP is set and
      // starts at row j if LOW is set.
5855  const size_t iend( UPP ? min(j+SIMDSIZE*2UL,M) : M );
5856  size_t i( LOW ? j : 0UL );
5857 
      // Two rows of C per iteration
5858  for( ; (i+2UL) <= iend; i+=2UL )
5859  {
5860  const size_t kbegin( ( IsUpper<MT4>::value )
5861  ?( ( IsLower<MT5>::value )
5862  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5863  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5864  :( IsLower<MT5>::value ? j : 0UL ) );
5865  const size_t kend( ( IsLower<MT4>::value )
5866  ?( ( IsUpper<MT5>::value )
5867  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
5868  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
5869  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
5870 
      // xmm1-xmm2 accumulate row i, xmm3-xmm4 accumulate row i+1
5871  SIMDType xmm1, xmm2, xmm3, xmm4;
5872 
5873  for( size_t k=kbegin; k<kend; ++k ) {
5874  const SIMDType a1( set( A(i ,k) ) );
5875  const SIMDType a2( set( A(i+1UL,k) ) );
5876  const SIMDType b1( B.load(k,j ) );
5877  const SIMDType b2( B.load(k,j+SIMDSIZE) );
5878  xmm1 += a1 * b1;
5879  xmm2 += a1 * b2;
5880  xmm3 += a2 * b1;
5881  xmm4 += a2 * b2;
5882  }
5883 
5884  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
5885  (~C).store( i , j+SIMDSIZE, (~C).load(i ,j+SIMDSIZE) + xmm2 * factor );
5886  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
5887  (~C).store( i+1UL, j+SIMDSIZE, (~C).load(i+1UL,j+SIMDSIZE) + xmm4 * factor );
5888  }
5889 
      // Leftover single row of the row range
5890  if( i < iend )
5891  {
5892  const size_t kbegin( ( IsUpper<MT4>::value )
5893  ?( ( IsLower<MT5>::value )
5894  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5895  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5896  :( IsLower<MT5>::value ? j : 0UL ) );
5897  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
5898 
5899  SIMDType xmm1, xmm2;
5900 
5901  for( size_t k=kbegin; k<kend; ++k ) {
5902  const SIMDType a1( set( A(i,k) ) );
5903  xmm1 += a1 * B.load(k,j );
5904  xmm2 += a1 * B.load(k,j+SIMDSIZE);
5905  }
5906 
5907  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
5908  (~C).store( i, j+SIMDSIZE, (~C).load(i,j+SIMDSIZE) + xmm2 * factor );
5909  }
5910  }
5911 
      // Remaining single SIMD vectors (executed for every LOW/UPP combination)
5912  for( ; j<jpos; j+=SIMDSIZE )
5913  {
      // Row range of this column block: ends at min(j+SIMDSIZE,M) if both LOW and UPP
      // are set and starts at row j if LOW is set.
5914  const size_t iend( LOW && UPP ? min(j+SIMDSIZE,M) : M );
5915  size_t i( LOW ? j : 0UL );
5916 
      // Two rows of C per iteration
5917  for( ; (i+2UL) <= iend; i+=2UL )
5918  {
5919  const size_t kbegin( ( IsUpper<MT4>::value )
5920  ?( ( IsLower<MT5>::value )
5921  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5922  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5923  :( IsLower<MT5>::value ? j : 0UL ) );
5924  const size_t kend( ( IsLower<MT4>::value )
5925  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
5926  :( K ) );
5927 
5928  SIMDType xmm1, xmm2;
5929 
5930  for( size_t k=kbegin; k<kend; ++k ) {
5931  const SIMDType b1( B.load(k,j) );
5932  xmm1 += set( A(i ,k) ) * b1;
5933  xmm2 += set( A(i+1UL,k) ) * b1;
5934  }
5935 
5936  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
5937  (~C).store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
5938  }
5939 
      // Leftover single row of the row range; the inner product runs up to K here
5940  if( i < iend )
5941  {
5942  const size_t kbegin( ( IsUpper<MT4>::value )
5943  ?( ( IsLower<MT5>::value )
5944  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5945  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5946  :( IsLower<MT5>::value ? j : 0UL ) );
5947 
5948  SIMDType xmm1;
5949 
5950  for( size_t k=kbegin; k<K; ++k ) {
5951  xmm1 += set( A(i,k) ) * B.load(k,j);
5952  }
5953 
5954  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
5955  }
5956  }
5957 
      // Scalar remainder loop for the columns in [jpos,N); performed only if 'remainder'
      // is true. Uses element access and the unconverted scalar instead of SIMD values.
5958  for( ; remainder && j<N; ++j )
5959  {
5960  const size_t iend( UPP ? j+1UL : M );
5961  size_t i( LOW ? j : 0UL );
5962 
      // Two rows of C per iteration
5963  for( ; (i+2UL) <= iend; i+=2UL )
5964  {
5965  const size_t kbegin( ( IsUpper<MT4>::value )
5966  ?( ( IsLower<MT5>::value )
5967  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5968  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5969  :( IsLower<MT5>::value ? j : 0UL ) );
5970  const size_t kend( ( IsLower<MT4>::value )
5971  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
5972  :( K ) );
5973 
5974  ElementType value1 = ElementType();
5975  ElementType value2 = ElementType();
5976 
5977  for( size_t k=kbegin; k<kend; ++k ) {
5978  value1 += A(i ,k) * B(k,j);
5979  value2 += A(i+1UL,k) * B(k,j);
5980  }
5981 
5982  (~C)(i ,j) += value1 * scalar;
5983  (~C)(i+1UL,j) += value2 * scalar;
5984  }
5985 
      // Leftover single row of the row range
5986  if( i < iend )
5987  {
5988  const size_t kbegin( ( IsUpper<MT4>::value )
5989  ?( ( IsLower<MT5>::value )
5990  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
5991  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
5992  :( IsLower<MT5>::value ? j : 0UL ) );
5993 
5994  ElementType value = ElementType();
5995 
5996  for( size_t k=kbegin; k<K; ++k ) {
5997  value += A(i,k) * B(k,j);
5998  }
5999 
6000  (~C)(i,j) += value * scalar;
6001  }
6002  }
6003  }
6004  //**********************************************************************************************
6005 
6006  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Small-matrix addition assignment to a column-major dense target matrix.
//
// Every visible branch copies one of the two operands (evaluated serially) into a matrix
// of its OppositeType and re-issues the addition assignment with the product of the
// copy and the other operand, passed through the ForwardFunctor and multiplied by the
// scalar.
//
// NOTE(review): this extract is incomplete. Listing line 6025 (the return type), lines
// 6028-6031, and lines 6035 and 6039 (the conditions that open the first two branches)
// are absent, which is why the visible braces do not balance. Confirm the missing
// conditions against the original header before relying on this description.
6021  template< typename MT3 // Type of the left-hand side target matrix
6022  , typename MT4 // Type of the left-hand side matrix operand
6023  , typename MT5 // Type of the right-hand side matrix operand
6024  , typename ST2 > // Type of the scalar value
6026  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6027  {
6032 
6033  const ForwardFunctor fwd;
6034 
      // First branch (condition not visible): convert the left operand A
6036  const OppositeType_<MT4> tmp( serial( A ) );
6037  addAssign( ~C, fwd( tmp * B ) * scalar );
6038  }
      // Second branch (condition not visible): convert the right operand B
6040  const OppositeType_<MT5> tmp( serial( B ) );
6041  addAssign( ~C, fwd( A * tmp ) * scalar );
6042  }
      // Otherwise convert A if it has no more elements than B ...
6043  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
6044  const OppositeType_<MT4> tmp( serial( A ) );
6045  addAssign( ~C, fwd( tmp * B ) * scalar );
6046  }
      // ... and B in all remaining cases
6047  else {
6048  const OppositeType_<MT5> tmp( serial( B ) );
6049  addAssign( ~C, fwd( A * tmp ) * scalar );
6050  }
6051  }
6052  //**********************************************************************************************
6053 
6054  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment, non-vectorized variant: forwards all arguments
// unchanged to selectDefaultAddAssignKernel().
// NOTE(review): listing line 6072 (the return type, presumably including the enabling
// condition that separates this overload from the vectorized one) is absent from this
// extract - confirm against the original header.
6068  template< typename MT3 // Type of the left-hand side target matrix
6069  , typename MT4 // Type of the left-hand side matrix operand
6070  , typename MT5 // Type of the right-hand side matrix operand
6071  , typename ST2 > // Type of the scalar value
6073  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6074  {
6075  selectDefaultAddAssignKernel( C, A, B, scalar );
6076  }
6077  //**********************************************************************************************
6078 
6079  //**Vectorized default addition assignment to dense matrices (large matrices)*******************
// Vectorized large-matrix addition assignment. Delegates to one of the three kernels
// lmmm(), ummm() or mmm(): lmmm() if LOW is set, otherwise ummm() if UPP is set,
// otherwise mmm(). All three receive the scalar and ST2(1) as trailing arguments.
// NOTE(review): ST2(1) is presumably the factor applied to the existing content of C,
// which would make this an accumulating update - confirm against the declarations
// in <blaze/math/dense/MMM.h>.
// NOTE(review): listing line 6098 (the return type, presumably including the enabling
// condition for the vectorized kernel) is absent from this extract.
6094  template< typename MT3 // Type of the left-hand side target matrix
6095  , typename MT4 // Type of the left-hand side matrix operand
6096  , typename MT5 // Type of the right-hand side matrix operand
6097  , typename ST2 > // Type of the scalar value
6099  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6100  {
6101  if( LOW )
6102  lmmm( C, A, B, scalar, ST2(1) );
6103  else if( UPP )
6104  ummm( C, A, B, scalar, ST2(1) );
6105  else
6106  mmm( C, A, B, scalar, ST2(1) );
6107  }
6108  //**********************************************************************************************
6109 
6110  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS addition assignment, fallback variant: forwards all arguments unchanged to
// selectLargeAddAssignKernel().
// NOTE(review): listing line 6128 (the return type, presumably including the enabling
// condition that separates this overload from the BLAS-based one) is absent from this
// extract - confirm against the original header.
6124  template< typename MT3 // Type of the left-hand side target matrix
6125  , typename MT4 // Type of the left-hand side matrix operand
6126  , typename MT5 // Type of the right-hand side matrix operand
6127  , typename ST2 > // Type of the scalar value
6129  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6130  {
6131  selectLargeAddAssignKernel( C, A, B, scalar );
6132  }
6133  //**********************************************************************************************
6134 
6135  //**BLAS-based addition assignment to dense matrices********************************************
6136 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
6137 
// BLAS-based addition assignment (compiled only if BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are both enabled, see the surrounding
// preprocessor guard).
//
// - If the type of A is triangular, a serial copy of B is passed to trmm() together with
//   A (CblasLeft), the matching lower/upper flag and the scalar converted to the element
//   type of C; the copy is afterwards added to C.
// - Otherwise, if the type of B is triangular, the same is done with a serial copy of A
//   and B as triangular operand (CblasRight).
// - Otherwise gemm() is called directly on C with the converted scalar and ET(1).
// NOTE(review): ET(1) is presumably the factor applied to the existing content of C -
// confirm against the gemm() wrapper in <blaze/math/blas/gemm.h>.
// NOTE(review): listing line 6154 (the return type, presumably including the enabling
// condition for the BLAS kernel) is absent from this extract.
6150  template< typename MT3 // Type of the left-hand side target matrix
6151  , typename MT4 // Type of the left-hand side matrix operand
6152  , typename MT5 // Type of the right-hand side matrix operand
6153  , typename ST2 > // Type of the scalar value
6155  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6156  {
6157  typedef ElementType_<MT3> ET;
6158 
6159  if( IsTriangular<MT4>::value ) {
6160  ResultType_<MT3> tmp( serial( B ) );
6161  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6162  addAssign( C, tmp );
6163  }
6164  else if( IsTriangular<MT5>::value ) {
6165  ResultType_<MT3> tmp( serial( A ) );
6166  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6167  addAssign( C, tmp );
6168  }
6169  else {
6170  gemm( C, A, B, ET(scalar), ET(1) );
6171  }
6172  }
6173 #endif
6174  //**********************************************************************************************
6175 
6176  //**Restructuring addition assignment to column-major matrices**********************************
// Restructuring addition assignment of the scaled multiplication expression to a
// column-major matrix. The operands of the wrapped multiplication are fetched and the
// addition assignment is re-issued with transposed operands:
//   - first branch:                       trans( left ) * trans( right )
//   - else, if the type MT1 is symmetric: trans( left ) * right
//   - otherwise:                          left * trans( right )
// In each case the product is passed through the ForwardFunctor and multiplied by the
// scalar of the expression.
//
// NOTE(review): this extract is incomplete. Listing line 6191 (the return type), lines
// 6194 and 6196, and line 6206 (the condition of the first branch) are absent - confirm
// against the original header.
6190  template< typename MT > // Type of the target matrix
6192  addAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
6193  {
6195 
6197 
6198  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6199  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6200 
6201  const ForwardFunctor fwd;
6202 
6203  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6204  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6205 
      // The condition guarding the following statement (listing line 6206) is not visible
6207  addAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
6208  else if( IsSymmetric<MT1>::value )
6209  addAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
6210  else
6211  addAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
6212  }
6213  //**********************************************************************************************
6214 
6215  //**Addition assignment to sparse matrices******************************************************
6216  // No special implementation for the addition assignment to sparse matrices.
6217  //**********************************************************************************************
6218 
6219  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the scaled multiplication expression to a dense matrix.
//
// After checking that the dimensions of target and expression match, the two operands of
// the wrapped multiplication are fetched. Nothing is done if the target has no rows or no
// columns or if the left operand has no columns. Otherwise both operands are evaluated
// serially (into the types LT and RT) and the subtraction kernel selection is invoked
// with the evaluated operands and the scalar of the expression.
//
// NOTE(review): listing line 6233 (the return type) and line 6236 are absent from this
// extract - confirm against the original header.
6231  template< typename MT // Type of the target dense matrix
6232  , bool SO > // Storage order of the target dense matrix
6234  subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6235  {
6237 
6238  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6239  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6240 
6241  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6242  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6243 
      // Empty target or empty inner dimension: nothing to subtract
6244  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6245  return;
6246  }
6247 
6248  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6249  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6250 
      // The evaluated operands must keep the dimensions of the original operands and
      // must fit the target matrix
6251  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6252  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6253  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6254  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6255  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6256  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6257 
6258  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6259  }
6260  //**********************************************************************************************
6261 
6262  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Chooses which subtraction-assignment kernel handles the operands C, A, B and the scalar.
// The "small" kernel is taken when any of the following holds:
//   - the type of B (MT5) is diagonal,
//   - BLAZE_DEBUG_MODE is off and B has at most SIMDSIZE*10 columns,
//   - the number of elements of C is below DMATDMATMULT_THRESHOLD.
// In every other case the call is forwarded to the BLAS kernel selection.
6273  template< typename MT3 // Type of the left-hand side target matrix
6274  , typename MT4 // Type of the left-hand side matrix operand
6275  , typename MT5 // Type of the right-hand side matrix operand
6276  , typename ST2 > // Type of the scalar value
6277  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6278  {
6279  if( ( IsDiagonal<MT5>::value ) ||
6280  ( !BLAZE_DEBUG_MODE && B.columns() <= SIMDSIZE*10UL ) ||
6281  ( C.rows() * C.columns() < DMATDMATMULT_THRESHOLD ) )
6282  selectSmallSubAssignKernel( C, A, B, scalar );
6283  else
6284  selectBlasSubAssignKernel( C, A, B, scalar );
6285  }
6286  //**********************************************************************************************
6287 
6288  //**Default subtraction assignment to dense matrices (general/general)**************************
// Default subtraction assignment for the case that neither operand type is diagonal
// (enabled only if both IsDiagonal<MT4> and IsDiagonal<MT5> are false).
// The scaled product A * B * scalar is evaluated serially into a temporary of type
// ResultType, which is then subtracted from C via subAssign().
6302  template< typename MT3 // Type of the left-hand side target matrix
6303  , typename MT4 // Type of the left-hand side matrix operand
6304  , typename MT5 // Type of the right-hand side matrix operand
6305  , typename ST2 > // Type of the scalar value
6306  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
6307  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6308  {
6309  const ResultType tmp( serial( A * B * scalar ) );
6310  subAssign( C, tmp );
6311  }
6312  //**********************************************************************************************
6313 
6314  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
// Default subtraction assignment for a non-diagonal left operand and a diagonal right
// operand. Since only the diagonal of B is read, each target element is updated as
//    C(i,j) -= A(i,j) * B(j,j) * scalar
// NOTE(review): listing line 6335 is absent from this extract (presumably a compile-time
// constraint on ST2) - confirm against the original header.
6328  template< typename MT3 // Type of the left-hand side target matrix
6329  , typename MT4 // Type of the left-hand side matrix operand
6330  , typename MT5 // Type of the right-hand side matrix operand
6331  , typename ST2 > // Type of the scalar value
6332  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6333  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6334  {
6336 
6337  const size_t M( A.rows() );
6338  const size_t N( B.columns() );
6339 
6340  for( size_t i=0UL; i<M; ++i )
6341  {
      // Column range of row i, narrowed by the upper/lower traits of A's type (MT4):
      // starts at i (or i+1 if strictly upper), ends at i+1 (or i if strictly lower).
6342  const size_t jbegin( ( IsUpper<MT4>::value )
6343  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6344  :( 0UL ) );
6345  const size_t jend( ( IsLower<MT4>::value )
6346  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6347  :( N ) );
6348  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6349 
      // jpos marks the end of the part of [jbegin,jend) that holds an even number of
      // columns; the loop below handles two columns per iteration.
6350  const size_t jnum( jend - jbegin );
6351  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6352 
6353  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6354  C(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6355  C(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6356  }
      // Single leftover column in case of an odd column count
6357  if( jpos < jend ) {
6358  C(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
6359  }
6360  }
6361  }
6362  //**********************************************************************************************
6363 
6364  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
// Default subtraction assignment for a diagonal left operand and a non-diagonal right
// operand. Since only the diagonal of A is read, each target element is updated as
//    C(i,j) -= A(i,i) * B(i,j) * scalar
// NOTE(review): listing line 6385 is absent from this extract (presumably a compile-time
// constraint on ST2) - confirm against the original header.
6378  template< typename MT3 // Type of the left-hand side target matrix
6379  , typename MT4 // Type of the left-hand side matrix operand
6380  , typename MT5 // Type of the right-hand side matrix operand
6381  , typename ST2 > // Type of the scalar value
6382  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6383  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6384  {
6386 
6387  const size_t M( A.rows() );
6388  const size_t N( B.columns() );
6389 
6390  for( size_t i=0UL; i<M; ++i )
6391  {
      // Column range of row i, narrowed by the upper/lower traits of B's type (MT5):
      // starts at i (or i+1 if strictly upper), ends at i+1 (or i if strictly lower).
6392  const size_t jbegin( ( IsUpper<MT5>::value )
6393  ?( IsStrictlyUpper<MT5>::value ? i+1UL : i )
6394  :( 0UL ) );
6395  const size_t jend( ( IsLower<MT5>::value )
6396  ?( IsStrictlyLower<MT5>::value ? i : i+1UL )
6397  :( N ) );
6398  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6399 
      // jpos marks the end of the part of [jbegin,jend) that holds an even number of
      // columns; the loop below handles two columns per iteration.
6400  const size_t jnum( jend - jbegin );
6401  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6402 
6403  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6404  C(i,j ) -= A(i,i) * B(i,j ) * scalar;
6405  C(i,j+1UL) -= A(i,i) * B(i,j+1UL) * scalar;
6406  }
      // Single leftover column in case of an odd column count
6407  if( jpos < jend ) {
6408  C(i,jpos) -= A(i,i) * B(i,jpos) * scalar;
6409  }
6410  }
6411  }
6412  //**********************************************************************************************
6413 
6414  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Default subtraction-assignment kernel for two diagonal operands (this overload is enabled when
// both IsDiagonal<MT4> and IsDiagonal<MT5> hold).
//
// Only the diagonal of C is touched: C(i,i) -= A(i,i) * B(i,i) * scalar for i in [0,A.rows()).
//
// C      : target matrix, updated in place.
// A      : left-hand side (diagonal) matrix operand.
// B      : right-hand side (diagonal) matrix operand.
// scalar : scaling factor multiplied into every product.
//
// NOTE(review): listing line 6435 is absent from this view; its content cannot be confirmed here.
6428  template< typename MT3 // Type of the left-hand side target matrix
6429  , typename MT4 // Type of the left-hand side matrix operand
6430  , typename MT5 // Type of the right-hand side matrix operand
6431  , typename ST2 > // Type of the scalar value
6432  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
6433  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6434  {
6436 
6437  for( size_t i=0UL; i<A.rows(); ++i ) {
6438  C(i,i) -= A(i,i) * B(i,i) * scalar;
6439  }
6440  }
6441  //**********************************************************************************************
6442 
6443  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction-assignment kernel, fallback overload: it does no work of its own and
// simply forwards all four arguments to selectDefaultSubAssignKernel().
//
// C      : target matrix, updated in place by the callee.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor.
//
// NOTE(review): listing line 6461 (between the template header and the function name, presumably
// the return type / enabling condition) is absent from this view -- confirm against the full file.
6457  template< typename MT3 // Type of the left-hand side target matrix
6458  , typename MT4 // Type of the left-hand side matrix operand
6459  , typename MT5 // Type of the right-hand side matrix operand
6460  , typename ST2 > // Type of the scalar value
6462  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6463  {
6464  selectDefaultSubAssignKernel( C, A, B, scalar );
6465  }
6466  //**********************************************************************************************
6467 
6468  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
// Vectorized small-matrix subtraction-assignment kernel for a row-major target: C -= (A*B)*scalar.
//
// The columns of C are processed in panels of 8, 5, 4, 3, 2 and finally 1 SIMD vector(s); each
// panel loop picks up the column index j where the previous one stopped. Inside a panel, rows are
// handled two at a time (except in the 8-vector panel, which handles one row at a time), followed
// by a single-row tail. For every (row, panel) pair the products A(i,k)*B(k,j..) are accumulated
// over k in [kbegin,kend), multiplied by 'factor' and subtracted from the loaded values of C.
// Columns in [jpos,N) are finished with scalar arithmetic when 'remainder' is true.
//
// kbegin/kend narrow the k range according to the structure of the operands: an upper MT4 starts
// k at i (i+1 if strictly upper), a lower MT5 starts k at j; a lower MT4 ends k at the row bound
// of the last row in the group, an upper MT5 ends k at min(j + panel width, K).
//
// C      : row-major target matrix, updated in place.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor.
//
// NOTE(review): LOW, UPP, SIMDSIZE, SIMDType and ElementType are declared outside this view; the
// 8/5/4/3-vector panels only run when neither LOW nor UPP is set. Listing lines 6487 (before the
// function name) and 6505 (before the brace that wraps the 8-vector panel) are absent from this
// view -- confirm both against the full file.
// NOTE(review): the SIMD accumulators are default-constructed and then only updated with '+=';
// presumably SIMDType value-initializes to zero -- confirm.
6483  template< typename MT3 // Type of the left-hand side target matrix
6484  , typename MT4 // Type of the left-hand side matrix operand
6485  , typename MT5 // Type of the right-hand side matrix operand
6486  , typename ST2 > // Type of the scalar value
6488  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6489  {
// A scalar tail over the last columns is needed unless both C and B are padded.
6490  constexpr bool remainder( !IsPadded<MT3>::value || !IsPadded<MT5>::value );
6491 
6492  const size_t M( A.rows() );
6493  const size_t N( B.columns() );
6494  const size_t K( A.columns() );
6495 
6496  BLAZE_INTERNAL_ASSERT( !( LOW || UPP ) || ( M == N ), "Broken invariant detected" );
6497 
// jpos: end of the vectorized column range; the assertion below checks that the bit mask equals
// N rounded down to a multiple of SIMDSIZE.
6498  const size_t jpos( remainder ? ( N & size_t(-SIMDSIZE) ) : N );
6499  BLAZE_INTERNAL_ASSERT( !remainder || ( N - ( N % SIMDSIZE ) ) == jpos, "Invalid end calculation" );
6500 
6501  const SIMDType factor( set( scalar ) );
6502 
6503  size_t j( 0UL );
6504 
// Panel of 8 SIMD vectors, one row at a time (the guard on listing line 6505 is not visible here).
6506  {
6507  for( ; !LOW && !UPP && (j+SIMDSIZE*7UL) < jpos; j+=SIMDSIZE*8UL ) {
6508  for( size_t i=0UL; i<M; ++i )
6509  {
6510  const size_t kbegin( ( IsUpper<MT4>::value )
6511  ?( ( IsLower<MT5>::value )
6512  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6513  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6514  :( IsLower<MT5>::value ? j : 0UL ) );
6515  const size_t kend( ( IsLower<MT4>::value )
6516  ?( ( IsUpper<MT5>::value )
6517  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL ), j+SIMDSIZE*8UL, K ) )
6518  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
6519  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*8UL, K ) : K ) );
6520 
6521  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6522 
6523  for( size_t k=kbegin; k<kend; ++k ) {
6524  const SIMDType a1( set( A(i,k) ) );
6525  xmm1 += a1 * B.load(k,j );
6526  xmm2 += a1 * B.load(k,j+SIMDSIZE );
6527  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
6528  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
6529  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
6530  xmm6 += a1 * B.load(k,j+SIMDSIZE*5UL);
6531  xmm7 += a1 * B.load(k,j+SIMDSIZE*6UL);
6532  xmm8 += a1 * B.load(k,j+SIMDSIZE*7UL);
6533  }
6534 
6535  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6536  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) - xmm2 * factor );
6537  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) - xmm3 * factor );
6538  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) - xmm4 * factor );
6539  (~C).store( i, j+SIMDSIZE*4UL, (~C).load(i,j+SIMDSIZE*4UL) - xmm5 * factor );
6540  (~C).store( i, j+SIMDSIZE*5UL, (~C).load(i,j+SIMDSIZE*5UL) - xmm6 * factor );
6541  (~C).store( i, j+SIMDSIZE*6UL, (~C).load(i,j+SIMDSIZE*6UL) - xmm7 * factor );
6542  (~C).store( i, j+SIMDSIZE*7UL, (~C).load(i,j+SIMDSIZE*7UL) - xmm8 * factor );
6543  }
6544  }
6545  }
6546 
// Panel of 5 SIMD vectors: row pairs first (xmm1-5 for row i, xmm6-10 for row i+1), then a
// single-row tail when M is odd.
6547  for( ; !LOW && !UPP && (j+SIMDSIZE*4UL) < jpos; j+=SIMDSIZE*5UL )
6548  {
6549  size_t i( 0UL );
6550 
6551  for( ; (i+2UL) <= M; i+=2UL )
6552  {
6553  const size_t kbegin( ( IsUpper<MT4>::value )
6554  ?( ( IsLower<MT5>::value )
6555  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6556  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6557  :( IsLower<MT5>::value ? j : 0UL ) );
6558  const size_t kend( ( IsLower<MT4>::value )
6559  ?( ( IsUpper<MT5>::value )
6560  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*5UL, K ) )
6561  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6562  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*5UL, K ) : K ) );
6563 
6564  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;
6565 
6566  for( size_t k=kbegin; k<kend; ++k ) {
6567  const SIMDType a1( set( A(i ,k) ) );
6568  const SIMDType a2( set( A(i+1UL,k) ) );
6569  const SIMDType b1( B.load(k,j ) );
6570  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
6571  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
6572  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
6573  const SIMDType b5( B.load(k,j+SIMDSIZE*4UL) );
6574  xmm1 += a1 * b1;
6575  xmm2 += a1 * b2;
6576  xmm3 += a1 * b3;
6577  xmm4 += a1 * b4;
6578  xmm5 += a1 * b5;
6579  xmm6 += a2 * b1;
6580  xmm7 += a2 * b2;
6581  xmm8 += a2 * b3;
6582  xmm9 += a2 * b4;
6583  xmm10 += a2 * b5;
6584  }
6585 
6586  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6587  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) - xmm2 * factor );
6588  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) - xmm3 * factor );
6589  (~C).store( i , j+SIMDSIZE*3UL, (~C).load(i ,j+SIMDSIZE*3UL) - xmm4 * factor );
6590  (~C).store( i , j+SIMDSIZE*4UL, (~C).load(i ,j+SIMDSIZE*4UL) - xmm5 * factor );
6591  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm6 * factor );
6592  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) - xmm7 * factor );
6593  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) - xmm8 * factor );
6594  (~C).store( i+1UL, j+SIMDSIZE*3UL, (~C).load(i+1UL,j+SIMDSIZE*3UL) - xmm9 * factor );
6595  (~C).store( i+1UL, j+SIMDSIZE*4UL, (~C).load(i+1UL,j+SIMDSIZE*4UL) - xmm10 * factor );
6596  }
6597 
// Single-row tail: kend is bounded only by the upper structure of MT5 here.
6598  if( i < M )
6599  {
6600  const size_t kbegin( ( IsUpper<MT4>::value )
6601  ?( ( IsLower<MT5>::value )
6602  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6603  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6604  :( IsLower<MT5>::value ? j : 0UL ) );
6605  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*5UL, K ) ):( K ) );
6606 
6607  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5;
6608 
6609  for( size_t k=kbegin; k<kend; ++k ) {
6610  const SIMDType a1( set( A(i,k) ) );
6611  xmm1 += a1 * B.load(k,j );
6612  xmm2 += a1 * B.load(k,j+SIMDSIZE );
6613  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
6614  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
6615  xmm5 += a1 * B.load(k,j+SIMDSIZE*4UL);
6616  }
6617 
6618  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6619  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) - xmm2 * factor );
6620  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) - xmm3 * factor );
6621  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) - xmm4 * factor );
6622  (~C).store( i, j+SIMDSIZE*4UL, (~C).load(i,j+SIMDSIZE*4UL) - xmm5 * factor );
6623  }
6624  }
6625 
// Panel of 4 SIMD vectors: row pairs (xmm1-4 for row i, xmm5-8 for row i+1), then single-row tail.
6626  for( ; !LOW && !UPP && (j+SIMDSIZE*3UL) < jpos; j+=SIMDSIZE*4UL )
6627  {
6628  size_t i( 0UL );
6629 
6630  for( ; (i+2UL) <= M; i+=2UL )
6631  {
6632  const size_t kbegin( ( IsUpper<MT4>::value )
6633  ?( ( IsLower<MT5>::value )
6634  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6635  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6636  :( IsLower<MT5>::value ? j : 0UL ) );
6637  const size_t kend( ( IsLower<MT4>::value )
6638  ?( ( IsUpper<MT5>::value )
6639  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*4UL, K ) )
6640  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6641  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*4UL, K ) : K ) );
6642 
6643  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6644 
6645  for( size_t k=kbegin; k<kend; ++k ) {
6646  const SIMDType a1( set( A(i ,k) ) );
6647  const SIMDType a2( set( A(i+1UL,k) ) );
6648  const SIMDType b1( B.load(k,j ) );
6649  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
6650  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
6651  const SIMDType b4( B.load(k,j+SIMDSIZE*3UL) );
6652  xmm1 += a1 * b1;
6653  xmm2 += a1 * b2;
6654  xmm3 += a1 * b3;
6655  xmm4 += a1 * b4;
6656  xmm5 += a2 * b1;
6657  xmm6 += a2 * b2;
6658  xmm7 += a2 * b3;
6659  xmm8 += a2 * b4;
6660  }
6661 
6662  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6663  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) - xmm2 * factor );
6664  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) - xmm3 * factor );
6665  (~C).store( i , j+SIMDSIZE*3UL, (~C).load(i ,j+SIMDSIZE*3UL) - xmm4 * factor );
6666  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
6667  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) - xmm6 * factor );
6668  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) - xmm7 * factor );
6669  (~C).store( i+1UL, j+SIMDSIZE*3UL, (~C).load(i+1UL,j+SIMDSIZE*3UL) - xmm8 * factor );
6670  }
6671 
6672  if( i < M )
6673  {
6674  const size_t kbegin( ( IsUpper<MT4>::value )
6675  ?( ( IsLower<MT5>::value )
6676  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6677  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6678  :( IsLower<MT5>::value ? j : 0UL ) );
6679  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*4UL, K ) ):( K ) );
6680 
6681  SIMDType xmm1, xmm2, xmm3, xmm4;
6682 
6683  for( size_t k=kbegin; k<kend; ++k ) {
6684  const SIMDType a1( set( A(i,k) ) );
6685  xmm1 += a1 * B.load(k,j );
6686  xmm2 += a1 * B.load(k,j+SIMDSIZE );
6687  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
6688  xmm4 += a1 * B.load(k,j+SIMDSIZE*3UL);
6689  }
6690 
6691  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6692  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) - xmm2 * factor );
6693  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) - xmm3 * factor );
6694  (~C).store( i, j+SIMDSIZE*3UL, (~C).load(i,j+SIMDSIZE*3UL) - xmm4 * factor );
6695  }
6696  }
6697 
// Panel of 3 SIMD vectors: row pairs (xmm1-3 for row i, xmm4-6 for row i+1), then single-row tail.
6698  for( ; !LOW && !UPP && (j+SIMDSIZE*2UL) < jpos; j+=SIMDSIZE*3UL )
6699  {
6700  size_t i( 0UL );
6701 
6702  for( ; (i+2UL) <= M; i+=2UL )
6703  {
6704  const size_t kbegin( ( IsUpper<MT4>::value )
6705  ?( ( IsLower<MT5>::value )
6706  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6707  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6708  :( IsLower<MT5>::value ? j : 0UL ) );
6709  const size_t kend( ( IsLower<MT4>::value )
6710  ?( ( IsUpper<MT5>::value )
6711  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*3UL, K ) )
6712  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6713  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*3UL, K ) : K ) );
6714 
6715  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6;
6716 
6717  for( size_t k=kbegin; k<kend; ++k ) {
6718  const SIMDType a1( set( A(i ,k) ) );
6719  const SIMDType a2( set( A(i+1UL,k) ) );
6720  const SIMDType b1( B.load(k,j ) );
6721  const SIMDType b2( B.load(k,j+SIMDSIZE ) );
6722  const SIMDType b3( B.load(k,j+SIMDSIZE*2UL) );
6723  xmm1 += a1 * b1;
6724  xmm2 += a1 * b2;
6725  xmm3 += a1 * b3;
6726  xmm4 += a2 * b1;
6727  xmm5 += a2 * b2;
6728  xmm6 += a2 * b3;
6729  }
6730 
6731  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6732  (~C).store( i , j+SIMDSIZE , (~C).load(i ,j+SIMDSIZE ) - xmm2 * factor );
6733  (~C).store( i , j+SIMDSIZE*2UL, (~C).load(i ,j+SIMDSIZE*2UL) - xmm3 * factor );
6734  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm4 * factor );
6735  (~C).store( i+1UL, j+SIMDSIZE , (~C).load(i+1UL,j+SIMDSIZE ) - xmm5 * factor );
6736  (~C).store( i+1UL, j+SIMDSIZE*2UL, (~C).load(i+1UL,j+SIMDSIZE*2UL) - xmm6 * factor );
6737  }
6738 
6739  if( i < M )
6740  {
6741  const size_t kbegin( ( IsUpper<MT4>::value )
6742  ?( ( IsLower<MT5>::value )
6743  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6744  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6745  :( IsLower<MT5>::value ? j : 0UL ) );
6746  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*3UL, K ) ):( K ) );
6747 
6748  SIMDType xmm1, xmm2, xmm3;
6749 
6750  for( size_t k=kbegin; k<kend; ++k ) {
6751  const SIMDType a1( set( A(i,k) ) );
6752  xmm1 += a1 * B.load(k,j );
6753  xmm2 += a1 * B.load(k,j+SIMDSIZE );
6754  xmm3 += a1 * B.load(k,j+SIMDSIZE*2UL);
6755  }
6756 
6757  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6758  (~C).store( i, j+SIMDSIZE , (~C).load(i,j+SIMDSIZE ) - xmm2 * factor );
6759  (~C).store( i, j+SIMDSIZE*2UL, (~C).load(i,j+SIMDSIZE*2UL) - xmm3 * factor );
6760  }
6761  }
6762 
// Panel of 2 SIMD vectors: skipped only when both LOW and UPP are set. The row range is
// restricted here: rows start at j when LOW is set and stop at min(j+2*SIMDSIZE,M) when UPP is set.
6763  for( ; !( LOW && UPP ) && (j+SIMDSIZE) < jpos; j+=SIMDSIZE*2UL )
6764  {
6765  const size_t iend( UPP ? min(j+SIMDSIZE*2UL,M) : M );
6766  size_t i( LOW ? j : 0UL );
6767 
6768  for( ; (i+2UL) <= iend; i+=2UL )
6769  {
6770  const size_t kbegin( ( IsUpper<MT4>::value )
6771  ?( ( IsLower<MT5>::value )
6772  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6773  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6774  :( IsLower<MT5>::value ? j : 0UL ) );
6775  const size_t kend( ( IsLower<MT4>::value )
6776  ?( ( IsUpper<MT5>::value )
6777  ?( min( ( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ), j+SIMDSIZE*2UL, K ) )
6778  :( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL ) )
6779  :( IsUpper<MT5>::value ? min( j+SIMDSIZE*2UL, K ) : K ) );
6780 
6781  SIMDType xmm1, xmm2, xmm3, xmm4;
6782 
6783  for( size_t k=kbegin; k<kend; ++k ) {
6784  const SIMDType a1( set( A(i ,k) ) );
6785  const SIMDType a2( set( A(i+1UL,k) ) );
6786  const SIMDType b1( B.load(k,j ) );
6787  const SIMDType b2( B.load(k,j+SIMDSIZE) );
6788  xmm1 += a1 * b1;
6789  xmm2 += a1 * b2;
6790  xmm3 += a2 * b1;
6791  xmm4 += a2 * b2;
6792  }
6793 
6794  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
6795  (~C).store( i , j+SIMDSIZE, (~C).load(i ,j+SIMDSIZE) - xmm2 * factor );
6796  (~C).store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
6797  (~C).store( i+1UL, j+SIMDSIZE, (~C).load(i+1UL,j+SIMDSIZE) - xmm4 * factor );
6798  }
6799 
6800  if( i < iend )
6801  {
6802  const size_t kbegin( ( IsUpper<MT4>::value )
6803  ?( ( IsLower<MT5>::value )
6804  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6805  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6806  :( IsLower<MT5>::value ? j : 0UL ) );
6807  const size_t kend( ( IsUpper<MT5>::value )?( min( j+SIMDSIZE*2UL, K ) ):( K ) );
6808 
6809  SIMDType xmm1, xmm2;
6810 
6811  for( size_t k=kbegin; k<kend; ++k ) {
6812  const SIMDType a1( set( A(i,k) ) );
6813  xmm1 += a1 * B.load(k,j );
6814  xmm2 += a1 * B.load(k,j+SIMDSIZE);
6815  }
6816 
6817  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
6818  (~C).store( i, j+SIMDSIZE, (~C).load(i,j+SIMDSIZE) - xmm2 * factor );
6819  }
6820  }
6821 
// Single SIMD vector per step for the rest of the vectorized range; this loop has no LOW/UPP
// guard. The row end is only narrowed (to min(j+SIMDSIZE,M)) when both LOW and UPP are set.
6822  for( ; j<jpos; j+=SIMDSIZE )
6823  {
6824  const size_t iend( LOW && UPP ? min(j+SIMDSIZE,M) : M );
6825  size_t i( LOW ? j : 0UL );
6826 
6827  for( ; (i+2UL) <= iend; i+=2UL )
6828  {
6829  const size_t kbegin( ( IsUpper<MT4>::value )
6830  ?( ( IsLower<MT5>::value )
6831  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6832  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6833  :( IsLower<MT5>::value ? j : 0UL ) );
6834  const size_t kend( ( IsLower<MT4>::value )
6835  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
6836  :( K ) );
6837 
6838  SIMDType xmm1, xmm2;
6839 
6840  for( size_t k=kbegin; k<kend; ++k ) {
6841  const SIMDType b1( B.load(k,j) );
6842  xmm1 += set( A(i ,k) ) * b1;
6843  xmm2 += set( A(i+1UL,k) ) * b1;
6844  }
6845 
6846  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
6847  (~C).store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
6848  }
6849 
// Single-row tail: the k loop runs up to K without a structure-dependent upper bound.
6850  if( i < iend )
6851  {
6852  const size_t kbegin( ( IsUpper<MT4>::value )
6853  ?( ( IsLower<MT5>::value )
6854  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6855  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6856  :( IsLower<MT5>::value ? j : 0UL ) );
6857 
6858  SIMDType xmm1;
6859 
6860  for( size_t k=kbegin; k<K; ++k ) {
6861  xmm1 += set( A(i,k) ) * B.load(k,j);
6862  }
6863 
6864  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
6865  }
6866  }
6867 
// Scalar tail over the columns [jpos,N), executed only when 'remainder' is true; uses element
// access and multiplies by 'scalar' directly instead of the SIMD 'factor'.
6868  for( ; remainder && j<N; ++j )
6869  {
6870  const size_t iend( UPP ? j+1UL : M );
6871  size_t i( LOW ? j : 0UL );
6872 
6873  for( ; (i+2UL) <= iend; i+=2UL )
6874  {
6875  const size_t kbegin( ( IsUpper<MT4>::value )
6876  ?( ( IsLower<MT5>::value )
6877  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6878  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6879  :( IsLower<MT5>::value ? j : 0UL ) );
6880  const size_t kend( ( IsLower<MT4>::value )
6881  ?( IsStrictlyLower<MT4>::value ? i+1UL : i+2UL )
6882  :( K ) );
6883 
6884  ElementType value1 = ElementType();
6885  ElementType value2 = ElementType();
6886 
6887  for( size_t k=kbegin; k<kend; ++k ) {
6888  value1 += A(i ,k) * B(k,j);
6889  value2 += A(i+1UL,k) * B(k,j);
6890  }
6891 
6892  (~C)(i ,j) -= value1 * scalar;
6893  (~C)(i+1UL,j) -= value2 * scalar;
6894  }
6895 
6896  if( i < iend )
6897  {
6898  const size_t kbegin( ( IsUpper<MT4>::value )
6899  ?( ( IsLower<MT5>::value )
6900  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i ), j ) )
6901  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
6902  :( IsLower<MT5>::value ? j : 0UL ) );
6903 
6904  ElementType value = ElementType();
6905 
6906  for( size_t k=kbegin; k<K; ++k ) {
6907  value += A(i,k) * B(k,j);
6908  }
6909 
6910  (~C)(i,j) -= value * scalar;
6911  }
6912  }
6913  }
6914  //**********************************************************************************************
6915 
6916  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Small-matrix subtraction-assignment kernel for a column-major target.
//
// Instead of computing the product itself, it copies one of the two operands into a temporary of
// its OppositeType_ (built from serial( operand )) and calls subAssign() with the product of that
// temporary and the other operand, passed through 'fwd' and scaled by 'scalar'.
//
// C      : column-major target matrix, updated via subAssign().
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor.
//
// NOTE(review): listing lines 6934, 6937-6940, 6944 and 6948 are absent from this view. In
// particular the conditions of the first two branches (one converting A, one converting B) are
// not visible; only the third condition and the final 'else' can be read here.
// NOTE(review): ForwardFunctor is declared outside this view; its effect on the product
// expression cannot be confirmed from here.
6930  template< typename MT3 // Type of the left-hand side target matrix
6931  , typename MT4 // Type of the left-hand side matrix operand
6932  , typename MT5 // Type of the right-hand side matrix operand
6933  , typename ST2 > // Type of the scalar value
6935  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6936  {
6941 
6942  const ForwardFunctor fwd;
6943 
// First branch (condition not visible): convert A.
6945  const OppositeType_<MT4> tmp( serial( A ) );
6946  subAssign( ~C, fwd( tmp * B ) * scalar );
6947  }
// Second branch (condition not visible): convert B.
6949  const OppositeType_<MT5> tmp( serial( B ) );
6950  subAssign( ~C, fwd( A * tmp ) * scalar );
6951  }
// Otherwise convert whichever operand has fewer elements (A on a tie).
6952  else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
6953  const OppositeType_<MT4> tmp( serial( A ) );
6954  subAssign( ~C, fwd( tmp * B ) * scalar );
6955  }
6956  else {
6957  const OppositeType_<MT5> tmp( serial( B ) );
6958  subAssign( ~C, fwd( A * tmp ) * scalar );
6959  }
6960  }
6961  //**********************************************************************************************
6962 
6963  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Large-matrix subtraction-assignment kernel, fallback overload: forwards all four arguments
// unchanged to selectDefaultSubAssignKernel().
//
// C      : target matrix, updated in place by the callee.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor.
//
// NOTE(review): listing line 6981 (between the template header and the function name, presumably
// the return type / enabling condition) is absent from this view -- confirm against the full file.
6977  template< typename MT3 // Type of the left-hand side target matrix
6978  , typename MT4 // Type of the left-hand side matrix operand
6979  , typename MT5 // Type of the right-hand side matrix operand
6980  , typename ST2 > // Type of the scalar value
6982  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6983  {
6984  selectDefaultSubAssignKernel( C, A, B, scalar );
6985  }
6986  //**********************************************************************************************
6987 
6988  //**Vectorized default subtraction assignment to dense matrices (large matrices)****************
// Large-matrix subtraction-assignment kernel, vectorized overload: dispatches to one of the
// lmmm() / ummm() / mmm() routines depending on LOW and UPP (LOW is tested first).
//
// The subtraction is expressed through the sign of the scaling argument: each routine is called
// with -scalar and ST2(1) as its last two arguments.
//
// C      : target matrix, passed on as the first argument.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor (negated before it is passed on).
//
// NOTE(review): LOW and UPP are declared outside this view, and listing line 7007 (presumably the
// return type / enabling condition) is absent -- confirm against the full file.
// NOTE(review): presumably the last two arguments scale the product and the existing content of
// C, respectively -- confirm against the declarations in <blaze/math/dense/MMM.h>.
7003  template< typename MT3 // Type of the left-hand side target matrix
7004  , typename MT4 // Type of the left-hand side matrix operand
7005  , typename MT5 // Type of the right-hand side matrix operand
7006  , typename ST2 > // Type of the scalar value
7008  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7009  {
7010  if( LOW )
7011  lmmm( C, A, B, -scalar, ST2(1) );
7012  else if( UPP )
7013  ummm( C, A, B, -scalar, ST2(1) );
7014  else
7015  mmm( C, A, B, -scalar, ST2(1) );
7016  }
7017  //**********************************************************************************************
7018 
7019  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// BLAS subtraction-assignment kernel, fallback overload: makes no BLAS call and forwards all
// four arguments unchanged to selectLargeSubAssignKernel().
//
// C      : target matrix, updated in place by the callee.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor.
//
// NOTE(review): listing line 7037 (between the template header and the function name, presumably
// the return type / enabling condition) is absent from this view -- confirm against the full file.
7033  template< typename MT3 // Type of the left-hand side target matrix
7034  , typename MT4 // Type of the left-hand side matrix operand
7035  , typename MT5 // Type of the right-hand side matrix operand
7036  , typename ST2 > // Type of the scalar value
7038  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7039  {
7040  selectLargeSubAssignKernel( C, A, B, scalar );
7041  }
7042  //**********************************************************************************************
7043 
7044  //**BLAS-based subtraction assignment to dense matrices*****************************************
7045 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
7046 
// BLAS-based subtraction-assignment kernel (only compiled when both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are non-zero, per the surrounding #if).
//
// Three cases, tested in this order:
//  - MT4 triangular: B is copied into a temporary, trmm() is applied to it with A from the left
//    and ET(scalar), and the temporary is subtracted from C via subAssign().
//  - MT5 triangular: same scheme with a copy of A and B applied from the right.
//  - otherwise: a single gemm() call with ET(-scalar) and ET(1) as trailing arguments.
//
// C      : target matrix.
// A      : left-hand side matrix operand.
// B      : right-hand side matrix operand.
// scalar : scaling factor; converted to the element type of MT3 before use.
//
// NOTE(review): listing line 7063 (between the template header and the function name, presumably
// the return type / enabling condition) is absent from this view -- confirm against the full file.
7059  template< typename MT3 // Type of the left-hand side target matrix
7060  , typename MT4 // Type of the left-hand side matrix operand
7061  , typename MT5 // Type of the right-hand side matrix operand
7062  , typename ST2 > // Type of the scalar value
7064  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7065  {
7066  typedef ElementType_<MT3> ET;
7067 
7068  if( IsTriangular<MT4>::value ) {
// The temporary has the result type of the target, not of B.
7069  ResultType_<MT3> tmp( serial( B ) );
7070  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7071  subAssign( C, tmp );
7072  }
7073  else if( IsTriangular<MT5>::value ) {
7074  ResultType_<MT3> tmp( serial( A ) );
7075  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7076  subAssign( C, tmp );
7077  }
7078  else {
7079  gemm( C, A, B, ET(-scalar), ET(1) );
7080  }
7081  }
7082 #endif
7083  //**********************************************************************************************
7084 
7085  //**Restructuring subtraction assignment to column-major matrices*******************************
// Restructuring subtraction assignment of the scaled matrix product to a column-major matrix.
//
// The product is not evaluated here. The two operands are fetched from rhs.matrix_ and the call
// is re-issued as subAssign() on a rewritten expression in which one or both operands are wrapped
// in trans(), the product is passed through 'fwd', and the result is scaled by rhs.scalar_:
//  - first branch (condition not visible): trans( left ) * trans( right )
//  - IsSymmetric<MT1>:                     trans( left ) * right
//  - otherwise:                            left * trans( right )
//
// lhs : target column-major matrix.
// rhs : scaled matrix-product expression to be subtracted.
//
// NOTE(review): listing lines 7100 (presumably the return type / enabling condition), 7103, 7105
// and 7115 (the condition of the first branch) are absent from this view -- confirm against the
// full file. MMM, MT1 and ForwardFunctor are declared outside this view.
7099  template< typename MT > // Type of the target matrix
7101  subAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7102  {
7104 
7106 
7107  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7108  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7109 
7110  const ForwardFunctor fwd;
7111 
7112  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7113  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7114 
7116  subAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
7117  else if( IsSymmetric<MT1>::value )
7118  subAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
7119  else
7120  subAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7121  }
7122  //**********************************************************************************************
7123 
7124  //**Subtraction assignment to sparse matrices***************************************************
7125  // No special implementation for the subtraction assignment to sparse matrices.
7126  //**********************************************************************************************
7127 
7128  //**Multiplication assignment to dense matrices*************************************************
7129  // No special implementation for the multiplication assignment to dense matrices.
7130  //**********************************************************************************************
7131 
7132  //**Multiplication assignment to sparse matrices************************************************
7133  // No special implementation for the multiplication assignment to sparse matrices.
7134  //**********************************************************************************************
7135 
7136  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the scaled matrix product to a dense matrix.
//
// Steps:
//  1. Return immediately if the target has zero rows or zero columns.
//  2. If the left operand has zero columns, reset() the target and return.
//  3. Otherwise construct A (type LT) from the left operand and B (type RT) from the right
//     operand, and call smpAssign() with the expression A * B * rhs.scalar_.
//
// lhs : target dense matrix.
// rhs : scaled matrix-product expression to be assigned.
//
// NOTE(review): listing lines 7153 (presumably the return type / enabling condition) and 7156 are
// absent from this view; LT, RT and MMM are declared outside this view -- confirm against the
// full file.
7151  template< typename MT // Type of the target dense matrix
7152  , bool SO > // Storage order of the target dense matrix
7154  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7155  {
7157 
7158  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7159  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7160 
7161  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7162  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7163 
// Empty target: nothing to do. Empty inner dimension: only reset the target.
7164  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7165  return;
7166  }
7167  else if( left.columns() == 0UL ) {
7168  reset( ~lhs );
7169  return;
7170  }
7171 
7172  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7173  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7174 
7175  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7176  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7177  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7178  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7179  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7180  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7181 
7182  smpAssign( ~lhs, A * B * rhs.scalar_ );
7183  }
7184  //**********************************************************************************************
7185 
7186  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the scaled matrix product to a sparse matrix (per the section header above).
//
// The expression 'rhs' is first materialized into a temporary of type TmpType; the temporary is
// then passed through 'fwd' and handed to smpAssign() together with the target.
//
// NOTE(review): the function signature (listing lines 7203-7204) and lines 7206, 7208 and
// 7210-7215 are absent from this view, so the function name, parameter list and the definition of
// TmpType cannot be read here; the body refers to parameters named 'lhs' and 'rhs'. ForwardFunctor
// is declared outside this view -- confirm all of this against the full file.
7201  template< typename MT // Type of the target sparse matrix
7202  , bool SO > // Storage order of the target sparse matrix
7205  {
7207 
7209 
7216 
7217  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7218  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7219 
7220  const ForwardFunctor fwd;
7221 
7222  const TmpType tmp( rhs );
7223  smpAssign( ~lhs, fwd( tmp ) );
7224  }
7225  //**********************************************************************************************
7226 
7227  //**Restructuring SMP assignment to column-major matrices***************************************
// Restructuring SMP assignment of the scaled matrix product to a column-major matrix.
//
// The product is not evaluated here. The two operands are fetched from rhs.matrix_ and the call
// is re-issued as smpAssign() on a rewritten expression in which one or both operands are wrapped
// in trans(), the product is passed through 'fwd', and the result is scaled by rhs.scalar_:
//  - first branch (condition not visible): trans( left ) * trans( right )
//  - IsSymmetric<MT1>:                     trans( left ) * right
//  - otherwise:                            left * trans( right )
//
// lhs : target column-major matrix.
// rhs : scaled matrix-product expression to be assigned.
//
// NOTE(review): listing lines 7242 (presumably the return type / enabling condition), 7245, 7247
// and 7257 (the condition of the first branch) are absent from this view -- confirm against the
// full file. MMM, MT1 and ForwardFunctor are declared outside this view.
7241  template< typename MT > // Type of the target matrix
7243  smpAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7244  {
7246 
7248 
7249  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7250  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7251 
7252  const ForwardFunctor fwd;
7253 
7254  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7255  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7256 
7258  smpAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
7259  else if( IsSymmetric<MT1>::value )
7260  smpAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
7261  else
7262  smpAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7263  }
7264  //**********************************************************************************************
7265 
7266  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the scaled matrix product to a dense matrix (per the section header
// above).
//
// Returns without touching the target if the target has zero rows or zero columns, or if the
// left operand has zero columns. Otherwise A (type LT) and B (type RT) are constructed from the
// two operands and smpAddAssign() is called with the expression A * B * rhs.scalar_.
//
// NOTE(review): the function signature (listing lines 7283-7284) and line 7286 are absent from
// this view, so the function name and parameter list cannot be read here; the body refers to
// parameters named 'lhs' and 'rhs'. LT, RT and MMM are declared outside this view -- confirm
// against the full file.
7281  template< typename MT // Type of the target dense matrix
7282  , bool SO > // Storage order of the target dense matrix
7285  {
7287 
7288  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7289  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7290 
7291  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7292  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7293 
// Unlike plain assignment, an empty inner dimension needs no reset: there is nothing to add.
7294  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7295  return;
7296  }
7297 
7298  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7299  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7300 
7301  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7302  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7303  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7304  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7305  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7306  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7307 
7308  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
7309  }
7310  //**********************************************************************************************
7311 
7312  //**Restructuring SMP addition assignment to column-major matrices******************************
// Restructuring SMP addition assignment of the scaled matrix product to a column-major matrix.
//
// The product is not evaluated here. The two operands are fetched from rhs.matrix_ and the call
// is re-issued as smpAddAssign() on a rewritten expression in which one or both operands are
// wrapped in trans(), the product is passed through 'fwd', and the result is scaled by
// rhs.scalar_:
//  - first branch (condition not visible): trans( left ) * trans( right )
//  - IsSymmetric<MT1>:                     trans( left ) * right
//  - otherwise:                            left * trans( right )
//
// lhs : target column-major matrix.
// rhs : scaled matrix-product expression to be added.
//
// NOTE(review): listing lines 7327 (presumably the return type / enabling condition), 7330, 7332
// and 7342 (the condition of the first branch) are absent from this view -- confirm against the
// full file. MMM, MT1 and ForwardFunctor are declared outside this view.
7326  template< typename MT > // Type of the target matrix
7328  smpAddAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7329  {
7331 
7333 
7334  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7335  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7336 
7337  const ForwardFunctor fwd;
7338 
7339  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7340  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7341 
7343  smpAddAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
7344  else if( IsSymmetric<MT1>::value )
7345  smpAddAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
7346  else
7347  smpAddAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7348  }
7349  //**********************************************************************************************
7350 
7351  //**SMP addition assignment to sparse matrices**************************************************
7352  // No special implementation for the SMP addition assignment to sparse matrices.
7353  //**********************************************************************************************
7354 
7355  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the scaled matrix product to a dense matrix (per the section
// header above).
//
// Returns without touching the target if the target has zero rows or zero columns, or if the
// left operand has zero columns. Otherwise A (type LT) and B (type RT) are constructed from the
// two operands and smpSubAssign() is called with the expression A * B * rhs.scalar_.
//
// NOTE(review): the function signature (listing lines 7372-7373) and line 7375 are absent from
// this view, so the function name and parameter list cannot be read here; the body refers to
// parameters named 'lhs' and 'rhs'. LT, RT and MMM are declared outside this view -- confirm
// against the full file.
7370  template< typename MT // Type of the target dense matrix
7371  , bool SO > // Storage order of the target dense matrix
7374  {
7376 
7377  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7378  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7379 
7380  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7381  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7382 
// An empty target or an empty inner dimension leaves nothing to subtract.
7383  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7384  return;
7385  }
7386 
7387  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7388  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7389 
7390  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7391  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7392  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7393  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7394  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7395  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7396 
7397  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
7398  }
7399  //**********************************************************************************************
7400 
7401  //**Restructuring SMP subtraction assignment to column-major matrices***************************
// Restructuring SMP subtraction assignment of a scaled matrix product to a column-major matrix.
// The product is rewritten with transposed operands (depending on operand symmetry) and the
// rewritten expression is passed on to smpSubAssign again.
// NOTE(review): listing numbers 7416, 7419, 7421 and 7431 are absent. Line 7416 presumably held
// the return type, and line 7431 presumably held the leading 'if' that the 'else if' below
// pairs with -- confirm against the original header.
7415  template< typename MT > // Type of the target matrix
7417  smpSubAssign( Matrix<MT,true>& lhs, const DMatScalarMultExpr& rhs )
7418  {
7420 
7422 
// Debug-time checks that the target has the same dimensions as the expression.
7423  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7424  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7425 
// Functor applied to every restructured product before it is scaled.
7426  const ForwardFunctor fwd;
7427 
// Extract both operands of the wrapped matrix product (rhs.matrix_).
7428  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7429  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7430 
// First alternative: both operands transposed (its guarding condition is not visible here).
7432  smpSubAssign( ~lhs, fwd( trans( left ) * trans( right ) ) * rhs.scalar_ );
// Second alternative: only the left operand is transposed when MT1 is symmetric.
7433  else if( IsSymmetric<MT1>::value )
7434  smpSubAssign( ~lhs, fwd( trans( left ) * right ) * rhs.scalar_ );
// Fallback: only the right operand is transposed.
7435  else
7436  smpSubAssign( ~lhs, fwd( left * trans( right ) ) * rhs.scalar_ );
7437  }
7438  //**********************************************************************************************
7439 
7440  //**SMP subtraction assignment to sparse matrices***********************************************
7441  // No special implementation for the SMP subtraction assignment to sparse matrices.
7442  //**********************************************************************************************
7443 
7444  //**SMP multiplication assignment to dense matrices*********************************************
7445  // No special implementation for the SMP multiplication assignment to dense matrices.
7446  //**********************************************************************************************
7447 
7448  //**SMP multiplication assignment to sparse matrices********************************************
7449  // No special implementation for the SMP multiplication assignment to sparse matrices.
7450  //**********************************************************************************************
7451 
7452  //**Compile time checks*************************************************************************
// Compile-time constraint (per the macro name) that ST and RightOperand are the same type.
// NOTE(review): listing numbers 7453-7459 are absent, so further constraints that may precede
// this one are not visible here.
7460  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
7461  //**********************************************************************************************
7462 };
7464 //*************************************************************************************************
7465 
7466 
7467 
7468 
7469 //=================================================================================================
7470 //
7471 // GLOBAL BINARY ARITHMETIC OPERATORS
7472 //
7473 //=================================================================================================
7474 
7475 //*************************************************************************************************
// Global binary operator for two dense matrix operands (presumably the multiplication
// operator, given the inner-dimension check below).
// NOTE(review): listing numbers 7504-7505, 7507 and 7513 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7502 template< typename T1 // Type of the left-hand side dense matrix
7503  , typename T2 > // Type of the right-hand side dense matrix
7506 {
7508 
// The column count of lhs must equal the row count of rhs; otherwise an invalid-argument
// exception is raised through BLAZE_THROW_INVALID_ARGUMENT.
7509  if( (~lhs).columns() != (~rhs).rows() ) {
7510  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
7511  }
7512 
7514 }
7515 //*************************************************************************************************
7516 
7517 
7518 
7519 
7520 //=================================================================================================
7521 //
7522 // GLOBAL FUNCTIONS
7523 //
7524 //=================================================================================================
7525 
7526 //*************************************************************************************************
// Global function on a dense matrix product 'dm' (presumably the declsym() overload, judging
// by the error message below).
// NOTE(review): listing numbers 7555-7556, 7558 and 7564 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7549 template< typename MT1 // Type of the left-hand side dense matrix
7550  , typename MT2 // Type of the right-hand side dense matrix
7551  , bool SF // Symmetry flag
7552  , bool HF // Hermitian flag
7553  , bool LF // Lower flag
7554  , bool UF > // Upper flag
7557 {
7559 
// A non-square expression is rejected with an invalid-argument exception.
7560  if( !isSquare( dm ) ) {
7561  BLAZE_THROW_INVALID_ARGUMENT( "Invalid symmetric matrix specification" );
7562  }
7563 
7565 }
7567 //*************************************************************************************************
7568 
7569 
7570 //*************************************************************************************************
// Global function on a dense matrix product 'dm' (presumably the declherm() overload, judging
// by the error message below).
// NOTE(review): listing numbers 7599-7600, 7602 and 7608 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7593 template< typename MT1 // Type of the left-hand side dense matrix
7594  , typename MT2 // Type of the right-hand side dense matrix
7595  , bool SF // Symmetry flag
7596  , bool HF // Hermitian flag
7597  , bool LF // Lower flag
7598  , bool UF > // Upper flag
7601 {
7603 
// A non-square expression is rejected with an invalid-argument exception.
7604  if( !isSquare( dm ) ) {
7605  BLAZE_THROW_INVALID_ARGUMENT( "Invalid Hermitian matrix specification" );
7606  }
7607 
7609 }
7611 //*************************************************************************************************
7612 
7613 
7614 //*************************************************************************************************
// Global function on a dense matrix product 'dm' (presumably the decllow() overload, judging
// by the error message below).
// NOTE(review): listing numbers 7643-7644, 7646 and 7652 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7637 template< typename MT1 // Type of the left-hand side dense matrix
7638  , typename MT2 // Type of the right-hand side dense matrix
7639  , bool SF // Symmetry flag
7640  , bool HF // Hermitian flag
7641  , bool LF // Lower flag
7642  , bool UF > // Upper flag
7645 {
7647 
// A non-square expression is rejected with an invalid-argument exception.
7648  if( !isSquare( dm ) ) {
7649  BLAZE_THROW_INVALID_ARGUMENT( "Invalid lower matrix specification" );
7650  }
7651 
7653 }
7655 //*************************************************************************************************
7656 
7657 
7658 //*************************************************************************************************
// Global function on a dense matrix product 'dm' (presumably the declupp() overload, judging
// by the error message below).
// NOTE(review): listing numbers 7687-7688, 7690 and 7696 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7681 template< typename MT1 // Type of the left-hand side dense matrix
7682  , typename MT2 // Type of the right-hand side dense matrix
7683  , bool SF // Symmetry flag
7684  , bool HF // Hermitian flag
7685  , bool LF // Lower flag
7686  , bool UF > // Upper flag
7689 {
7691 
// A non-square expression is rejected with an invalid-argument exception.
7692  if( !isSquare( dm ) ) {
7693  BLAZE_THROW_INVALID_ARGUMENT( "Invalid upper matrix specification" );
7694  }
7695 
7697 }
7699 //*************************************************************************************************
7700 
7701 
7702 //*************************************************************************************************
// Global function on a dense matrix product 'dm' (presumably the decldiag() overload, judging
// by the error message below).
// NOTE(review): listing numbers 7731-7732, 7734 and 7740 are absent, i.e. the return type,
// the function name/parameters and the return statement are not visible here.
7725 template< typename MT1 // Type of the left-hand side dense matrix
7726  , typename MT2 // Type of the right-hand side dense matrix
7727  , bool SF // Symmetry flag
7728  , bool HF // Hermitian flag
7729  , bool LF // Lower flag
7730  , bool UF > // Upper flag
7733 {
7735 
// A non-square expression is rejected with an invalid-argument exception.
7736  if( !isSquare( dm ) ) {
7737  BLAZE_THROW_INVALID_ARGUMENT( "Invalid diagonal matrix specification" );
7738  }
7739 
7741 }
7743 //*************************************************************************************************
7744 
7745 
7746 
7747 
7748 //=================================================================================================
7749 //
7750 // ROWS SPECIALIZATIONS
7751 //
7752 //=================================================================================================
7753 
7754 //*************************************************************************************************
// Rows specialization for DMatDMatMultExpr: derives from Rows<MT1>, i.e. the row trait of the
// product is taken from the left-hand side operand type.
7756 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7757 struct Rows< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Rows<MT1>
7758 {};
7760 //*************************************************************************************************
7761 
7762 
7763 
7764 
7765 //=================================================================================================
7766 //
7767 // COLUMNS SPECIALIZATIONS
7768 //
7769 //=================================================================================================
7770 
7771 //*************************************************************************************************
// Columns specialization for DMatDMatMultExpr: derives from Columns<MT2>, i.e. the column
// trait of the product is taken from the right-hand side operand type.
7773 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7774 struct Columns< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> > : public Columns<MT2>
7775 {};
7777 //*************************************************************************************************
7778 
7779 
7780 
7781 
7782 //=================================================================================================
7783 //
7784 // ISALIGNED SPECIALIZATIONS
7785 //
7786 //=================================================================================================
7787 
7788 //*************************************************************************************************
// IsAligned specialization for DMatDMatMultExpr: the product counts as aligned only if both
// operand types MT1 and MT2 are aligned.
7790 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7791 struct IsAligned< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7792  : public BoolConstant< And< IsAligned<MT1>, IsAligned<MT2> >::value >
7793 {};
7795 //*************************************************************************************************
7796 
7797 
7798 
7799 
7800 //=================================================================================================
7801 //
7802 // ISSYMMETRIC SPECIALIZATIONS
7803 //
7804 //=================================================================================================
7805 
7806 //*************************************************************************************************
// IsSymmetric specialization for DMatDMatMultExpr. The product is treated as symmetric if any
// of the following holds:
//  - the symmetry flag SF is set;
//  - the Hermitian flag HF is set and the element type of the Hermitian-flagged product
//    expression is a builtin type;
//  - both the lower flag LF and the upper flag UF are set.
7808 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7809 struct IsSymmetric< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7810  : public BoolConstant< Or< Bool<SF>
7811  , And< Bool<HF>
7812  , IsBuiltin< ElementType_< DMatDMatMultExpr<MT1,MT2,false,true,false,false> > > >
7813  , And< Bool<LF>, Bool<UF> > >::value >
7814 {};
7816 //*************************************************************************************************
7817 
7818 
7819 
7820 
7821 //=================================================================================================
7822 //
7823 // ISHERMITIAN SPECIALIZATIONS
7824 //
7825 //=================================================================================================
7826 
7827 //*************************************************************************************************
// IsHermitian partial specialization for DMatDMatMultExpr with the Hermitian flag fixed to
// 'true': such an expression is always reported as Hermitian, whatever SF, LF and UF are.
7829 template< typename MT1, typename MT2, bool SF, bool LF, bool UF >
7830 struct IsHermitian< DMatDMatMultExpr<MT1,MT2,SF,true,LF,UF> >
7831  : public TrueType
7832 {};
7834 //*************************************************************************************************
7835 
7836 
7837 
7838 
7839 //=================================================================================================
7840 //
7841 // ISLOWER SPECIALIZATIONS
7842 //
7843 //=================================================================================================
7844 
7845 //*************************************************************************************************
// IsLower specialization for DMatDMatMultExpr. The product is treated as lower if any of the
// following holds:
//  - the lower flag LF is set;
//  - both operands are lower;
//  - SF or HF is set and both operands are upper.
7847 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7848 struct IsLower< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7849  : public BoolConstant< Or< Bool<LF>
7850  , And< IsLower<MT1>, IsLower<MT2> >
7851  , And< Or< Bool<SF>, Bool<HF> >
7852  , IsUpper<MT1>, IsUpper<MT2> > >::value >
7853 {};
7855 //*************************************************************************************************
7856 
7857 
7858 
7859 
7860 //=================================================================================================
7861 //
7862 // ISUNILOWER SPECIALIZATIONS
7863 //
7864 //=================================================================================================
7865 
7866 //*************************************************************************************************
// IsUniLower specialization for DMatDMatMultExpr. The product is treated as unilower if both
// operands are unilower, or if SF or HF is set and both operands are uniupper.
7868 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7869 struct IsUniLower< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7870  : public BoolConstant< Or< And< IsUniLower<MT1>, IsUniLower<MT2> >
7871  , And< Or< Bool<SF>, Bool<HF> >
7872  , IsUniUpper<MT1>, IsUniUpper<MT2> > >::value >
7873 {};
7875 //*************************************************************************************************
7876 
7877 
7878 
7879 
7880 //=================================================================================================
7881 //
7882 // ISSTRICTLYLOWER SPECIALIZATIONS
7883 //
7884 //=================================================================================================
7885 
7886 //*************************************************************************************************
// IsStrictlyLower specialization for DMatDMatMultExpr. The product is treated as strictly
// lower if any of the following holds:
//  - MT1 is strictly lower and MT2 is lower;
//  - MT2 is strictly lower and MT1 is lower;
//  - SF or HF is set and one operand is strictly upper while the other one is upper.
7888 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7889 struct IsStrictlyLower< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7890  : public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7891  , And< IsStrictlyLower<MT2>, IsLower<MT1> >
7892  , And< Or< Bool<SF>, Bool<HF> >
7893  , Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
7894  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > > > >::value >
7895 {};
7897 //*************************************************************************************************
7898 
7899 
7900 
7901 
7902 //=================================================================================================
7903 //
7904 // ISUPPER SPECIALIZATIONS
7905 //
7906 //=================================================================================================
7907 
7908 //*************************************************************************************************
// IsUpper specialization for DMatDMatMultExpr. The product is treated as upper if any of the
// following holds:
//  - the upper flag UF is set;
//  - both operands are upper;
//  - SF or HF is set and both operands are lower.
7910 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7911 struct IsUpper< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7912  : public BoolConstant< Or< Bool<UF>
7913  , And< IsUpper<MT1>, IsUpper<MT2> >
7914  , And< Or< Bool<SF>, Bool<HF> >
7915  , IsLower<MT1>, IsLower<MT2> > >::value >
7916 {};
7918 //*************************************************************************************************
7919 
7920 
7921 
7922 
7923 //=================================================================================================
7924 //
7925 // ISUNIUPPER SPECIALIZATIONS
7926 //
7927 //=================================================================================================
7928 
7929 //*************************************************************************************************
// IsUniUpper specialization for DMatDMatMultExpr. The product is treated as uniupper if both
// operands are uniupper, or if SF or HF is set and both operands are unilower.
7931 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7932 struct IsUniUpper< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7933  : public BoolConstant< Or< And< IsUniUpper<MT1>, IsUniUpper<MT2> >
7934  , And< Or< Bool<SF>, Bool<HF> >
7935  , IsUniLower<MT1>, IsUniLower<MT2> > >::value >
7936 {};
7938 //*************************************************************************************************
7939 
7940 
7941 
7942 
7943 //=================================================================================================
7944 //
7945 // ISSTRICTLYUPPER SPECIALIZATIONS
7946 //
7947 //=================================================================================================
7948 
7949 //*************************************************************************************************
// IsStrictlyUpper specialization for DMatDMatMultExpr. The product is treated as strictly
// upper if any of the following holds:
//  - MT1 is strictly upper and MT2 is upper;
//  - MT2 is strictly upper and MT1 is upper;
//  - SF or HF is set and one operand is strictly lower while the other one is lower.
7951 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
7952 struct IsStrictlyUpper< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
7953  : public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
7954  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> >
7955  , And< Or< Bool<SF>, Bool<HF> >
7956  , Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7957  , And< IsStrictlyLower<MT2>, IsLower<MT1> > > > >::value >
7958 {};
7960 //*************************************************************************************************
7961 
7962 
7963 
7964 
7965 //=================================================================================================
7966 //
7967 // EXPRESSION TRAIT SPECIALIZATIONS
7968 //
7969 //=================================================================================================
7970 
7971 //*************************************************************************************************
// DMatDVecMultExprTrait specialization for a DMatDMatMultExpr on the left and a type VT on the
// right.
// NOTE(review): listing numbers 7978-7981 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
7973 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
7974 struct DMatDVecMultExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
7975 {
7976  public:
7977  //**********************************************************************************************
7982  , INVALID_TYPE >;
7983  //**********************************************************************************************
7984 };
7986 //*************************************************************************************************
7987 
7988 
7989 //*************************************************************************************************
// DMatSVecMultExprTrait specialization for a DMatDMatMultExpr on the left and a type VT on the
// right.
// NOTE(review): listing numbers 7996-7999 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
7991 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, typename VT >
7992 struct DMatSVecMultExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, VT >
7993 {
7994  public:
7995  //**********************************************************************************************
8000  , INVALID_TYPE >;
8001  //**********************************************************************************************
8002 };
8004 //*************************************************************************************************
8005 
8006 
8007 //*************************************************************************************************
// TDVecDMatMultExprTrait specialization for a type VT on the left and a DMatDMatMultExpr on
// the right.
// NOTE(review): listing numbers 8014-8017 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8009 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8010 struct TDVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8011 {
8012  public:
8013  //**********************************************************************************************
8018  , INVALID_TYPE >;
8019  //**********************************************************************************************
8020 };
8022 //*************************************************************************************************
8023 
8024 
8025 //*************************************************************************************************
// TSVecDMatMultExprTrait specialization for a type VT on the left and a DMatDMatMultExpr on
// the right.
// NOTE(review): listing numbers 8032-8035 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8027 template< typename VT, typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8028 struct TSVecDMatMultExprTrait< VT, DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8029 {
8030  public:
8031  //**********************************************************************************************
8036  , INVALID_TYPE >;
8037  //**********************************************************************************************
8038 };
8040 //*************************************************************************************************
8041 
8042 
8043 //*************************************************************************************************
// DMatDeclSymExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing numbers 8050-8052 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8045 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8046 struct DMatDeclSymExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8047 {
8048  public:
8049  //**********************************************************************************************
8053  , INVALID_TYPE >;
8054  //**********************************************************************************************
8055 };
8057 //*************************************************************************************************
8058 
8059 
8060 //*************************************************************************************************
// DMatDeclHermExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing numbers 8067-8069 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8062 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8063 struct DMatDeclHermExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8064 {
8065  public:
8066  //**********************************************************************************************
8070  , INVALID_TYPE >;
8071  //**********************************************************************************************
8072 };
8074 //*************************************************************************************************
8075 
8076 
8077 //*************************************************************************************************
// DMatDeclLowExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing numbers 8084-8086 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8079 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8080 struct DMatDeclLowExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8081 {
8082  public:
8083  //**********************************************************************************************
8087  , INVALID_TYPE >;
8088  //**********************************************************************************************
8089 };
8091 //*************************************************************************************************
8092 
8093 
8094 //*************************************************************************************************
// DMatDeclUppExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing numbers 8101-8103 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8096 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8097 struct DMatDeclUppExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8098 {
8099  public:
8100  //**********************************************************************************************
8104  , INVALID_TYPE >;
8105  //**********************************************************************************************
8106 };
8108 //*************************************************************************************************
8109 
8110 
8111 //*************************************************************************************************
// DMatDeclDiagExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing numbers 8118-8120 are absent, so the beginning of the member
// declaration is not visible; only its final alternative, INVALID_TYPE, can be seen.
8113 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8114 struct DMatDeclDiagExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8115 {
8116  public:
8117  //**********************************************************************************************
8121  , INVALID_TYPE >;
8122  //**********************************************************************************************
8123 };
8125 //*************************************************************************************************
8126 
8127 
8128 //*************************************************************************************************
// SubmatrixExprTrait specialization for DMatDMatMultExpr with the additional flag AF.
// NOTE(review): listing numbers 8135-8136 are absent, so the member declaration between the
// two separator lines is not visible here.
8130 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF, bool AF >
8131 struct SubmatrixExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF>, AF >
8132 {
8133  public:
8134  //**********************************************************************************************
8137  //**********************************************************************************************
8138 };
8140 //*************************************************************************************************
8141 
8142 
8143 //*************************************************************************************************
// RowExprTrait specialization for DMatDMatMultExpr: the expression type of a row of the
// product is the multiplication expression type of a row of the (const) left operand MT1 and
// the right operand MT2.
8145 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8146 struct RowExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8147 {
8148  public:
8149  //**********************************************************************************************
8150  using Type = MultExprTrait_< RowExprTrait_<const MT1>, MT2 >;
8151  //**********************************************************************************************
8152 };
8154 //*************************************************************************************************
8155 
8156 
8157 //*************************************************************************************************
// ColumnExprTrait specialization for DMatDMatMultExpr.
// NOTE(review): listing number 8164 is absent, so the member declaration between the two
// separator lines is not visible here.
8159 template< typename MT1, typename MT2, bool SF, bool HF, bool LF, bool UF >
8160 struct ColumnExprTrait< DMatDMatMultExpr<MT1,MT2,SF,HF,LF,UF> >
8161 {
8162  public:
8163  //**********************************************************************************************
8165  //**********************************************************************************************
8166 };
8168 //*************************************************************************************************
8169 
8170 } // namespace blaze
8171 
8172 #endif
typename SubmatrixExprTrait< MT, AF >::Type SubmatrixExprTrait_
Auxiliary alias declaration for the SubmatrixExprTrait type trait. The SubmatrixExprTrait_ alias decla...
Definition: SubmatrixExprTrait.h:134
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:344
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception. This macro encapsulates the default way o...
Definition: Exception.h:235
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:179
Evaluation of the expression type of a dense matrix declupp operation. Via this type trait it is possi...
Definition: DMatDeclUppExprTrait.h:75
Compile time check for row vector types. This type trait tests whether or not the given template argum...
Definition: IsRowVector.h:80
const DMatForEachExpr< MT, Conj, SO > conj(const DenseMatrix< MT, SO > &dm)
Returns a matrix containing the complex conjugate of each single element of dm.
Definition: DMatForEachExpr.h:1214
Header file for auxiliary alias declarations.
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:72
Constraint on the data type.
Header file for kernel specific block sizes.
Header file for mathematical functions.
constexpr bool useOptimizedKernels
Configuration switch for optimized kernels. This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:433
Compile time check for low-level access to constant data. This type trait tests whether the given data...
Definition: HasConstDataAccess.h:75
Header file for the Rows type trait.
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:300
Header file for the IsUniUpper type trait.
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
Header file for the SparseVector base class.
Header file for the DMatDeclDiagExprTrait class template.
If_< IsExpression< MT2 >, const MT2, const MT2 &> RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:312
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:507
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the serial shim.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:305
Header file for the IsDiagonal type trait.
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatScalarMultExpr.h:560
Generic wrapper for a compile time constant integral value. The IntegralConstant class template repres...
Definition: IntegralConstant.h:71
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the ColumnExprTrait class template.
Header file for the DeclUpp functor.
Header file for the IsSame and IsStrictlySame type traits.
Flag for Hermitian matrices.
Definition: DMatDMatMultExpr.h:198
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:194
Availability of a SIMD multiplication for the given data types. Depending on the available instruction...
Definition: HasSIMDMult.h:162
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template. The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:315
Header file for the IsColumnMajorMatrix type trait.
RightOperand scalar_
Right-hand side scalar of the multiplication expression.
Definition: DMatScalarMultExpr.h:633
Header file for the dense matrix multiplication kernels.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:533
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:318
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatScalarMultExpr.h:550
Header file for the IsRowVector type trait.
typename DisableIf< Condition, T >::Type DisableIf_
Auxiliary type for the DisableIf class template. The DisableIf_ alias declaration provides a convenien...
Definition: DisableIf.h:223
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:477
Header file for the IsIntegral type trait.
If_< IsExpression< MT1 >, const MT1, const MT1 &> LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:309
Base class for all matrix/scalar multiplication expression templates. The MatScalarMultExpr class serv...
Definition: MatScalarMultExpr.h:66
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1755
Header file for the DenseVector base class.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:88
Availability of a SIMD addition for the given data types. Depending on the available instruction set (...
Definition: HasSIMDAdd.h:163
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:721
DMatDMatMultExpr< MT1, MT2, SF, HF, LF, UF > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:298
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template. The MultTrait_ alias declaration provide...
Definition: MultTrait.h:245
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:306
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member constant is set to true, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to false, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:138
DisableIf_< IsSymmetric< MT >, const DMatDeclSymExpr< MT, SO > > declsym(const DenseMatrix< MT, SO > &dm)
Declares the given non-symmetric dense matrix expression dm as symmetric.
Definition: DMatDeclSymExpr.h:841
Evaluation of the expression type of a dense matrix/dense vector multiplication. Via this type trait i...
Definition: DMatDVecMultExprTrait.h:78
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
Header file for the RequiresEvaluation type trait.
System settings for performance optimizations.
Compile time check for data types. This type trait tests whether or not the given types can be combine...
Definition: IsSIMDCombinable.h:120
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:170
Header file for the IsUniLower type trait.
Header file for the IsBLASCompatible type trait.
RightOperand rightOperand() const noexcept
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:453
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions. The ResultType_ alias declaration provides a...
Definition: Aliases.h:323
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1802
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Flag for lower matrices.
Definition: DMatDMatMultExpr.h:199
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:71
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:181
Base class for sparse matrices. The SparseMatrix class is a base class for all sparse matrix classes...
Definition: Forward.h:119
Constraint on the data type.
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template. The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
Compile time check for low-level access to mutable data. This type trait tests whether the given data ...
Definition: HasMutableDataAccess.h:75
Compile time check for the alignment of data types. This type trait tests whether the given data type ...
Definition: IsAligned.h:87
Evaluation of the expression type of a dense matrix declsym operation. Via this type trait it is possi...
Definition: DMatDeclSymExprTrait.h:75
Constraint on the data type.
typename MultExprTrait< T1, T2 >::Type MultExprTrait_
Auxiliary alias declaration for the MultExprTrait class template. The MultExprTrait_ alias declaration...
Definition: MultExprTrait.h:344
Header file for the MultExprTrait class template.
DisableIf_< IsHermitian< MT >, const DMatDeclHermExpr< MT, SO > > declherm(const DenseMatrix< MT, SO > &dm)
Declares the given non-Hermitian dense matrix expression dm as Hermitian.
Definition: DMatDeclHermExpr.h:841
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
SubvectorExprTrait_< VT, unaligned > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:152
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions. The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
Compile time check for upper unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniUpper.h:86
Header file for the DisableIf class template.
Compile time check for dense vector types. This type trait tests whether or not the given template par...
Definition: IsDenseVector.h:78
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:178
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the DeclLow functor.
Header file for the IsDouble type trait.
Header file for the If class template.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:83
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
Generic wrapper for the decllow() function.
Definition: DeclLow.h:58
Compile time check for data types with padding. This type trait tests whether the given data type empl...
Definition: IsPadded.h:76
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Expression object for dense matrix-scalar multiplications. The DMatScalarMultExpr class represents the...
Definition: DMatScalarMultExpr.h:123
Flag for symmetric matrices.
Definition: DMatDMatMultExpr.h:197
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:176
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception. This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the DMatDeclLowExprTrait class template.
Header file for the Columns type trait.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:497
Header file for the Not class template.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions. The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Evaluation of the expression type of a dense matrix/sparse vector multiplication. Via this type trait ...
Definition: DMatSVecMultExprTrait.h:80
Compile time check for sparse vector types. This type trait tests whether or not the given template pa...
Definition: IsSparseVector.h:78
Evaluation of the expression type of a submatrix operation. Via this type trait it is possible to...
Definition: SubmatrixExprTrait.h:80
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:301
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:359
Header file for the IsAligned type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
Compile time check for strictly triangular matrix types. This type trait tests whether or not the give...
Definition: IsStrictlyTriangular.h:87
Compile time check for data types. This type trait tests whether or not the given template parameter i...
Definition: IsBLASCompatible.h:80
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:128
Header file for the IsStrictlyTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:60
Generic wrapper for the null function.
Definition: Noop.h:58
Header file for the IsTriangular type trait.
Header file for the DMatDeclUppExprTrait class template.
Header file for the DMatDeclSymExprTrait class template.
Compile time check for column vector types. This type trait tests whether or not the given template ar...
Definition: IsColumnVector.h:80
Constraints on the storage order of matrix types.
typename TDVecDMatMultExprTrait< VT, MT >::Type TDVecDMatMultExprTrait_
Auxiliary alias declaration for the TDVecDMatMultExprTrait class template. The TDVecDMatMultExprTrait_...
Definition: TDVecDMatMultExprTrait.h:119
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Header file for the exception macros of the math module.
DisableIf_< IsLower< MT >, const DMatDeclLowExpr< MT, SO > > decllow(const DenseMatrix< MT, SO > &dm)
Declares the given non-lower dense matrix expression dm as lower.
Definition: DMatDeclLowExpr.h:842
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Evaluation of the expression type of a row operation. Via this type trait it is possible to evalu...
Definition: RowExprTrait.h:79
LeftOperand matrix_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatScalarMultExpr.h:632
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:260
Header file for the DeclDiag functor.
Evaluation of the expression type of a dense matrix declherm operation. Via this type trait it is poss...
Definition: DMatDeclHermExprTrait.h:75
Compile time check for dense matrix types. This type trait tests whether or not the given template par...
Definition: IsDenseMatrix.h:78
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT >, IsDeclExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:128
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemm). This compilation switch e...
Definition: BLAS.h:93
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:109
Compile time check for lower unitriangular matrices. This type trait tests whether or not the given te...
Definition: IsUniLower.h:86
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:302
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:180
Header file for the conjugate shim.
Header file for the IsNumeric type trait.
typename T::LeftOperand LeftOperand_
Alias declaration for nested LeftOperand type definitions. The LeftOperand_ alias declaration provides...
Definition: Aliases.h:203
Header file for the HasConstDataAccess type trait.
Compile time check for resizable data types. This type trait tests whether the given data type is a re...
Definition: IsResizable.h:75
System settings for the BLAS mode.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSIMDCombinable type trait.
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:79
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
Header file for the MatScalarMultExpr base class.
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: DMatDMatMultExpr.h:407
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:83
ElementType_< ResultType > ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:303
Utility type for generic codes.
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template. The If_ alias declaration provides a convenient...
Definition: If.h:160
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
SIMD characteristics of data types. The SIMDTrait class template provides the SIMD characteristics of ...
Definition: SIMDTrait.h:296
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:93
Compile time type negation. The Not class template negates the given compile time condition. In case the given condition would evaluate to true, the nested member enumeration is set to false and vice versa.
Definition: Not.h:70
Header file for the DMatDeclHermExprTrait class template.
Compile time check for Hermitian matrices. This type trait tests whether or not the given template par...
Definition: IsHermitian.h:85
Compile time check for built-in data types. This type trait tests whether or not the given template pa...
Definition: IsBuiltin.h:75
Compile time check for integral data types. This type trait tests whether or not the given template pa...
Definition: IsIntegral.h:75
Base class for matrices. The Matrix class is a base class for all dense and sparse matrix classes with...
Definition: Forward.h:94
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: DMatDMatMultExpr.h:304
Constraint on the data type.
Constraints on the storage order of matrix types.
Generic wrapper for the declherm() function.
Definition: DeclHerm.h:58
Header file for the HasMutableDataAccess type trait.
Header file for the Noop functor.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:113
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:465
Header file for BLAS triangular matrix/matrix multiplication functions (trmm)
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:315
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:177
typename DMatDVecMultExprTrait< MT, VT >::Type DMatDVecMultExprTrait_
Auxiliary alias declaration for the DMatDVecMultExprTrait class template. The DMatDVecMultExprTrait_ a...
Definition: DMatDVecMultExprTrait.h:119
typename EnableIf< Condition, T >::Type EnableIf_
Auxiliary alias declaration for the EnableIf class template. The EnableIf_ alias declaration provides ...
Definition: EnableIf.h:223
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions. The OppositeType_ alias declaration provid...
Definition: Aliases.h:243
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Header file for the IsDenseVector type trait.
LeftOperand leftOperand() const noexcept
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:443
Generic wrapper for the declupp() function.
Definition: DeclUpp.h:58
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
Evaluation of the expression type of a dense vector/dense matrix multiplication. Via this type trait i...
Definition: TDVecDMatMultExprTrait.h:78
Evaluation of the expression type of a sparse vector/dense matrix multiplication. Via this type trait ...
Definition: TSVecDMatMultExprTrait.h:78
Compile time check for complex types. This type trait tests whether or not the given template paramete...
Definition: IsComplex.h:76
Header file for the IsRowMajorMatrix type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:55
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:733
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:487
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
Header file for BLAS general matrix/matrix multiplication functions (gemm)
Header file for the IsComplexFloat type trait.
Header file for the IntegralConstant class template.
Compile time evaluation of the number of columns of a matrix. The Columns type trait evaluates the num...
Definition: Columns.h:76
Evaluation of the expression type of a dense matrix decllow operation. Via this type trait it is possi...
Definition: DMatDeclLowExprTrait.h:75
Generic wrapper for the decldiag() function.
Definition: DeclDiag.h:58
Evaluation of the expression type of a dense matrix decldiag operation. Via this type trait it is poss...
Definition: DMatDeclDiagExprTrait.h:75
Compile time evaluation of the number of rows of a matrix. The Rows type trait evaluates the number of...
Definition: Rows.h:76
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the DeclHerm functor.
Header file for the complex data type.
typename T::RightOperand RightOperand_
Alias declaration for nested RightOperand type definitions. The RightOperand_ alias declaration provid...
Definition: Aliases.h:363
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:423
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions. The TransposeType_ alias declaration prov...
Definition: Aliases.h:403
Header file for the IsUpper type trait.
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
System settings for the debugging policy of the Blaze library.
Constraint on the data type.
Generic wrapper for the declsym() function.
Definition: DeclSym.h:58
BLAZE_ALWAYS_INLINE bool isSquare(const Matrix< MT, SO > &matrix) noexcept
Checks if the given matrix is a square matrix.
Definition: Matrix.h:677
Header file for the IsResizable type trait.
const DMatDMatMultExpr< T1, T2, false, false, false, false > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:7505
Flag for upper matrices.
Definition: DMatDMatMultExpr.h:200
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:508
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
DisableIf_< IsDiagonal< MT >, const DMatDeclDiagExpr< MT, SO > > decldiag(const DenseMatrix< MT, SO > &dm)
Declares the given non-diagonal dense matrix expression dm as diagonal.
Definition: DMatDeclDiagExpr.h:841
DisableIf_< IsUpper< MT >, const DMatDeclUppExpr< MT, SO > > declupp(const DenseMatrix< MT, SO > &dm)
Declares the given non-upper dense matrix expression dm as upper.
Definition: DMatDeclUppExpr.h:842
Evaluation of the expression type of a column operation. Via this type trait it is possible to ev...
Definition: ColumnExprTrait.h:78
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the Bool class template.
Header file for the DeclSym functor.
Header file for the TrueType type/value trait base class.
Header file for the IsExpression type trait class.
Header file for the function trace functionality.