DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <blaze/math/blas/gemm.h>
44 #include <blaze/math/blas/trmm.h>
45 #include <blaze/math/Aliases.h>
51 #include <blaze/math/Exception.h>
57 #include <blaze/math/Functions.h>
58 #include <blaze/math/shims/Reset.h>
60 #include <blaze/math/SIMD.h>
101 #include <blaze/system/BLAS.h>
102 #include <blaze/system/Blocking.h>
104 #include <blaze/system/Thresholds.h>
105 #include <blaze/util/Assert.h>
106 #include <blaze/util/Complex.h>
110 #include <blaze/util/DisableIf.h>
111 #include <blaze/util/EnableIf.h>
113 #include <blaze/util/InvalidType.h>
115 #include <blaze/util/mpl/And.h>
116 #include <blaze/util/mpl/If.h>
117 #include <blaze/util/mpl/Not.h>
118 #include <blaze/util/mpl/Or.h>
119 #include <blaze/util/Types.h>
128 
129 
130 namespace blaze {
131 
132 //=================================================================================================
133 //
134 // CLASS DMATTDMATMULTEXPR
135 //
136 //=================================================================================================
137 
138 //*************************************************************************************************
145 template< typename MT1 // Type of the left-hand side dense matrix
146  , typename MT2 > // Type of the right-hand side dense matrix
147 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
148  , private MatMatMultExpr
149  , private Computation
150 {
151  private:
152  //**Type definitions****************************************************************************
159  //**********************************************************************************************
160 
161  //**********************************************************************************************
// Compile-time flag for the left-hand side operand type MT1: true if either
// IsComputation<MT1> or RequiresEvaluation<MT1> is true.
163  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
// Compile-time flag for the right-hand side operand type MT2: true if either
// IsComputation<MT2> or RequiresEvaluation<MT2> is true.
168  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
169  //**********************************************************************************************
170 
171  //**********************************************************************************************
173 
// Helper trait whose nested 'value' is true if at least one of the two operands is flagged
// for evaluation (evaluateLeft || evaluateRight). The template parameters T1, T2 and T3 do
// not take part in the computation of 'value'.
177  template< typename T1, typename T2, typename T3 >
178  struct IsEvaluationRequired {
179  enum : bool { value = ( evaluateLeft || evaluateRight ) };
180  };
182  //**********************************************************************************************
183 
184  //**********************************************************************************************
186 
189  template< typename T1, typename T2, typename T3 >
190  struct UseBlasKernel {
192  HasMutableDataAccess<T1>::value &&
193  HasConstDataAccess<T2>::value &&
194  HasConstDataAccess<T3>::value &&
195  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
196  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
197  IsBLASCompatible< ElementType_<T1> >::value &&
198  IsBLASCompatible< ElementType_<T2> >::value &&
199  IsBLASCompatible< ElementType_<T3> >::value &&
200  IsSame< ElementType_<T1>, ElementType_<T2> >::value &&
201  IsSame< ElementType_<T1>, ElementType_<T3> >::value };
202  };
204  //**********************************************************************************************
205 
206  //**********************************************************************************************
208 
211  template< typename T1, typename T2, typename T3 >
212  struct UseVectorizedDefaultKernel {
213  enum : bool { value = useOptimizedKernels &&
214  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
215  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
216  AreSIMDCombinable< ElementType_<T1>
217  , ElementType_<T2>
218  , ElementType_<T3> >::value &&
219  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
220  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
221  };
223  //**********************************************************************************************
224 
225  public:
226  //**Type definitions****************************************************************************
233  typedef const ElementType ReturnType;
234  typedef const ResultType CompositeType;
235 
237  typedef If_< IsExpression<MT1>, const MT1, const MT1& > LeftOperand;
238 
240  typedef If_< IsExpression<MT2>, const MT2, const MT2& > RightOperand;
241 
244 
247  //**********************************************************************************************
248 
249  //**Compilation flags***************************************************************************
251  enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
252  MT1::simdEnabled && MT2::simdEnabled &&
255 
257  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
258  !evaluateRight && MT2::smpAssignable };
259  //**********************************************************************************************
260 
261  //**SIMD properties*****************************************************************************
// Compile-time constant taken from SIMDTrait<ElementType>::size; used by the vectorized
// kernels as the step width of their k-loops.
263  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
264  //**********************************************************************************************
265 
266  //**Constructor*********************************************************************************
// Constructor: stores both operands of the multiplication expression.
//
// lhs: left-hand side operand, rhs: right-hand side operand. The number of columns of lhs
// is expected to equal the number of rows of rhs; this is only checked via
// BLAZE_INTERNAL_ASSERT.
272  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs ) noexcept
273  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
274  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
275  {
276  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
277  }
278  //**********************************************************************************************
279 
280  //**Access operator*****************************************************************************
287  inline ReturnType operator()( size_t i, size_t j ) const {
288  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
289  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
290 
291  if( IsDiagonal<MT1>::value ) {
292  return lhs_(i,i) * rhs_(i,j);
293  }
294  else if( IsDiagonal<MT2>::value ) {
295  return lhs_(i,j) * rhs_(j,j);
296  }
298  const size_t begin( ( IsUpper<MT1>::value )
299  ?( ( IsLower<MT2>::value )
300  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
301  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
302  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
303  :( ( IsLower<MT2>::value )
304  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
305  :( 0UL ) ) );
306  const size_t end( ( IsLower<MT1>::value )
307  ?( ( IsUpper<MT2>::value )
308  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
309  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
310  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
311  :( ( IsUpper<MT2>::value )
312  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
313  :( lhs_.columns() ) ) );
314 
315  if( begin >= end ) return ElementType();
316 
317  const size_t n( end - begin );
318 
319  return subvector( row( lhs_, i ), begin, n ) * subvector( column( rhs_, j ), begin, n );
320  }
321  else {
322  return row( lhs_, i ) * column( rhs_, j );
323  }
324  }
325  //**********************************************************************************************
326 
327  //**At function*********************************************************************************
335  inline ReturnType at( size_t i, size_t j ) const {
336  if( i >= lhs_.rows() ) {
337  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
338  }
339  if( j >= rhs_.columns() ) {
340  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
341  }
342  return (*this)(i,j);
343  }
344  //**********************************************************************************************
345 
346  //**Rows function*******************************************************************************
// Returns the number of rows of the expression, i.e. the number of rows of the left-hand
// side operand.
351  inline size_t rows() const noexcept {
352  return lhs_.rows();
353  }
354  //**********************************************************************************************
355 
356  //**Columns function****************************************************************************
// Returns the number of columns of the expression, i.e. the number of columns of the
// right-hand side operand.
361  inline size_t columns() const noexcept {
362  return rhs_.columns();
363  }
364  //**********************************************************************************************
365 
366  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (member lhs_).
371  inline LeftOperand leftOperand() const noexcept {
372  return lhs_;
373  }
374  //**********************************************************************************************
375 
376  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (member rhs_).
381  inline RightOperand rightOperand() const noexcept {
382  return rhs_;
383  }
384  //**********************************************************************************************
385 
386  //**********************************************************************************************
392  template< typename T >
393  inline bool canAlias( const T* alias ) const noexcept {
394  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
395  }
396  //**********************************************************************************************
397 
398  //**********************************************************************************************
404  template< typename T >
405  inline bool isAliased( const T* alias ) const noexcept {
406  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
407  }
408  //**********************************************************************************************
409 
410  //**********************************************************************************************
415  inline bool isAligned() const noexcept {
416  return lhs_.isAligned() && rhs_.isAligned();
417  }
418  //**********************************************************************************************
419 
420  //**********************************************************************************************
425  inline bool canSMPAssign() const noexcept {
426  return ( !BLAZE_BLAS_IS_PARALLEL ||
427  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
428  ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD );
429  }
430  //**********************************************************************************************
431 
432  private:
433  //**Member variables****************************************************************************
// Left-hand side operand of the multiplication expression (initialized by the constructor).
434  LeftOperand lhs_;
// Right-hand side operand of the multiplication expression (initialized by the constructor).
435  RightOperand rhs_;
436  //**********************************************************************************************
437 
438  //**Assignment to dense matrices****************************************************************
// Assignment of the multiplication expression rhs to the dense matrix lhs.
//
// The target is expected to already have the dimensions of the expression (checked via
// BLAZE_INTERNAL_ASSERT only). Both operands are evaluated serially into A and B, and the
// computation is then delegated to selectAssignKernel().
// NOTE(review): the types LT and RT are not declared in the visible part of this file;
// presumably they are the evaluation types of the two operands - confirm.
451  template< typename MT // Type of the target dense matrix
452  , bool SO > // Storage order of the target dense matrix
453  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
454  {
456 
457  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
458  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
459 
     // Empty target: nothing to do. Empty inner dimension: the target is reset.
460  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
461  return;
462  }
463  else if( rhs.lhs_.columns() == 0UL ) {
464  reset( ~lhs );
465  return;
466  }
467 
468  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
469  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
470 
471  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
472  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
473  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
474  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
475  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
476  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
477 
478  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
479  }
481  //**********************************************************************************************
482 
483  //**Assignment to dense matrices (kernel selection)*********************************************
494  template< typename MT3 // Type of the left-hand side target matrix
495  , typename MT4 // Type of the left-hand side matrix operand
496  , typename MT5 > // Type of the right-hand side matrix operand
497  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
498  {
500  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
501  selectSmallAssignKernel( C, A, B );
502  else
503  selectBlasAssignKernel( C, A, B );
504  }
506  //**********************************************************************************************
507 
508  //**Default assignment to row-major dense matrices (general/general)****************************
// Default (non-vectorized) assignment kernel for row-major targets, enabled when neither
// operand is diagonal.
//
// C: target matrix, A: left-hand side operand, B: right-hand side operand.
// Every element of C is written: elements inside the index ranges derived from the
// compile-time structure (upper/lower, strictly upper/lower) of A and B are computed as an
// inner product over [kbegin,kend); all other elements are reset.
522  template< typename MT3 // Type of the left-hand side target matrix
523  , typename MT4 // Type of the left-hand side matrix operand
524  , typename MT5 > // Type of the right-hand side matrix operand
525  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
526  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
527  {
528  const size_t M( A.rows() );
529  const size_t N( B.columns() );
530  const size_t K( A.columns() );
531 
     // Rows [ibegin,iend) are computed; rows outside this range are reset as a whole.
532  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
533  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
534  :( 0UL ) );
535  const size_t iend( ( IsStrictlyUpper<MT4>::value )
536  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
537  :( M ) );
538  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
539 
540  for( size_t i=0UL; i<ibegin; ++i ) {
541  for( size_t j=0UL; j<N; ++j ) {
542  reset( (~C)(i,j) );
543  }
544  }
545  for( size_t i=ibegin; i<iend; ++i )
546  {
     // Columns [jbegin,jend) of row i are computed; the remaining columns are reset.
547  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
548  ?( ( IsStrictlyUpper<MT4>::value )
549  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
550  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
551  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
552  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
553  ?( ( IsStrictlyLower<MT4>::value )
554  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
555  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
556  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
557  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
558 
559  for( size_t j=0UL; j<jbegin; ++j ) {
560  reset( (~C)(i,j) );
561  }
562  for( size_t j=jbegin; j<jend; ++j )
563  {
     // Summation range [kbegin,kend) for element (i,j), restricted by the structure of
     // row i of A and column j of B.
564  const size_t kbegin( ( IsUpper<MT4>::value )
565  ?( ( IsLower<MT5>::value )
566  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
567  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
568  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
569  :( ( IsLower<MT5>::value )
570  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
571  :( 0UL ) ) );
572  const size_t kend( ( IsLower<MT4>::value )
573  ?( ( IsUpper<MT5>::value )
574  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
575  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
576  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
577  :( ( IsUpper<MT5>::value )
578  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
579  :( K ) ) );
580  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
581 
     // The first product is assigned, the remaining products are accumulated.
582  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
583  for( size_t k=kbegin+1UL; k<kend; ++k ) {
584  (~C)(i,j) += A(i,k) * B(k,j);
585  }
586  }
587  for( size_t j=jend; j<N; ++j ) {
588  reset( (~C)(i,j) );
589  }
590  }
591  for( size_t i=iend; i<M; ++i ) {
592  for( size_t j=0UL; j<N; ++j ) {
593  reset( (~C)(i,j) );
594  }
595  }
596  }
598  //**********************************************************************************************
599 
600  //**Default assignment to column-major dense matrices (general/general)*************************
// Default (non-vectorized) assignment kernel for column-major targets, enabled when neither
// operand is diagonal.
//
// C: target matrix, A: left-hand side operand, B: right-hand side operand.
// The target is traversed column by column. Every element of C is written: elements inside
// the index ranges derived from the compile-time structure of A and B are computed as an
// inner product over [kbegin,kend); all other elements are reset.
614  template< typename MT3 // Type of the left-hand side target matrix
615  , typename MT4 // Type of the left-hand side matrix operand
616  , typename MT5 > // Type of the right-hand side matrix operand
617  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
618  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
619  {
620  const size_t M( A.rows() );
621  const size_t N( B.columns() );
622  const size_t K( A.columns() );
623 
     // Columns [jbegin,jend) are computed; columns outside this range are reset as a whole.
624  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
625  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
626  :( 0UL ) );
627  const size_t jend( ( IsStrictlyLower<MT5>::value )
628  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
629  :( N ) );
630  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
631 
632  for( size_t j=0UL; j<jbegin; ++j ) {
633  for( size_t i=0UL; i<M; ++i ) {
634  reset( (~C)(i,j) );
635  }
636  }
637  for( size_t j=jbegin; j<jend; ++j )
638  {
     // Rows [ibegin,iend) of column j are computed; the remaining rows are reset.
639  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
640  ?( ( IsStrictlyLower<MT4>::value )
641  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
642  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
643  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
644  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
645  ?( ( IsStrictlyUpper<MT4>::value )
646  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
647  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
648  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
649  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
650 
651  for( size_t i=0UL; i<ibegin; ++i ) {
652  reset( (~C)(i,j) );
653  }
654  for( size_t i=ibegin; i<iend; ++i )
655  {
     // Summation range [kbegin,kend) for element (i,j), restricted by the structure of
     // row i of A and column j of B.
656  const size_t kbegin( ( IsUpper<MT4>::value )
657  ?( ( IsLower<MT5>::value )
658  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
659  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
660  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
661  :( ( IsLower<MT5>::value )
662  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
663  :( 0UL ) ) );
664  const size_t kend( ( IsLower<MT4>::value )
665  ?( ( IsUpper<MT5>::value )
666  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
667  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
668  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
669  :( ( IsUpper<MT5>::value )
670  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
671  :( K ) ) );
672  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
673 
     // The first product is assigned, the remaining products are accumulated.
674  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
675  for( size_t k=kbegin+1UL; k<kend; ++k ) {
676  (~C)(i,j) += A(i,k) * B(k,j);
677  }
678  }
679  for( size_t i=iend; i<M; ++i ) {
680  reset( (~C)(i,j) );
681  }
682  }
683  for( size_t j=jend; j<N; ++j ) {
684  for( size_t i=0UL; i<M; ++i ) {
685  reset( (~C)(i,j) );
686  }
687  }
688  }
690  //**********************************************************************************************
691 
692  //**Default assignment to row-major dense matrices (general/diagonal)***************************
// Default assignment kernel for row-major targets, enabled when the left operand is not
// diagonal and the right operand is diagonal.
//
// C: target matrix, A: left-hand side operand, B: right-hand side (diagonal) operand.
// Each computed element is the single product C(i,j) = A(i,j) * B(j,j). For an upper or
// lower A only the columns [jbegin,jend) of row i are computed and the columns outside
// that range are reset.
706  template< typename MT3 // Type of the left-hand side target matrix
707  , typename MT4 // Type of the left-hand side matrix operand
708  , typename MT5 > // Type of the right-hand side matrix operand
709  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
710  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
711  {
712  const size_t M( A.rows() );
713  const size_t N( B.columns() );
714 
715  for( size_t i=0UL; i<M; ++i )
716  {
     // Column range of row i, derived from the upper/lower structure of A.
717  const size_t jbegin( ( IsUpper<MT4>::value )
718  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
719  :( 0UL ) );
720  const size_t jend( ( IsLower<MT4>::value )
721  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
722  :( N ) );
723  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
724 
725  if( IsUpper<MT4>::value ) {
726  for( size_t j=0UL; j<jbegin; ++j ) {
727  reset( (~C)(i,j) );
728  }
729  }
730  for( size_t j=jbegin; j<jend; ++j ) {
731  (~C)(i,j) = A(i,j) * B(j,j);
732  }
733  if( IsLower<MT4>::value ) {
734  for( size_t j=jend; j<N; ++j ) {
735  reset( (~C)(i,j) );
736  }
737  }
738  }
739  }
741  //**********************************************************************************************
742 
743  //**Default assignment to column-major dense matrices (general/diagonal)************************
// Default assignment kernel for column-major targets, enabled when the left operand is not
// diagonal and the right operand is diagonal.
//
// C: target matrix, A: left-hand side operand, B: right-hand side (diagonal) operand.
// The target is processed in tiles of BLOCK_SIZE x BLOCK_SIZE elements. Within a tile each
// computed element is the single product C(i,j) = A(i,j) * B(j,j); for an upper or lower A
// the row range of column j is clipped to [ibegin,ipos) and the rest of the tile column is
// reset.
757  template< typename MT3 // Type of the left-hand side target matrix
758  , typename MT4 // Type of the left-hand side matrix operand
759  , typename MT5 > // Type of the right-hand side matrix operand
760  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
761  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
762  {
763  const size_t M( A.rows() );
764  const size_t N( B.columns() );
765 
766  const size_t block( BLOCK_SIZE );
767 
768  for( size_t jj=0UL; jj<N; jj+=block ) {
769  const size_t jend( min( N, jj+block ) );
770  for( size_t ii=0UL; ii<M; ii+=block ) {
771  const size_t iend( min( M, ii+block ) );
772  for( size_t j=jj; j<jend; ++j )
773  {
     // Row range of column j inside the current tile [ii,iend).
774  const size_t ibegin( ( IsLower<MT4>::value )
775  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
776  :( ii ) );
777  const size_t ipos( ( IsUpper<MT4>::value )
778  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
779  :( iend ) );
780 
781  if( IsLower<MT4>::value ) {
782  for( size_t i=ii; i<ibegin; ++i ) {
783  reset( (~C)(i,j) );
784  }
785  }
786  for( size_t i=ibegin; i<ipos; ++i ) {
787  (~C)(i,j) = A(i,j) * B(j,j);
788  }
789  if( IsUpper<MT4>::value ) {
790  for( size_t i=ipos; i<iend; ++i ) {
791  reset( (~C)(i,j) );
792  }
793  }
794  }
795  }
796  }
797  }
799  //**********************************************************************************************
800 
801  //**Default assignment to row-major dense matrices (diagonal/general)***************************
// Default assignment kernel for row-major targets, enabled when the left operand is
// diagonal and the right operand is not diagonal.
//
// C: target matrix, A: left-hand side (diagonal) operand, B: right-hand side operand.
// The target is processed in tiles of BLOCK_SIZE x BLOCK_SIZE elements. Within a tile each
// computed element is the single product C(i,j) = A(i,i) * B(i,j); for an upper or lower B
// the column range of row i is clipped to [jbegin,jpos) and the rest of the tile row is
// reset.
815  template< typename MT3 // Type of the left-hand side target matrix
816  , typename MT4 // Type of the left-hand side matrix operand
817  , typename MT5 > // Type of the right-hand side matrix operand
818  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
819  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
820  {
821  const size_t M( A.rows() );
822  const size_t N( B.columns() );
823 
824  const size_t block( BLOCK_SIZE );
825 
826  for( size_t ii=0UL; ii<M; ii+=block ) {
827  const size_t iend( min( M, ii+block ) );
828  for( size_t jj=0UL; jj<N; jj+=block ) {
829  const size_t jend( min( N, jj+block ) );
830  for( size_t i=ii; i<iend; ++i )
831  {
     // Column range of row i inside the current tile [jj,jend).
832  const size_t jbegin( ( IsUpper<MT5>::value )
833  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
834  :( jj ) );
835  const size_t jpos( ( IsLower<MT5>::value )
836  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
837  :( jend ) );
838 
839  if( IsUpper<MT5>::value ) {
840  for( size_t j=jj; j<jbegin; ++j ) {
841  reset( (~C)(i,j) );
842  }
843  }
844  for( size_t j=jbegin; j<jpos; ++j ) {
845  (~C)(i,j) = A(i,i) * B(i,j);
846  }
847  if( IsLower<MT5>::value ) {
848  for( size_t j=jpos; j<jend; ++j ) {
849  reset( (~C)(i,j) );
850  }
851  }
852  }
853  }
854  }
855  }
857  //**********************************************************************************************
858 
859  //**Default assignment to column-major dense matrices (diagonal/general)************************
// Default assignment kernel for column-major targets, enabled when the left operand is
// diagonal and the right operand is not diagonal.
//
// C: target matrix, A: left-hand side (diagonal) operand, B: right-hand side operand.
// Each computed element is the single product C(i,j) = A(i,i) * B(i,j). For an upper or
// lower B only the rows [ibegin,iend) of column j are computed and the rows outside that
// range are reset.
873  template< typename MT3 // Type of the left-hand side target matrix
874  , typename MT4 // Type of the left-hand side matrix operand
875  , typename MT5 > // Type of the right-hand side matrix operand
876  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
877  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
878  {
879  const size_t M( A.rows() );
880  const size_t N( B.columns() );
881 
882  for( size_t j=0UL; j<N; ++j )
883  {
     // Row range of column j, derived from the upper/lower structure of B.
884  const size_t ibegin( ( IsLower<MT5>::value )
885  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
886  :( 0UL ) );
887  const size_t iend( ( IsUpper<MT5>::value )
888  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
889  :( M ) );
890  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
891 
892  if( IsLower<MT5>::value ) {
893  for( size_t i=0UL; i<ibegin; ++i ) {
894  reset( (~C)(i,j) );
895  }
896  }
897  for( size_t i=ibegin; i<iend; ++i ) {
898  (~C)(i,j) = A(i,i) * B(i,j);
899  }
900  if( IsUpper<MT5>::value ) {
901  for( size_t i=iend; i<M; ++i ) {
902  reset( (~C)(i,j) );
903  }
904  }
905  }
906  }
908  //**********************************************************************************************
909 
910  //**Default assignment to dense matrices (diagonal/diagonal)************************************
924  template< typename MT3 // Type of the left-hand side target matrix
925  , typename MT4 // Type of the left-hand side matrix operand
926  , typename MT5 > // Type of the right-hand side matrix operand
927  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
928  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
929  {
930  reset( C );
931 
932  for( size_t i=0UL; i<A.rows(); ++i ) {
933  C(i,i) = A(i,i) * B(i,i);
934  }
935  }
937  //**********************************************************************************************
938 
939  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix assignment kernel used whenever UseVectorizedDefaultKernel<MT3,MT4,MT5> is
// not satisfied: it simply forwards to the matching selectDefaultAssignKernel() overload.
//
// C: target matrix, A: left-hand side operand, B: right-hand side operand.
953  template< typename MT3 // Type of the left-hand side target matrix
954  , typename MT4 // Type of the left-hand side matrix operand
955  , typename MT5 > // Type of the right-hand side matrix operand
956  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
957  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
958  {
959  selectDefaultAssignKernel( C, A, B );
960  }
962  //**********************************************************************************************
963 
964  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
979  template< typename MT3 // Type of the left-hand side target matrix
980  , typename MT4 // Type of the left-hand side matrix operand
981  , typename MT5 > // Type of the right-hand side matrix operand
982  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
983  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
984  {
985  const size_t M( A.rows() );
986  const size_t N( B.columns() );
987  const size_t K( A.columns() );
988 
989  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
990 
991  size_t i( 0UL );
992 
993  for( ; (i+2UL) <= M; i+=2UL )
994  {
995  size_t j( 0UL );
996 
997  for( ; (j+4UL) <= N; j+=4UL )
998  {
999  const size_t kbegin( ( IsUpper<MT4>::value )
1000  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1001  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1002  const size_t kend( ( IsLower<MT4>::value )
1003  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
1004  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
1005 
1006  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1007  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1008 
1009  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1010  size_t k( kbegin );
1011 
1012  for( ; k<kpos; k+=SIMDSIZE ) {
1013  const SIMDType a1( A.load(i ,k) );
1014  const SIMDType a2( A.load(i+1UL,k) );
1015  const SIMDType b1( B.load(k,j ) );
1016  const SIMDType b2( B.load(k,j+1UL) );
1017  const SIMDType b3( B.load(k,j+2UL) );
1018  const SIMDType b4( B.load(k,j+3UL) );
1019  xmm1 = xmm1 + a1 * b1;
1020  xmm2 = xmm2 + a1 * b2;
1021  xmm3 = xmm3 + a1 * b3;
1022  xmm4 = xmm4 + a1 * b4;
1023  xmm5 = xmm5 + a2 * b1;
1024  xmm6 = xmm6 + a2 * b2;
1025  xmm7 = xmm7 + a2 * b3;
1026  xmm8 = xmm8 + a2 * b4;
1027  }
1028 
1029  (~C)(i ,j ) = sum( xmm1 );
1030  (~C)(i ,j+1UL) = sum( xmm2 );
1031  (~C)(i ,j+2UL) = sum( xmm3 );
1032  (~C)(i ,j+3UL) = sum( xmm4 );
1033  (~C)(i+1UL,j ) = sum( xmm5 );
1034  (~C)(i+1UL,j+1UL) = sum( xmm6 );
1035  (~C)(i+1UL,j+2UL) = sum( xmm7 );
1036  (~C)(i+1UL,j+3UL) = sum( xmm8 );
1037 
1038  for( ; remainder && k<kend; ++k ) {
1039  (~C)(i ,j ) += A(i ,k) * B(k,j );
1040  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1041  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
1042  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
1043  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1044  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1045  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
1046  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
1047  }
1048  }
1049 
1050  for( ; (j+2UL) <= N; j+=2UL )
1051  {
1052  const size_t kbegin( ( IsUpper<MT4>::value )
1053  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1054  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1055  const size_t kend( ( IsLower<MT4>::value )
1056  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1057  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1058 
1059  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1060  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1061 
1062  SIMDType xmm1, xmm2, xmm3, xmm4;
1063  size_t k( kbegin );
1064 
1065  for( ; k<kpos; k+=SIMDSIZE ) {
1066  const SIMDType a1( A.load(i ,k) );
1067  const SIMDType a2( A.load(i+1UL,k) );
1068  const SIMDType b1( B.load(k,j ) );
1069  const SIMDType b2( B.load(k,j+1UL) );
1070  xmm1 = xmm1 + a1 * b1;
1071  xmm2 = xmm2 + a1 * b2;
1072  xmm3 = xmm3 + a2 * b1;
1073  xmm4 = xmm4 + a2 * b2;
1074  }
1075 
1076  (~C)(i ,j ) = sum( xmm1 );
1077  (~C)(i ,j+1UL) = sum( xmm2 );
1078  (~C)(i+1UL,j ) = sum( xmm3 );
1079  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1080 
1081  for( ; remainder && k<kend; ++k ) {
1082  (~C)(i ,j ) += A(i ,k) * B(k,j );
1083  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1084  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1085  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1086  }
1087  }
1088 
1089  if( j < N )
1090  {
1091  const size_t kbegin( ( IsUpper<MT4>::value )
1092  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1093  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1094  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1095 
1096  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1097  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1098 
1099  SIMDType xmm1, xmm2;
1100  size_t k( kbegin );
1101 
1102  for( ; k<kpos; k+=SIMDSIZE ) {
1103  const SIMDType b1( B.load(k,j) );
1104  xmm1 = xmm1 + A.load(i ,k) * b1;
1105  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1106  }
1107 
1108  (~C)(i ,j) = sum( xmm1 );
1109  (~C)(i+1UL,j) = sum( xmm2 );
1110 
1111  for( ; remainder && k<kend; ++k ) {
1112  (~C)(i ,j) += A(i ,k) * B(k,j);
1113  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1114  }
1115  }
1116  }
1117 
1118  if( i < M )
1119  {
1120  size_t j( 0UL );
1121 
1122  for( ; (j+4UL) <= N; j+=4UL )
1123  {
1124  const size_t kbegin( ( IsUpper<MT4>::value )
1125  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1126  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1127  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
1128 
1129  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1130  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1131 
1132  SIMDType xmm1, xmm2, xmm3, xmm4;
1133  size_t k( kbegin );
1134 
1135  for( ; k<kpos; k+=SIMDSIZE ) {
1136  const SIMDType a1( A.load(i,k) );
1137  xmm1 = xmm1 + a1 * B.load(k,j );
1138  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1139  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1140  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1141  }
1142 
1143  (~C)(i,j ) = sum( xmm1 );
1144  (~C)(i,j+1UL) = sum( xmm2 );
1145  (~C)(i,j+2UL) = sum( xmm3 );
1146  (~C)(i,j+3UL) = sum( xmm4 );
1147 
1148  for( ; remainder && k<kend; ++k ) {
1149  (~C)(i,j ) += A(i,k) * B(k,j );
1150  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1151  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
1152  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
1153  }
1154  }
1155 
1156  for( ; (j+2UL) <= N; j+=2UL )
1157  {
1158  const size_t kbegin( ( IsUpper<MT4>::value )
1159  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1160  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1161  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1162 
1163  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1164  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1165 
1166  SIMDType xmm1, xmm2;
1167  size_t k( kbegin );
1168 
1169  for( ; k<kpos; k+=SIMDSIZE ) {
1170  const SIMDType a1( A.load(i,k) );
1171  xmm1 = xmm1 + a1 * B.load(k,j );
1172  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1173  }
1174 
1175  (~C)(i,j ) = sum( xmm1 );
1176  (~C)(i,j+1UL) = sum( xmm2 );
1177 
1178  for( ; remainder && k<kend; ++k ) {
1179  (~C)(i,j ) += A(i,k) * B(k,j );
1180  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1181  }
1182  }
1183 
1184  if( j < N )
1185  {
1186  const size_t kbegin( ( IsUpper<MT4>::value )
1187  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1188  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1189 
1190  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1191  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1192 
1193  SIMDType xmm1;
1194  size_t k( kbegin );
1195 
1196  for( ; k<kpos; k+=SIMDSIZE ) {
1197  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1198  }
1199 
1200  (~C)(i,j) = sum( xmm1 );
1201 
1202  for( ; remainder && k<K; ++k ) {
1203  (~C)(i,j) += A(i,k) * B(k,j);
1204  }
1205  }
1206  }
1207  }
1209  //**********************************************************************************************
1210 
1211  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
/*!\brief Vectorized default assignment of a dense matrix-transpose dense matrix multiplication
//        to a column-major dense matrix (small matrices).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// Every element C(i,j) is overwritten with the sum over k of A(i,k)*B(k,j). The k range is
// traversed SIMDSIZE elements at a time via load(); a scalar loop finishes the range whenever
// at least one operand is not padded. The rows of C are processed in tiles of four, then two,
// then a possible last single row; within each row tile the columns are processed in pairs,
// followed by a possible last single column.
*/
1226  template< typename MT3 // Type of the left-hand side target matrix
1227  , typename MT4 // Type of the left-hand side matrix operand
1228  , typename MT5 > // Type of the right-hand side matrix operand
1229  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
1230  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1231  {
1232  const size_t M( A.rows() );
1233  const size_t N( B.columns() );
1234  const size_t K( A.columns() );
1235 
      // The scalar tail loops below only run if at least one of the operands is not padded.
1236  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
1237 
1238  size_t i( 0UL );
1239 
      // Row tiles of height four.
1240  for( ; (i+4UL) <= M; i+=4UL )
1241  {
1242  size_t j( 0UL );
1243 
      // Column pairs: a 4x2 tile of C is computed with eight SIMD accumulators.
1244  for( ; (j+2UL) <= N; j+=2UL )
1245  {
      // First k index: i and/or j for an upper A / lower B (0 otherwise), masked with
      // size_t(-SIMDSIZE), i.e. rounded down to a multiple of SIMDSIZE provided SIMDSIZE
      // is a power of two.
1246  const size_t kbegin( ( IsUpper<MT4>::value )
1247  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1248  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One-past-last k index: limited to i+4 for a lower A and to j+2 for an upper B, K otherwise.
1249  const size_t kend( ( IsLower<MT4>::value )
1250  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
1251  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1252 
      // End of the vectorized part; with 'remainder' set, [kpos,kend) is handled by the scalar loop.
1253  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1254  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1255 
      // NOTE(review): the accumulators are default-constructed and then accumulated into; this
      // presumably relies on SIMDType's default constructor zero-initializing -- confirm.
1256  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1257  size_t k( kbegin );
1258 
1259  for( ; k<kpos; k+=SIMDSIZE ) {
1260  const SIMDType a1( A.load(i ,k) );
1261  const SIMDType a2( A.load(i+1UL,k) );
1262  const SIMDType a3( A.load(i+2UL,k) );
1263  const SIMDType a4( A.load(i+3UL,k) );
1264  const SIMDType b1( B.load(k,j ) );
1265  const SIMDType b2( B.load(k,j+1UL) );
1266  xmm1 = xmm1 + a1 * b1;
1267  xmm2 = xmm2 + a1 * b2;
1268  xmm3 = xmm3 + a2 * b1;
1269  xmm4 = xmm4 + a2 * b2;
1270  xmm5 = xmm5 + a3 * b1;
1271  xmm6 = xmm6 + a3 * b2;
1272  xmm7 = xmm7 + a4 * b1;
1273  xmm8 = xmm8 + a4 * b2;
1274  }
1275 
      // The tile is overwritten (plain assignment) with the reduced accumulators; sum()
      // presumably adds up the lanes of a SIMD value -- confirm.
1276  (~C)(i ,j ) = sum( xmm1 );
1277  (~C)(i ,j+1UL) = sum( xmm2 );
1278  (~C)(i+1UL,j ) = sum( xmm3 );
1279  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1280  (~C)(i+2UL,j ) = sum( xmm5 );
1281  (~C)(i+2UL,j+1UL) = sum( xmm6 );
1282  (~C)(i+3UL,j ) = sum( xmm7 );
1283  (~C)(i+3UL,j+1UL) = sum( xmm8 );
1284 
      // Scalar tail for the k indices not covered by the SIMD loop.
1285  for( ; remainder && k<kend; ++k ) {
1286  (~C)(i ,j ) += A(i ,k) * B(k,j );
1287  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1288  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1289  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1290  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
1291  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
1292  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
1293  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
1294  }
1295  }
1296 
      // Odd N: last single column of the four-row tile (kend no longer depends on B).
1297  if( j < N )
1298  {
1299  const size_t kbegin( ( IsUpper<MT4>::value )
1300  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1301  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1302  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
1303 
1304  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1305  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1306 
1307  SIMDType xmm1, xmm2, xmm3, xmm4;
1308  size_t k( kbegin );
1309 
1310  for( ; k<kpos; k+=SIMDSIZE ) {
1311  const SIMDType b1( B.load(k,j) );
1312  xmm1 = xmm1 + A.load(i ,k) * b1;
1313  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1314  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1315  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1316  }
1317 
1318  (~C)(i ,j) = sum( xmm1 );
1319  (~C)(i+1UL,j) = sum( xmm2 );
1320  (~C)(i+2UL,j) = sum( xmm3 );
1321  (~C)(i+3UL,j) = sum( xmm4 );
1322 
1323  for( ; remainder && k<kend; ++k ) {
1324  (~C)(i ,j) += A(i ,k) * B(k,j);
1325  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1326  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
1327  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
1328  }
1329  }
1330  }
1331 
      // Row tiles of height two for the rows left over by the four-row loop.
1332  for( ; (i+2UL) <= M; i+=2UL )
1333  {
1334  size_t j( 0UL );
1335 
      // Column pairs: a 2x2 tile of C is computed with four SIMD accumulators.
1336  for( ; (j+2UL) <= N; j+=2UL )
1337  {
1338  const size_t kbegin( ( IsUpper<MT4>::value )
1339  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1340  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1341  const size_t kend( ( IsLower<MT4>::value )
1342  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1343  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1344 
1345  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1346  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1347 
1348  SIMDType xmm1, xmm2, xmm3, xmm4;
1349  size_t k( kbegin );
1350 
1351  for( ; k<kpos; k+=SIMDSIZE ) {
1352  const SIMDType a1( A.load(i ,k) );
1353  const SIMDType a2( A.load(i+1UL,k) );
1354  const SIMDType b1( B.load(k,j ) );
1355  const SIMDType b2( B.load(k,j+1UL) );
1356  xmm1 = xmm1 + a1 * b1;
1357  xmm2 = xmm2 + a1 * b2;
1358  xmm3 = xmm3 + a2 * b1;
1359  xmm4 = xmm4 + a2 * b2;
1360  }
1361 
1362  (~C)(i ,j ) = sum( xmm1 );
1363  (~C)(i ,j+1UL) = sum( xmm2 );
1364  (~C)(i+1UL,j ) = sum( xmm3 );
1365  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1366 
1367  for( ; remainder && k<kend; ++k ) {
1368  (~C)(i ,j ) += A(i ,k) * B(k,j );
1369  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
1370  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
1371  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
1372  }
1373  }
1374 
      // Odd N: last single column of the two-row tile.
1375  if( j < N )
1376  {
1377  const size_t kbegin( ( IsUpper<MT4>::value )
1378  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1379  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1380  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1381 
1382  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1383  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1384 
1385  SIMDType xmm1, xmm2;
1386  size_t k( kbegin );
1387 
1388  for( ; k<kpos; k+=SIMDSIZE ) {
1389  const SIMDType b1( B.load(k,j) );
1390  xmm1 = xmm1 + A.load(i ,k) * b1;
1391  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1392  }
1393 
1394  (~C)(i ,j) = sum( xmm1 );
1395  (~C)(i+1UL,j) = sum( xmm2 );
1396 
1397  for( ; remainder && k<kend; ++k ) {
1398  (~C)(i ,j) += A(i ,k) * B(k,j);
1399  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
1400  }
1401  }
1402  }
1403 
      // Odd M: last single row (kend no longer depends on A).
1404  if( i < M )
1405  {
1406  size_t j( 0UL );
1407 
1408  for( ; (j+2UL) <= N; j+=2UL )
1409  {
1410  const size_t kbegin( ( IsUpper<MT4>::value )
1411  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1412  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1413  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1414 
1415  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
1416  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1417 
1418  SIMDType xmm1, xmm2;
1419  size_t k( kbegin );
1420 
1421  for( ; k<kpos; k+=SIMDSIZE ) {
1422  const SIMDType a1( A.load(i,k) );
1423  xmm1 = xmm1 + a1 * B.load(k,j );
1424  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1425  }
1426 
1427  (~C)(i,j ) = sum( xmm1 );
1428  (~C)(i,j+1UL) = sum( xmm2 );
1429 
1430  for( ; remainder && k<kend; ++k ) {
1431  (~C)(i,j ) += A(i,k) * B(k,j );
1432  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
1433  }
1434  }
1435 
      // Last single column of the last row; here the k range always runs up to K.
1436  if( j < N )
1437  {
1438  const size_t kbegin( ( IsUpper<MT4>::value )
1439  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
1440  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
1441 
1442  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
1443  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
1444 
1445  SIMDType xmm1;
1446  size_t k( kbegin );
1447 
1448  for( ; k<kpos; k+=SIMDSIZE ) {
1449  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1450  }
1451 
1452  (~C)(i,j) = sum( xmm1 );
1453 
1454  for( ; remainder && k<K; ++k ) {
1455  (~C)(i,j) += A(i,k) * B(k,j);
1456  }
1457  }
1458  }
1459  }
1461  //**********************************************************************************************
1462 
1463  //**Default assignment to dense matrices (large matrices)***************************************
/*!\brief Default assignment of a dense matrix-transpose dense matrix multiplication
//        (large matrices).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is constrained by DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> > and
// does no work of its own: it forwards all three arguments to selectDefaultAssignKernel().
*/
1477  template< typename MT3 // Type of the left-hand side target matrix
1478  , typename MT4 // Type of the left-hand side matrix operand
1479  , typename MT5 > // Type of the right-hand side matrix operand
1480  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
1481  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1482  {
1483  selectDefaultAssignKernel( C, A, B );
1484  }
1486  //**********************************************************************************************
1487 
1488  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
/*!\brief Vectorized default assignment of a dense matrix-transpose dense matrix multiplication
//        to a row-major dense matrix (large matrices).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is constrained by EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >. It
// contains no kernel of its own and passes the unwrapped target (~C) and both operands on to
// selectSmallAssignKernel().
*/
1503  template< typename MT3 // Type of the left-hand side target matrix
1504  , typename MT4 // Type of the left-hand side matrix operand
1505  , typename MT5 > // Type of the right-hand side matrix operand
1506  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
1507  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1508  {
1509  // TODO: for the time being this simply reuses the small-matrix kernel
1510  selectSmallAssignKernel( ~C, A, B );
1511  }
1513  //**********************************************************************************************
1514 
1515  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
/*!\brief Vectorized default assignment of a dense matrix-transpose dense matrix multiplication
//        to a column-major dense matrix (large matrices).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is constrained by EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >. It
// contains no kernel of its own and passes the unwrapped target (~C) and both operands on to
// selectSmallAssignKernel().
*/
1530  template< typename MT3 // Type of the left-hand side target matrix
1531  , typename MT4 // Type of the left-hand side matrix operand
1532  , typename MT5 > // Type of the right-hand side matrix operand
1533  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
1534  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1535  {
1536  // TODO: for the time being this simply reuses the small-matrix kernel
1537  selectSmallAssignKernel( ~C, A, B );
1538  }
1540  //**********************************************************************************************
1541 
1542  //**Default assignment to dense matrices********************************************************
/*!\brief Default assignment of a dense matrix-transpose dense matrix multiplication
//        (fallback of the BLAS kernel selection).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is constrained by DisableIf_< UseBlasKernel<MT3,MT4,MT5> > and forwards all
// three arguments to selectLargeAssignKernel().
*/
1556  template< typename MT3 // Type of the left-hand side target matrix
1557  , typename MT4 // Type of the left-hand side matrix operand
1558  , typename MT5 > // Type of the right-hand side matrix operand
1559  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5> >
1560  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1561  {
1562  selectLargeAssignKernel( C, A, B );
1563  }
1565  //**********************************************************************************************
1566 
1567  //**BLAS-based assignment to dense matrices*****************************************************
1568 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
1569 
1582  template< typename MT3 // Type of the left-hand side target matrix
1583  , typename MT4 // Type of the left-hand side matrix operand
1584  , typename MT5 > // Type of the right-hand side matrix operand
1585  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5> >
1586  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1587  {
1588  typedef ElementType_<MT3> ET;
1589 
1590  if( IsTriangular<MT4>::value ) {
1591  assign( C, B );
1592  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1593  }
1594  else if( IsTriangular<MT5>::value ) {
1595  assign( C, A );
1596  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
1597  }
1598  else {
1599  gemm( C, A, B, ET(1), ET(0) );
1600  }
1601  }
1603 #endif
1604  //**********************************************************************************************
1605 
1606  //**Assignment to sparse matrices***************************************************************
/*!\brief Assignment of a dense matrix-transpose dense matrix multiplication to a sparse matrix.
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side multiplication expression to be assigned.
//
// The expression is not assigned element by element: it is first evaluated, wrapped in
// serial(), into a temporary of type TmpType, and that temporary is then assigned to the
// sparse target.
*/
1619  template< typename MT // Type of the target sparse matrix
1620  , bool SO > // Storage order of the target sparse matrix
1621  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1622  {
1624 
      // TmpType is picked from OppositeType and ResultType depending on the storage order SO of
      // the target (presumably OppositeType for SO == true -- confirm against IfTrue_).
1625  typedef IfTrue_< SO, OppositeType, ResultType > TmpType;
1626 
1632  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( CompositeType_<TmpType> );
1633 
1634  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1635  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1636 
      // Evaluate the whole product into the temporary, then hand it to the sparse target.
1637  const TmpType tmp( serial( rhs ) );
1638  assign( ~lhs, tmp );
1639  }
1641  //**********************************************************************************************
1642 
1643  //**Addition assignment to dense matrices*******************************************************
1656  template< typename MT // Type of the target dense matrix
1657  , bool SO > // Storage order of the target dense matrix
1658  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1659  {
1661 
1662  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1663  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1664 
1665  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1666  return;
1667  }
1668 
1669  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1670  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1671 
1672  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1673  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1674  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1675  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1676  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1677  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1678 
1679  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1680  }
1682  //**********************************************************************************************
1683 
1684  //**Addition assignment to dense matrices (kernel selection)************************************
1695  template< typename MT3 // Type of the left-hand side target matrix
1696  , typename MT4 // Type of the left-hand side matrix operand
1697  , typename MT5 > // Type of the right-hand side matrix operand
1698  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1699  {
1700  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
1701  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
1702  selectSmallAddAssignKernel( C, A, B );
1703  else
1704  selectBlasAddAssignKernel( C, A, B );
1705  }
1707  //**********************************************************************************************
1708 
1709  //**Default addition assignment to row-major dense matrices (general/general)*******************
/*!\brief Default addition assignment of a dense matrix-transpose dense matrix multiplication
//        to a row-major dense matrix (general/general).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand (not diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
//
// For every element (i,j) inside the index ranges computed below, the products A(i,k)*B(k,j)
// for k in [kbegin,kend) are added to C(i,j). All three ranges are narrowed according to the
// IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of the two operand types.
*/
1723  template< typename MT3 // Type of the left-hand side target matrix
1724  , typename MT4 // Type of the left-hand side matrix operand
1725  , typename MT5 > // Type of the right-hand side matrix operand
1726  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1727  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1728  {
1729  const size_t M( A.rows() );
1730  const size_t N( B.columns() );
1731  const size_t K( A.columns() );
1732 
      // Row range: starts at 1 for a strictly lower A (at 2 if B is strictly lower as well and
      // M > 1) and ends at M-1 for a strictly upper A (at M-2 if B is strictly upper as well
      // and M > 1). NOTE(review): M-1UL assumes M > 0 -- confirm that callers guarantee this.
1733  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
1734  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
1735  :( 0UL ) );
1736  const size_t iend( ( IsStrictlyUpper<MT4>::value )
1737  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
1738  :( M ) );
1739  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1740 
1741  for( size_t i=ibegin; i<iend; ++i )
1742  {
      // Column range of row i: depends on i only if both operands are upper (jbegin) or both
      // are lower (jend); a strictly triangular operand shifts the bound by one.
1743  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1744  ?( ( IsStrictlyUpper<MT4>::value )
1745  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
1746  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
1747  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
1748  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
1749  ?( ( IsStrictlyLower<MT4>::value )
1750  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
1751  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
1752  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
1753  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1754 
1755  for( size_t j=jbegin; j<jend; ++j )
1756  {
      // Summation range: bounded from below by i (upper A) and/or j (lower B), and from
      // above by i+1 (lower A) and/or j+1 (upper B); strict variants shift by one.
1757  const size_t kbegin( ( IsUpper<MT4>::value )
1758  ?( ( IsLower<MT5>::value )
1759  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1760  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1761  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1762  :( ( IsLower<MT5>::value )
1763  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1764  :( 0UL ) ) );
1765  const size_t kend( ( IsLower<MT4>::value )
1766  ?( ( IsUpper<MT5>::value )
1767  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1768  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1769  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1770  :( ( IsUpper<MT5>::value )
1771  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1772  :( K ) ) );
      // Every visited (i,j) is expected to have a non-empty summation range.
1773  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1774 
      // The k loop is unrolled by two: kpos is kbegin plus the trip count with its lowest
      // bit cleared; a single leftover index is handled after the loop.
1775  const size_t knum( kend - kbegin );
1776  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1777 
1778  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1779  (~C)(i,j) += A(i,k ) * B(k ,j);
1780  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1781  }
1782  if( kpos < kend ) {
1783  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1784  }
1785  }
1786  }
1787  }
1789  //**********************************************************************************************
1790 
1791  //**Default addition assignment to column-major dense matrices (general/general)****************
/*!\brief Default addition assignment of a dense matrix-transpose dense matrix multiplication
//        to a column-major dense matrix (general/general).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand (not diagonal).
// \param B The right-hand side multiplication operand (not diagonal).
//
// For every element (i,j) inside the index ranges computed below, the products A(i,k)*B(k,j)
// for k in [kbegin,kend) are added to C(i,j). In contrast to the row-major kernel the columns
// are traversed in the outer loop. All three ranges are narrowed according to the
// IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of the two operand types.
*/
1805  template< typename MT3 // Type of the left-hand side target matrix
1806  , typename MT4 // Type of the left-hand side matrix operand
1807  , typename MT5 > // Type of the right-hand side matrix operand
1808  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
1809  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1810  {
1811  const size_t M( A.rows() );
1812  const size_t N( B.columns() );
1813  const size_t K( A.columns() );
1814 
      // Column range: starts at 1 for a strictly upper B (at 2 if A is strictly upper as well
      // and N > 1) and ends at N-1 for a strictly lower B (at N-2 if A is strictly lower as
      // well and N > 1). NOTE(review): N-1UL assumes N > 0 -- confirm that callers guarantee this.
1815  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
1816  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
1817  :( 0UL ) );
1818  const size_t jend( ( IsStrictlyLower<MT5>::value )
1819  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
1820  :( N ) );
1821  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1822 
1823  for( size_t j=jbegin; j<jend; ++j )
1824  {
      // Row range of column j: depends on j only if both operands are lower (ibegin) or both
      // are upper (iend); a strictly triangular operand shifts the bound by one.
1825  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
1826  ?( ( IsStrictlyLower<MT4>::value )
1827  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
1828  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1829  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
1830  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1831  ?( ( IsStrictlyUpper<MT4>::value )
1832  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
1833  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
1834  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
1835  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1836 
1837  for( size_t i=ibegin; i<iend; ++i )
1838  {
      // Summation range: bounded from below by i (upper A) and/or j (lower B), and from
      // above by i+1 (lower A) and/or j+1 (upper B); strict variants shift by one.
1839  const size_t kbegin( ( IsUpper<MT4>::value )
1840  ?( ( IsLower<MT5>::value )
1841  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1842  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1843  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1844  :( ( IsLower<MT5>::value )
1845  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1846  :( 0UL ) ) );
1847  const size_t kend( ( IsLower<MT4>::value )
1848  ?( ( IsUpper<MT5>::value )
1849  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1850  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1851  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1852  :( ( IsUpper<MT5>::value )
1853  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1854  :( K ) ) );
      // Every visited (i,j) is expected to have a non-empty summation range.
1855  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1856 
      // The k loop is unrolled by two: kpos is kbegin plus the trip count with its lowest
      // bit cleared; a single leftover index is handled after the loop.
1857  const size_t knum( kend - kbegin );
1858  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1859 
1860  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1861  (~C)(i,j) += A(i,k ) * B(k ,j);
1862  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1863  }
1864  if( kpos < kend ) {
1865  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1866  }
1867  }
1868  }
1869  }
1871  //**********************************************************************************************
1872 
1873  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
1887  template< typename MT3 // Type of the left-hand side target matrix
1888  , typename MT4 // Type of the left-hand side matrix operand
1889  , typename MT5 > // Type of the right-hand side matrix operand
1890  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
1891  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1892  {
1893  const size_t M( A.rows() );
1894  const size_t N( B.columns() );
1895 
1896  for( size_t i=0UL; i<M; ++i )
1897  {
1898  const size_t jbegin( ( IsUpper<MT4>::value )
1899  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1900  :( 0UL ) );
1901  const size_t jend( ( IsLower<MT4>::value )
1902  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1903  :( N ) );
1904  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1905 
1906  const size_t jnum( jend - jbegin );
1907  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1908 
1909  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1910  (~C)(i,j ) += A(i,j ) * B(j ,j );
1911  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
1912  }
1913  if( jpos < jend ) {
1914  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos);
1915  }
1916  }
1917  }
1919  //**********************************************************************************************
1920 
1921  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
1935  template< typename MT3 // Type of the left-hand side target matrix
1936  , typename MT4 // Type of the left-hand side matrix operand
1937  , typename MT5 > // Type of the right-hand side matrix operand
1938  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
1939  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1940  {
1941  const size_t M( A.rows() );
1942  const size_t N( B.columns() );
1943 
1944  const size_t block( BLOCK_SIZE );
1945 
1946  for( size_t jj=0UL; jj<N; jj+=block ) {
1947  const size_t jend( min( N, jj+block ) );
1948  for( size_t ii=0UL; ii<M; ii+=block ) {
1949  const size_t iend( min( M, ii+block ) );
1950  for( size_t j=jj; j<jend; ++j )
1951  {
1952  const size_t ibegin( ( IsLower<MT4>::value )
1953  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
1954  :( ii ) );
1955  const size_t ipos( ( IsUpper<MT4>::value )
1956  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
1957  :( iend ) );
1958 
1959  for( size_t i=ibegin; i<ipos; ++i ) {
1960  (~C)(i,j) += A(i,j) * B(j,j);
1961  }
1962  }
1963  }
1964  }
1965  }
1967  //**********************************************************************************************
1968 
1969  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
1983  template< typename MT3 // Type of the left-hand side target matrix
1984  , typename MT4 // Type of the left-hand side matrix operand
1985  , typename MT5 > // Type of the right-hand side matrix operand
1986  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
1987  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1988  {
1989  const size_t M( A.rows() );
1990  const size_t N( B.columns() );
1991 
1992  const size_t block( BLOCK_SIZE );
1993 
1994  for( size_t ii=0UL; ii<M; ii+=block ) {
1995  const size_t iend( min( M, ii+block ) );
1996  for( size_t jj=0UL; jj<N; jj+=block ) {
1997  const size_t jend( min( N, jj+block ) );
1998  for( size_t i=ii; i<iend; ++i )
1999  {
2000  const size_t jbegin( ( IsUpper<MT5>::value )
2001  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
2002  :( jj ) );
2003  const size_t jpos( ( IsLower<MT5>::value )
2004  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
2005  :( jend ) );
2006 
2007  for( size_t j=jbegin; j<jpos; ++j ) {
2008  (~C)(i,j) += A(i,i) * B(i,j);
2009  }
2010  }
2011  }
2012  }
2013  }
2015  //**********************************************************************************************
2016 
2017  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
2031  template< typename MT3 // Type of the left-hand side target matrix
2032  , typename MT4 // Type of the left-hand side matrix operand
2033  , typename MT5 > // Type of the right-hand side matrix operand
2034  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
2035  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2036  {
2037  const size_t M( A.rows() );
2038  const size_t N( B.columns() );
2039 
2040  for( size_t j=0UL; j<N; ++j )
2041  {
2042  const size_t ibegin( ( IsLower<MT5>::value )
2043  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2044  :( 0UL ) );
2045  const size_t iend( ( IsUpper<MT5>::value )
2046  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2047  :( M ) );
2048  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2049 
2050  const size_t inum( iend - ibegin );
2051  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2052 
2053  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2054  (~C)(i ,j) += A(i ,i ) * B(i ,j);
2055  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
2056  }
2057  if( ipos < iend ) {
2058  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j);
2059  }
2060  }
2061  }
2063  //**********************************************************************************************
2064 
2065  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
2079  template< typename MT3 // Type of the left-hand side target matrix
2080  , typename MT4 // Type of the left-hand side matrix operand
2081  , typename MT5 > // Type of the right-hand side matrix operand
2082  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
2083  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2084  {
2085  for( size_t i=0UL; i<A.rows(); ++i ) {
2086  C(i,i) += A(i,i) * B(i,i);
2087  }
2088  }
2090  //**********************************************************************************************
2091 
2092  //**Default addition assignment to dense matrices (small matrices)******************************
/*!\brief Default addition assignment of a dense matrix-transpose dense matrix multiplication
//        (small matrices).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is constrained by DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> > and
// does no work of its own: it forwards all three arguments to selectDefaultAddAssignKernel().
*/
2106  template< typename MT3 // Type of the left-hand side target matrix
2107  , typename MT4 // Type of the left-hand side matrix operand
2108  , typename MT5 > // Type of the right-hand side matrix operand
2109  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2110  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2111  {
2112  selectDefaultAddAssignKernel( C, A, B );
2113  }
2115  //**********************************************************************************************
2116 
2117  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
/*!\brief Vectorized small-matrix kernel for the addition assignment \f$ C+=A*B \f$ to a
//        row-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with EnableIf_ on UseVectorizedDefaultKernel<MT3,MT4,MT5>.
// C is traversed in tiles: two rows at a time (plus one trailing row if the row count is odd)
// and, within each row group, four columns, then two columns, then a single last column.
// For every element of a tile the k range is covered by SIMD loads from A and B whose products
// are accumulated in one SIMD register per element; the register is reduced with sum() and
// added to the element. If at least one operand type is not padded, the part of the k range
// that does not fill a whole SIMD vector is processed by a scalar loop afterwards.
// The start and end of the k range are narrowed by compile-time IsUpper/IsLower tests on the
// operand types.
//
// NOTE(review): the xmm accumulators are declared without an initializer, so the kernel relies
// on SIMDType's default constructor yielding zero -- confirm against the SIMD type definition.
// NOTE(review): "& size_t(-SIMDSIZE)" rounds down to a multiple of SIMDSIZE only if SIMDSIZE is
// a power of two -- presumably guaranteed, verify.
// NOTE(review): when both operand types are padded, kpos equals kend and the last SIMD step may
// read elements with k >= kend; presumably padding/structural zeros make this harmless -- verify.
*/
2132  template< typename MT3 // Type of the left-hand side target matrix
2133  , typename MT4 // Type of the left-hand side matrix operand
2134  , typename MT5 > // Type of the right-hand side matrix operand
2135  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2136  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2137  {
2138  const size_t M( A.rows() );
2139  const size_t N( B.columns() );
2140  const size_t K( A.columns() );
2141
      // A scalar tail loop is required unless both operand types are padded.
2142  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2143
2144  size_t i( 0UL );
2145
      // Row groups of two: rows i and i+1 of C.
2146  for( ; (i+2UL) <= M; i+=2UL )
2147  {
2148  size_t j( 0UL );
2149
      // 2x4 tile: columns j .. j+3.
2150  for( ; (j+4UL) <= N; j+=4UL )
2151  {
      // First k of the range, chosen from the structure flags of the operand types and
      // masked down with size_t(-SIMDSIZE).
2152  const size_t kbegin( ( IsUpper<MT4>::value )
2153  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2154  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One past the last k of the range; K when neither IsLower<MT4> nor IsUpper<MT5> holds.
2155  const size_t kend( ( IsLower<MT4>::value )
2156  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
2157  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
2158
      // End of the SIMD part: kend masked down if a scalar tail follows, otherwise kend itself.
2159  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2160  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2161
      // One accumulator per element of the tile (xmm1-4: row i, xmm5-8: row i+1).
2162  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2163  size_t k( kbegin );
2164
2165  for( ; k<kpos; k+=SIMDSIZE ) {
2166  const SIMDType a1( A.load(i ,k) );
2167  const SIMDType a2( A.load(i+1UL,k) );
2168  const SIMDType b1( B.load(k,j ) );
2169  const SIMDType b2( B.load(k,j+1UL) );
2170  const SIMDType b3( B.load(k,j+2UL) );
2171  const SIMDType b4( B.load(k,j+3UL) );
2172  xmm1 = xmm1 + a1 * b1;
2173  xmm2 = xmm2 + a1 * b2;
2174  xmm3 = xmm3 + a1 * b3;
2175  xmm4 = xmm4 + a1 * b4;
2176  xmm5 = xmm5 + a2 * b1;
2177  xmm6 = xmm6 + a2 * b2;
2178  xmm7 = xmm7 + a2 * b3;
2179  xmm8 = xmm8 + a2 * b4;
2180  }
2181
      // Reduce each accumulator and add it to its element of C.
2182  (~C)(i ,j ) += sum( xmm1 );
2183  (~C)(i ,j+1UL) += sum( xmm2 );
2184  (~C)(i ,j+2UL) += sum( xmm3 );
2185  (~C)(i ,j+3UL) += sum( xmm4 );
2186  (~C)(i+1UL,j ) += sum( xmm5 );
2187  (~C)(i+1UL,j+1UL) += sum( xmm6 );
2188  (~C)(i+1UL,j+2UL) += sum( xmm7 );
2189  (~C)(i+1UL,j+3UL) += sum( xmm8 );
2190
      // Scalar tail for k in [kpos,kend); skipped entirely when remainder is false.
2191  for( ; remainder && k<kend; ++k ) {
2192  (~C)(i ,j ) += A(i ,k) * B(k,j );
2193  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2194  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL);
2195  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL);
2196  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2197  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2198  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL);
2199  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL);
2200  }
2201  }
2202
      // 2x2 tile: columns j and j+1 (same scheme as above with four accumulators).
2203  for( ; (j+2UL) <= N; j+=2UL )
2204  {
2205  const size_t kbegin( ( IsUpper<MT4>::value )
2206  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2207  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2208  const size_t kend( ( IsLower<MT4>::value )
2209  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2210  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2211
2212  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2213  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2214
2215  SIMDType xmm1, xmm2, xmm3, xmm4;
2216  size_t k( kbegin );
2217
2218  for( ; k<kpos; k+=SIMDSIZE ) {
2219  const SIMDType a1( A.load(i ,k) );
2220  const SIMDType a2( A.load(i+1UL,k) );
2221  const SIMDType b1( B.load(k,j ) );
2222  const SIMDType b2( B.load(k,j+1UL) );
2223  xmm1 = xmm1 + a1 * b1;
2224  xmm2 = xmm2 + a1 * b2;
2225  xmm3 = xmm3 + a2 * b1;
2226  xmm4 = xmm4 + a2 * b2;
2227  }
2228
2229  (~C)(i ,j ) += sum( xmm1 );
2230  (~C)(i ,j+1UL) += sum( xmm2 );
2231  (~C)(i+1UL,j ) += sum( xmm3 );
2232  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2233
2234  for( ; remainder && k<kend; ++k ) {
2235  (~C)(i ,j ) += A(i ,k) * B(k,j );
2236  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2237  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2238  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2239  }
2240  }
2241
      // 2x1 tile: the single remaining column. Here kend depends on IsLower<MT4> only.
2242  if( j < N )
2243  {
2244  const size_t kbegin( ( IsUpper<MT4>::value )
2245  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2246  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2247  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2248
2249  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2250  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2251
2252  SIMDType xmm1, xmm2;
2253  size_t k( kbegin );
2254
2255  for( ; k<kpos; k+=SIMDSIZE ) {
2256  const SIMDType b1( B.load(k,j) );
2257  xmm1 = xmm1 + A.load(i ,k) * b1;
2258  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2259  }
2260
2261  (~C)(i ,j) += sum( xmm1 );
2262  (~C)(i+1UL,j) += sum( xmm2 );
2263
2264  for( ; remainder && k<kend; ++k ) {
2265  (~C)(i ,j) += A(i ,k) * B(k,j);
2266  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2267  }
2268  }
2269  }
      // Trailing single row i (reached when M is odd).
2270  if( i < M )
2271  {
2272  size_t j( 0UL );
2273
      // 1x4 tile. Here kend depends on IsUpper<MT5> only.
2274  for( ; (j+4UL) <= N; j+=4UL )
2275  {
2276  const size_t kbegin( ( IsUpper<MT4>::value )
2277  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2278  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2279  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
2280
2281  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2282  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2283
2284  SIMDType xmm1, xmm2, xmm3, xmm4;
2285  size_t k( kbegin );
2286
2287  for( ; k<kpos; k+=SIMDSIZE ) {
2288  const SIMDType a1( A.load(i,k) );
2289  xmm1 = xmm1 + a1 * B.load(k,j );
2290  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2291  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2292  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2293  }
2294
2295  (~C)(i,j ) += sum( xmm1 );
2296  (~C)(i,j+1UL) += sum( xmm2 );
2297  (~C)(i,j+2UL) += sum( xmm3 );
2298  (~C)(i,j+3UL) += sum( xmm4 );
2299
2300  for( ; remainder && k<kend; ++k ) {
2301  (~C)(i,j ) += A(i,k) * B(k,j );
2302  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2303  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL);
2304  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL);
2305  }
2306  }
2307
      // 1x2 tile.
2308  for( ; (j+2UL) <= N; j+=2UL )
2309  {
2310  const size_t kbegin( ( IsUpper<MT4>::value )
2311  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2312  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2313  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2314
2315  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2316  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2317
2318  SIMDType xmm1, xmm2;
2319  size_t k( kbegin );
2320
2321  for( ; k<kpos; k+=SIMDSIZE ) {
2322  const SIMDType a1( A.load(i,k) );
2323  xmm1 = xmm1 + a1 * B.load(k,j );
2324  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2325  }
2326
2327  (~C)(i,j ) += sum( xmm1 );
2328  (~C)(i,j+1UL) += sum( xmm2 );
2329
2330  for( ; remainder && k<kend; ++k ) {
2331  (~C)(i,j ) += A(i,k) * B(k,j );
2332  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2333  }
2334  }
2335
      // 1x1 tile: last row, last column. The range always ends at K (no structure-based kend).
2336  if( j < N )
2337  {
2338  const size_t kbegin( ( IsUpper<MT4>::value )
2339  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2340  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2341
2342  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2343  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2344
2345  SIMDType xmm1;
2346  size_t k( kbegin );
2347
2348  for( ; k<kpos; k+=SIMDSIZE ) {
2349  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2350  }
2351
2352  (~C)(i,j) += sum( xmm1 );
2353
2354  for( ; remainder && k<K; ++k ) {
2355  (~C)(i,j) += A(i,k) * B(k,j);
2356  }
2357  }
2358  }
2359  }
2361  //**********************************************************************************************
2362 
2363  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
/*!\brief Vectorized small-matrix kernel for the addition assignment \f$ C+=A*B \f$ to a
//        column-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with EnableIf_ on UseVectorizedDefaultKernel<MT3,MT4,MT5>.
// C is traversed in tiles: four rows at a time, then two rows, then a single trailing row;
// within each row group two columns at a time followed by a single last column.
// For every element of a tile the k range is covered by SIMD loads from A and B whose products
// are accumulated in one SIMD register per element; the register is reduced with sum() and
// added to the element. If at least one operand type is not padded, the part of the k range
// that does not fill a whole SIMD vector is processed by a scalar loop afterwards.
// The start and end of the k range are narrowed by compile-time IsUpper/IsLower tests on the
// operand types.
//
// NOTE(review): the xmm accumulators are declared without an initializer, so the kernel relies
// on SIMDType's default constructor yielding zero -- confirm against the SIMD type definition.
// NOTE(review): "& size_t(-SIMDSIZE)" rounds down to a multiple of SIMDSIZE only if SIMDSIZE is
// a power of two -- presumably guaranteed, verify.
// NOTE(review): when both operand types are padded, kpos equals kend and the last SIMD step may
// read elements with k >= kend; presumably padding/structural zeros make this harmless -- verify.
*/
2378  template< typename MT3 // Type of the left-hand side target matrix
2379  , typename MT4 // Type of the left-hand side matrix operand
2380  , typename MT5 > // Type of the right-hand side matrix operand
2381  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2382  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2383  {
2384  const size_t M( A.rows() );
2385  const size_t N( B.columns() );
2386  const size_t K( A.columns() );
2387
      // A scalar tail loop is required unless both operand types are padded.
2388  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
2389
2390  size_t i( 0UL );
2391
      // Row groups of four: rows i .. i+3 of C.
2392  for( ; (i+4UL) <= M; i+=4UL )
2393  {
2394  size_t j( 0UL );
2395
      // 4x2 tile: columns j and j+1.
2396  for( ; (j+2UL) <= N; j+=2UL )
2397  {
      // First k of the range, chosen from the structure flags of the operand types and
      // masked down with size_t(-SIMDSIZE).
2398  const size_t kbegin( ( IsUpper<MT4>::value )
2399  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2400  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One past the last k of the range; K when neither IsLower<MT4> nor IsUpper<MT5> holds.
2401  const size_t kend( ( IsLower<MT4>::value )
2402  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
2403  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2404
      // End of the SIMD part: kend masked down if a scalar tail follows, otherwise kend itself.
2405  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2406  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2407
      // One accumulator per element of the tile (two per row).
2408  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2409  size_t k( kbegin );
2410
2411  for( ; k<kpos; k+=SIMDSIZE ) {
2412  const SIMDType a1( A.load(i ,k) );
2413  const SIMDType a2( A.load(i+1UL,k) );
2414  const SIMDType a3( A.load(i+2UL,k) );
2415  const SIMDType a4( A.load(i+3UL,k) );
2416  const SIMDType b1( B.load(k,j ) );
2417  const SIMDType b2( B.load(k,j+1UL) );
2418  xmm1 = xmm1 + a1 * b1;
2419  xmm2 = xmm2 + a1 * b2;
2420  xmm3 = xmm3 + a2 * b1;
2421  xmm4 = xmm4 + a2 * b2;
2422  xmm5 = xmm5 + a3 * b1;
2423  xmm6 = xmm6 + a3 * b2;
2424  xmm7 = xmm7 + a4 * b1;
2425  xmm8 = xmm8 + a4 * b2;
2426  }
2427
      // Reduce each accumulator and add it to its element of C.
2428  (~C)(i ,j ) += sum( xmm1 );
2429  (~C)(i ,j+1UL) += sum( xmm2 );
2430  (~C)(i+1UL,j ) += sum( xmm3 );
2431  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2432  (~C)(i+2UL,j ) += sum( xmm5 );
2433  (~C)(i+2UL,j+1UL) += sum( xmm6 );
2434  (~C)(i+3UL,j ) += sum( xmm7 );
2435  (~C)(i+3UL,j+1UL) += sum( xmm8 );
2436
      // Scalar tail for k in [kpos,kend); skipped entirely when remainder is false.
2437  for( ; remainder && k<kend; ++k ) {
2438  (~C)(i ,j ) += A(i ,k) * B(k,j );
2439  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2440  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2441  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2442  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j );
2443  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL);
2444  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j );
2445  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL);
2446  }
2447  }
2448
      // 4x1 tile: the single remaining column. Here kend depends on IsLower<MT4> only.
2449  if( j < N )
2450  {
2451  const size_t kbegin( ( IsUpper<MT4>::value )
2452  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2453  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2454  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
2455
2456  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2457  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2458
2459  SIMDType xmm1, xmm2, xmm3, xmm4;
2460  size_t k( kbegin );
2461
2462  for( ; k<kpos; k+=SIMDSIZE ) {
2463  const SIMDType b1( B.load(k,j) );
2464  xmm1 = xmm1 + A.load(i ,k) * b1;
2465  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2466  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2467  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2468  }
2469
2470  (~C)(i ,j) += sum( xmm1 );
2471  (~C)(i+1UL,j) += sum( xmm2 );
2472  (~C)(i+2UL,j) += sum( xmm3 );
2473  (~C)(i+3UL,j) += sum( xmm4 );
2474
2475  for( ; remainder && k<kend; ++k ) {
2476  (~C)(i ,j) += A(i ,k) * B(k,j);
2477  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2478  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j);
2479  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j);
2480  }
2481  }
2482  }
2483
      // Row groups of two: rows i and i+1 of C.
2484  for( ; (i+2UL) <= M; i+=2UL )
2485  {
2486  size_t j( 0UL );
2487
      // 2x2 tile.
2488  for( ; (j+2UL) <= N; j+=2UL )
2489  {
2490  const size_t kbegin( ( IsUpper<MT4>::value )
2491  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2492  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2493  const size_t kend( ( IsLower<MT4>::value )
2494  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2495  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2496
2497  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2498  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2499
2500  SIMDType xmm1, xmm2, xmm3, xmm4;
2501  size_t k( kbegin );
2502
2503  for( ; k<kpos; k+=SIMDSIZE ) {
2504  const SIMDType a1( A.load(i ,k) );
2505  const SIMDType a2( A.load(i+1UL,k) );
2506  const SIMDType b1( B.load(k,j ) );
2507  const SIMDType b2( B.load(k,j+1UL) );
2508  xmm1 = xmm1 + a1 * b1;
2509  xmm2 = xmm2 + a1 * b2;
2510  xmm3 = xmm3 + a2 * b1;
2511  xmm4 = xmm4 + a2 * b2;
2512  }
2513
2514  (~C)(i ,j ) += sum( xmm1 );
2515  (~C)(i ,j+1UL) += sum( xmm2 );
2516  (~C)(i+1UL,j ) += sum( xmm3 );
2517  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2518
2519  for( ; remainder && k<kend; ++k ) {
2520  (~C)(i ,j ) += A(i ,k) * B(k,j );
2521  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL);
2522  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j );
2523  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL);
2524  }
2525  }
2526
      // 2x1 tile: the single remaining column.
2527  if( j < N )
2528  {
2529  const size_t kbegin( ( IsUpper<MT4>::value )
2530  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2531  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2532  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2533
2534  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2535  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2536
2537  SIMDType xmm1, xmm2;
2538  size_t k( kbegin );
2539
2540  for( ; k<kpos; k+=SIMDSIZE ) {
2541  const SIMDType b1( B.load(k,j) );
2542  xmm1 = xmm1 + A.load(i ,k) * b1;
2543  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2544  }
2545
2546  (~C)(i ,j) += sum( xmm1 );
2547  (~C)(i+1UL,j) += sum( xmm2 );
2548
2549  for( ; remainder && k<kend; ++k ) {
2550  (~C)(i ,j) += A(i ,k) * B(k,j);
2551  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j);
2552  }
2553  }
2554  }
2555
      // Trailing single row i.
2556  if( i < M )
2557  {
2558  size_t j( 0UL );
2559
      // 1x2 tile. Here kend depends on IsUpper<MT5> only.
2560  for( ; (j+2UL) <= N; j+=2UL )
2561  {
2562  const size_t kbegin( ( IsUpper<MT4>::value )
2563  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2564  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2565  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2566
2567  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
2568  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2569
2570  SIMDType xmm1, xmm2;
2571  size_t k( kbegin );
2572
2573  for( ; k<kpos; k+=SIMDSIZE ) {
2574  const SIMDType a1( A.load(i,k) );
2575  xmm1 = xmm1 + a1 * B.load(k,j );
2576  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2577  }
2578
2579  (~C)(i,j ) += sum( xmm1 );
2580  (~C)(i,j+1UL) += sum( xmm2 );
2581
2582  for( ; remainder && k<kend; ++k ) {
2583  (~C)(i,j ) += A(i,k) * B(k,j );
2584  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL);
2585  }
2586  }
2587
      // 1x1 tile: last row, last column. The range always ends at K (no structure-based kend).
2588  if( j < N )
2589  {
2590  const size_t kbegin( ( IsUpper<MT4>::value )
2591  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
2592  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
2593
2594  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
2595  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
2596
2597  SIMDType xmm1;
2598  size_t k( kbegin );
2599
2600  for( ; k<kpos; k+=SIMDSIZE ) {
2601  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2602  }
2603
2604  (~C)(i,j) += sum( xmm1 );
2605
2606  for( ; remainder && k<K; ++k ) {
2607  (~C)(i,j) += A(i,k) * B(k,j);
2608  }
2609  }
2610  }
2611  }
2613  //**********************************************************************************************
2614 
2615  //**Default addition assignment to dense matrices (large matrices)******************************
/*!\brief Non-vectorized large-matrix kernel for the addition assignment \f$ C+=A*B \f$.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with DisableIf_ on UseVectorizedDefaultKernel<MT3,MT4,MT5> and
// forwards the three arguments unchanged to selectDefaultAddAssignKernel().
*/
2629  template< typename MT3 // Type of the left-hand side target matrix
2630  , typename MT4 // Type of the left-hand side matrix operand
2631  , typename MT5 > // Type of the right-hand side matrix operand
2632  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2633  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2634  {
2635  selectDefaultAddAssignKernel( C, A, B );
2636  }
2638  //**********************************************************************************************
2639 
2640  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
/*!\brief Vectorized large-matrix kernel for the addition assignment \f$ C+=A*B \f$ to a
//        row-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with EnableIf_ on UseVectorizedDefaultKernel<MT3,MT4,MT5>.
// No dedicated large-matrix implementation exists yet (see the TODO below): the call is
// passed on to selectSmallAddAssignKernel() with ~C as target.
*/
2655  template< typename MT3 // Type of the left-hand side target matrix
2656  , typename MT4 // Type of the left-hand side matrix operand
2657  , typename MT5 > // Type of the right-hand side matrix operand
2658  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2659  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2660  {
2661  // TODO
2662  selectSmallAddAssignKernel( ~C, A, B );
2663  }
2665  //**********************************************************************************************
2666 
2667  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
/*!\brief Vectorized large-matrix kernel for the addition assignment \f$ C+=A*B \f$ to a
//        column-major target.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with EnableIf_ on UseVectorizedDefaultKernel<MT3,MT4,MT5>.
// No dedicated large-matrix implementation exists yet (see the TODO below): the call is
// passed on to selectSmallAddAssignKernel() with ~C as target.
*/
2682  template< typename MT3 // Type of the left-hand side target matrix
2683  , typename MT4 // Type of the left-hand side matrix operand
2684  , typename MT5 > // Type of the right-hand side matrix operand
2685  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
2686  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2687  {
2688  // TODO
2689  selectSmallAddAssignKernel( ~C, A, B );
2690  }
2692  //**********************************************************************************************
2693 
2694  //**Default addition assignment to dense matrices***********************************************
/*!\brief Fallback for the BLAS-based addition assignment \f$ C+=A*B \f$.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with DisableIf_ on UseBlasKernel<MT3,MT4,MT5> and forwards the
// three arguments unchanged to selectLargeAddAssignKernel().
*/
2708  template< typename MT3 // Type of the left-hand side target matrix
2709  , typename MT4 // Type of the left-hand side matrix operand
2710  , typename MT5 > // Type of the right-hand side matrix operand
2711  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5> >
2712  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2713  {
2714  selectLargeAddAssignKernel( C, A, B );
2715  }
2717  //**********************************************************************************************
2718 
2719  //**BLAS-based addition assignment to dense matrices********************************************
2720 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
2721 
/*!\brief BLAS-based kernel for the addition assignment \f$ C+=A*B \f$.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is constrained with EnableIf_ on UseBlasKernel<MT3,MT4,MT5> and is only compiled
// when BLAZE_BLAS_MODE and BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are both set.
// Three cases are distinguished by compile-time traits:
//  - IsTriangular<MT4>: a temporary of type ResultType_<MT3> is built from serial( B ), handed
//    to trmm() together with A and CblasLeft, and then added to C via addAssign().
//  - IsTriangular<MT5>: the same scheme with the temporary built from serial( A ), B as the
//    triangular argument and CblasRight.
//  - otherwise: a single gemm() call on C, A and B with both scalar arguments equal to ET(1).
//
// NOTE(review): presumably trmm() overwrites the temporary with the triangular product and
// gemm( C, A, B, alpha, beta ) computes C = alpha*A*B + beta*C -- confirm against the wrappers
// in blaze/math/blas/trmm.h and blaze/math/blas/gemm.h.
*/
2734  template< typename MT3 // Type of the left-hand side target matrix
2735  , typename MT4 // Type of the left-hand side matrix operand
2736  , typename MT5 > // Type of the right-hand side matrix operand
2737  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5> >
2738  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2739  {
2740  typedef ElementType_<MT3> ET;
2741
      // Triangular left operand: multiply into a temporary, then accumulate into C.
2742  if( IsTriangular<MT4>::value ) {
2743  ResultType_<MT3> tmp( serial( B ) );
2744  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2745  addAssign( C, tmp );
2746  }
      // Triangular right operand: same scheme with the roles of A and B exchanged.
2747  else if( IsTriangular<MT5>::value ) {
2748  ResultType_<MT3> tmp( serial( A ) );
2749  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
2750  addAssign( C, tmp );
2751  }
      // No triangular operand: operate directly on C.
2752  else {
2753  gemm( C, A, B, ET(1), ET(1) );
2754  }
2755  }
2757 #endif
2758  //**********************************************************************************************
2759 
2760  //**Addition assignment to sparse matrices******************************************************
2761  // No special implementation for the addition assignment to sparse matrices.
2762  //**********************************************************************************************
2763 
2764  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a dense matrix-transpose dense matrix multiplication to a
//        dense matrix (\f$ C-=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
//
// Returns without touching \a lhs if the target has no rows or no columns or if the left
// operand of the expression has no columns. Otherwise both operands of the expression are
// evaluated via serial() into objects of type LT and RT and the work is delegated to
// selectSubAssignKernel().
//
// NOTE(review): LT and RT are declared elsewhere in the class; presumably they are the
// (possibly evaluated) operand types -- confirm against the class typedefs.
*/
2777  template< typename MT // Type of the target dense matrix
2778  , bool SO > // Storage order of the target dense matrix
2779  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2780  {
2782
2783  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2784  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2785
      // Nothing to subtract for an empty target or an empty inner dimension.
2786  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2787  return;
2788  }
2789
2790  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2791  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2792
      // The evaluated operands must keep the dimensions of the expression operands and match
      // the target.
2793  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2794  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2795  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2796  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2797  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2798  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2799
2800  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2801  }
2803  //**********************************************************************************************
2804 
2805  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Selection of the kernel for the subtraction assignment \f$ C-=A*B \f$.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// selectSmallSubAssignKernel() is used if at least one operand type is diagonal
// (IsDiagonal<MT4> or IsDiagonal<MT5>) or if the number of elements of C
// (rows times columns) is below DMATTDMATMULT_THRESHOLD; in all other cases
// selectBlasSubAssignKernel() is used.
*/
2816  template< typename MT3 // Type of the left-hand side target matrix
2817  , typename MT4 // Type of the left-hand side matrix operand
2818  , typename MT5 > // Type of the right-hand side matrix operand
2819  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2820  {
2821  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
2822  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
2823  selectSmallSubAssignKernel( C, A, B );
2824  else
2825  selectBlasSubAssignKernel( C, A, B );
2826  }
2828  //**********************************************************************************************
2829 
2830  //**Default subtraction assignment to row-major dense matrices (general/general)****************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a row-major target
//        (neither operand diagonal).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if neither MT4 nor MT5 is diagonal. It runs a scalar triple
// loop over rows i, columns j and the inner index k and subtracts A(i,k)*B(k,j) from C(i,j).
// All three index ranges are derived from the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper
// traits of the operand types; for operand types with none of these traits they are the full
// ranges [0,M), [0,N) and [0,K). The k loop is unrolled by two with one trailing step for an
// odd number of iterations.
*/
2844  template< typename MT3 // Type of the left-hand side target matrix
2845  , typename MT4 // Type of the left-hand side matrix operand
2846  , typename MT5 > // Type of the right-hand side matrix operand
2847  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2848  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2849  {
2850  const size_t M( A.rows() );
2851  const size_t N( B.columns() );
2852  const size_t K( A.columns() );
2853
      // Row range; the "M > 1UL" tests keep M-2UL from being used when M is 1.
2854  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
2855  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
2856  :( 0UL ) );
2857  const size_t iend( ( IsStrictlyUpper<MT4>::value )
2858  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
2859  :( M ) );
2860  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2861
2862  for( size_t i=ibegin; i<iend; ++i )
2863  {
      // Column range for row i.
2864  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
2865  ?( ( IsStrictlyUpper<MT4>::value )
2866  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
2867  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
2868  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
2869  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
2870  ?( ( IsStrictlyLower<MT4>::value )
2871  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
2872  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
2873  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
2874  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2875
2876  for( size_t j=jbegin; j<jend; ++j )
2877  {
      // Inner range for element (i,j); asserted below to be non-empty.
2878  const size_t kbegin( ( IsUpper<MT4>::value )
2879  ?( ( IsLower<MT5>::value )
2880  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2881  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2882  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2883  :( ( IsLower<MT5>::value )
2884  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2885  :( 0UL ) ) );
2886  const size_t kend( ( IsLower<MT4>::value )
2887  ?( ( IsUpper<MT5>::value )
2888  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
2889  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2890  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
2891  :( ( IsUpper<MT5>::value )
2892  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2893  :( K ) ) );
2894  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
2895
      // kpos marks the end of the part that is processed two steps at a time.
2896  const size_t knum( kend - kbegin );
2897  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
2898
2899  for( size_t k=kbegin; k<kpos; k+=2UL ) {
2900  (~C)(i,j) -= A(i,k ) * B(k ,j);
2901  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
2902  }
      // Single trailing step when the number of k iterations is odd.
2903  if( kpos < kend ) {
2904  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
2905  }
2906  }
2907  }
2908  }
2910  //**********************************************************************************************
2911 
2912  //**Default subtraction assignment to column-major dense matrices (general/general)*************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a column-major target
//        (neither operand diagonal).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if neither MT4 nor MT5 is diagonal. It runs a scalar triple
// loop with the column index j outermost, the row index i in the middle and the inner index k
// innermost and subtracts A(i,k)*B(k,j) from C(i,j).
// All three index ranges are derived from the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper
// traits of the operand types; for operand types with none of these traits they are the full
// ranges [0,N), [0,M) and [0,K). The k loop is unrolled by two with one trailing step for an
// odd number of iterations.
*/
2926  template< typename MT3 // Type of the left-hand side target matrix
2927  , typename MT4 // Type of the left-hand side matrix operand
2928  , typename MT5 > // Type of the right-hand side matrix operand
2929  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
2930  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2931  {
2932  const size_t M( A.rows() );
2933  const size_t N( B.columns() );
2934  const size_t K( A.columns() );
2935
      // Column range; the "N > 1UL" tests keep N-2UL from being used when N is 1.
2936  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
2937  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
2938  :( 0UL ) );
2939  const size_t jend( ( IsStrictlyLower<MT5>::value )
2940  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
2941  :( N ) );
2942  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2943
2944  for( size_t j=jbegin; j<jend; ++j )
2945  {
      // Row range for column j.
2946  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
2947  ?( ( IsStrictlyLower<MT4>::value )
2948  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
2949  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2950  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
2951  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
2952  ?( ( IsStrictlyUpper<MT4>::value )
2953  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
2954  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
2955  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
2956  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2957
2958  for( size_t i=ibegin; i<iend; ++i )
2959  {
      // Inner range for element (i,j); asserted below to be non-empty.
2960  const size_t kbegin( ( IsUpper<MT4>::value )
2961  ?( ( IsLower<MT5>::value )
2962  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2963  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2964  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2965  :( ( IsLower<MT5>::value )
2966  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2967  :( 0UL ) ) );
2968  const size_t kend( ( IsLower<MT4>::value )
2969  ?( ( IsUpper<MT5>::value )
2970  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
2971  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2972  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
2973  :( ( IsUpper<MT5>::value )
2974  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2975  :( K ) ) );
2976  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
2977
      // kpos marks the end of the part that is processed two steps at a time.
2978  const size_t knum( kend - kbegin );
2979  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
2980
2981  for( size_t k=kbegin; k<kpos; k+=2UL ) {
2982  (~C)(i,j) -= A(i,k ) * B(k ,j);
2983  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
2984  }
      // Single trailing step when the number of k iterations is odd.
2985  if( kpos < kend ) {
2986  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
2987  }
2988  }
2989  }
2990  }
2992  //**********************************************************************************************
2993 
2994  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a row-major target
//        with a diagonal right-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if MT5 is diagonal and MT4 is not. Only the diagonal of B is
// read: each visited element is updated as C(i,j) -= A(i,j) * B(j,j). For every row i the
// column range is narrowed by the IsUpper/IsLower/IsStrictlyUpper/IsStrictlyLower traits of
// MT4 (full range [0,N) if none applies), and the column loop is unrolled by two with one
// trailing step for an odd number of columns.
*/
3008  template< typename MT3 // Type of the left-hand side target matrix
3009  , typename MT4 // Type of the left-hand side matrix operand
3010  , typename MT5 > // Type of the right-hand side matrix operand
3011  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3012  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3013  {
3014  const size_t M( A.rows() );
3015  const size_t N( B.columns() );
3016
3017  for( size_t i=0UL; i<M; ++i )
3018  {
      // Column range for row i, taken from the structure traits of the left operand.
3019  const size_t jbegin( ( IsUpper<MT4>::value )
3020  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3021  :( 0UL ) );
3022  const size_t jend( ( IsLower<MT4>::value )
3023  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
3024  :( N ) );
3025  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3026
      // jpos marks the end of the part that is processed two columns at a time.
3027  const size_t jnum( jend - jbegin );
3028  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3029
3030  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3031  (~C)(i,j ) -= A(i,j ) * B(j ,j );
3032  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
3033  }
      // Single trailing column when the number of columns in the range is odd.
3034  if( jpos < jend ) {
3035  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos);
3036  }
3037  }
3038  }
3040  //**********************************************************************************************
3041 
3042  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a column-major target
//        with a diagonal right-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if MT5 is diagonal and MT4 is not. Only the diagonal of B is
// read: each visited element is updated as C(i,j) -= A(i,j) * B(j,j). The index space is
// traversed in square blocks of edge length BLOCK_SIZE (columns outermost); inside a block the
// row range of each column is additionally clipped by the IsLower/IsUpper/IsStrictlyLower/
// IsStrictlyUpper traits of MT4.
*/
3056  template< typename MT3 // Type of the left-hand side target matrix
3057  , typename MT4 // Type of the left-hand side matrix operand
3058  , typename MT5 > // Type of the right-hand side matrix operand
3059  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
3060  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3061  {
3062  const size_t M( A.rows() );
3063  const size_t N( B.columns() );
3064
3065  const size_t block( BLOCK_SIZE );
3066
      // Column blocks [jj,jend) and row blocks [ii,iend).
3067  for( size_t jj=0UL; jj<N; jj+=block ) {
3068  const size_t jend( min( N, jj+block ) );
3069  for( size_t ii=0UL; ii<M; ii+=block ) {
3070  const size_t iend( min( M, ii+block ) );
3071  for( size_t j=jj; j<jend; ++j )
3072  {
      // Row range of column j inside the current block, clipped by the structure of MT4.
      // No assertion guards ibegin <= ipos; an empty range simply skips the loop.
3073  const size_t ibegin( ( IsLower<MT4>::value )
3074  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
3075  :( ii ) );
3076  const size_t ipos( ( IsUpper<MT4>::value )
3077  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
3078  :( iend ) );
3079
3080  for( size_t i=ibegin; i<ipos; ++i ) {
3081  (~C)(i,j) -= A(i,j) * B(j,j);
3082  }
3083  }
3084  }
3085  }
3086  }
3088  //**********************************************************************************************
3089 
3090  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a row-major target
//        with a diagonal left-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if MT4 is diagonal and MT5 is not. Only the diagonal of A is
// read: each visited element is updated as C(i,j) -= A(i,i) * B(i,j). The index space is
// traversed in square blocks of edge length BLOCK_SIZE (rows outermost); inside a block the
// column range of each row is additionally clipped by the IsUpper/IsLower/IsStrictlyUpper/
// IsStrictlyLower traits of MT5.
*/
3104  template< typename MT3 // Type of the left-hand side target matrix
3105  , typename MT4 // Type of the left-hand side matrix operand
3106  , typename MT5 > // Type of the right-hand side matrix operand
3107  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3108  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3109  {
3110  const size_t M( A.rows() );
3111  const size_t N( B.columns() );
3112
3113  const size_t block( BLOCK_SIZE );
3114
      // Row blocks [ii,iend) and column blocks [jj,jend).
3115  for( size_t ii=0UL; ii<M; ii+=block ) {
3116  const size_t iend( min( M, ii+block ) );
3117  for( size_t jj=0UL; jj<N; jj+=block ) {
3118  const size_t jend( min( N, jj+block ) );
3119  for( size_t i=ii; i<iend; ++i )
3120  {
      // Column range of row i inside the current block, clipped by the structure of MT5.
      // No assertion guards jbegin <= jpos; an empty range simply skips the loop.
3121  const size_t jbegin( ( IsUpper<MT5>::value )
3122  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
3123  :( jj ) );
3124  const size_t jpos( ( IsLower<MT5>::value )
3125  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
3126  :( jend ) );
3127
3128  for( size_t j=jbegin; j<jpos; ++j ) {
3129  (~C)(i,j) -= A(i,i) * B(i,j);
3130  }
3131  }
3132  }
3133  }
3134  }
3136  //**********************************************************************************************
3137 
3138  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
/*!\brief Default kernel for the subtraction assignment \f$ C-=A*B \f$ to a column-major target
//        with a diagonal left-hand side operand.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// The overload is enabled only if MT4 is diagonal and MT5 is not. Only the diagonal of A is
// read: each visited element is updated as C(i,j) -= A(i,i) * B(i,j). For every column j the
// row range is narrowed by the IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper traits of MT5
// (full range [0,M) if none applies), and the row loop is unrolled by two with one trailing
// step for an odd number of rows.
*/
3152  template< typename MT3 // Type of the left-hand side target matrix
3153  , typename MT4 // Type of the left-hand side matrix operand
3154  , typename MT5 > // Type of the right-hand side matrix operand
3155  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
3156  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3157  {
3158  const size_t M( A.rows() );
3159  const size_t N( B.columns() );
3160
3161  for( size_t j=0UL; j<N; ++j )
3162  {
      // Row range for column j, taken from the structure traits of the right operand.
3163  const size_t ibegin( ( IsLower<MT5>::value )
3164  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3165  :( 0UL ) );
3166  const size_t iend( ( IsUpper<MT5>::value )
3167  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3168  :( M ) );
3169  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3170
      // ipos marks the end of the part that is processed two rows at a time.
3171  const size_t inum( iend - ibegin );
3172  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
3173
3174  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3175  (~C)(i ,j) -= A(i ,i ) * B(i ,j);
3176  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
3177  }
      // Single trailing row when the number of rows in the range is odd.
3178  if( ipos < iend ) {
3179  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j);
3180  }
3181  }
3182  }
3184  //**********************************************************************************************
3185 
3186  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Default kernel for C -= A * B, selected by the EnableIf_ condition when both A and B are
// diagonal. Only the diagonal elements of C are touched: C(i,i) -= A(i,i) * B(i,i).
// C is taken as MT3& and indexed directly (no ~C), independent of its storage order.
//
// \param C  Target matrix.
// \param A  Left-hand side diagonal operand.
// \param B  Right-hand side diagonal operand.
3200  template< typename MT3 // Type of the left-hand side target matrix
3201  , typename MT4 // Type of the left-hand side matrix operand
3202  , typename MT5 > // Type of the right-hand side matrix operand
3203  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
3204  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3205  {
3206  for( size_t i=0UL; i<A.rows(); ++i ) {
3207  C(i,i) -= A(i,i) * B(i,i);
3208  }
3209  }
3211  //**********************************************************************************************
3212 
3213  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction assignment kernel, non-vectorized fallback. The DisableIf_ condition
// selects this overload when UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold; it forwards
// unchanged to selectDefaultSubAssignKernel.
//
// \param C  Target matrix.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3227  template< typename MT3 // Type of the left-hand side target matrix
3228  , typename MT4 // Type of the left-hand side matrix operand
3229  , typename MT5 > // Type of the right-hand side matrix operand
3230  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3231  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3232  {
3233  selectDefaultSubAssignKernel( ~C, A, B );
3234  }
3236  //**********************************************************************************************
3237 
3238  //**Default subtraction assignment to row-major dense matrices (small matrices)*****************
// Vectorized small-matrix kernel for C -= A * B on a row-major target, selected by the
// EnableIf_ condition when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C is processed in register tiles: pairs of rows with 4, then 2, then 1 column(s), followed
// by a possible last single row with 4, 2 and 1 column(s). For each tile the inner dimension k
// is restricted to [kbegin,kend) according to the IsUpper/IsLower traits of A and B. The SIMD
// loop accumulates A.load(i,k) * B.load(k,j) products; sum() reduces each accumulator to a
// scalar that is subtracted from the target element. A scalar loop handles the k values the
// SIMD loop did not cover.
//
// NOTE(review): the xmm accumulators are default-constructed and immediately read in
// 'xmm = xmm + ...'; this presumably relies on SIMDType default-initializing to zero - confirm.
//
// \param C  Target dense matrix (row-major), accessed through ~C.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3253  template< typename MT3 // Type of the left-hand side target matrix
3254  , typename MT4 // Type of the left-hand side matrix operand
3255  , typename MT5 > // Type of the right-hand side matrix operand
3256  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3257  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3258  {
3259  const size_t M( A.rows() );
3260  const size_t N( B.columns() );
3261  const size_t K( A.columns() );
3262 
// A scalar remainder loop is needed unless both operands are padded; if both are padded the
// SIMD loop runs all the way to kend.
3263  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
3264 
3265  size_t i( 0UL );
3266 
// ---- Two rows of C at a time -------------------------------------------------------------
3267  for( ; (i+2UL) <= M; i+=2UL )
3268  {
3269  size_t j( 0UL );
3270 
// 2x4 tile: rows i..i+1, columns j..j+3.
3271  for( ; (j+4UL) <= N; j+=4UL )
3272  {
// kbegin is rounded down to a multiple of SIMDSIZE via the size_t(-SIMDSIZE) mask.
3273  const size_t kbegin( ( IsUpper<MT4>::value )
3274  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3275  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3276  const size_t kend( ( IsLower<MT4>::value )
3277  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
3278  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
3279 
// kpos: end of the SIMD loop (kend rounded down to a multiple of SIMDSIZE if a remainder
// loop is required, as verified by the assertion below).
3280  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3281  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3282 
3283  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3284  size_t k( kbegin );
3285 
// xmm1..xmm4 accumulate row i, xmm5..xmm8 accumulate row i+1 (columns j..j+3 each).
3286  for( ; k<kpos; k+=SIMDSIZE ) {
3287  const SIMDType a1( A.load(i ,k) );
3288  const SIMDType a2( A.load(i+1UL,k) );
3289  const SIMDType b1( B.load(k,j ) );
3290  const SIMDType b2( B.load(k,j+1UL) );
3291  const SIMDType b3( B.load(k,j+2UL) );
3292  const SIMDType b4( B.load(k,j+3UL) );
3293  xmm1 = xmm1 + a1 * b1;
3294  xmm2 = xmm2 + a1 * b2;
3295  xmm3 = xmm3 + a1 * b3;
3296  xmm4 = xmm4 + a1 * b4;
3297  xmm5 = xmm5 + a2 * b1;
3298  xmm6 = xmm6 + a2 * b2;
3299  xmm7 = xmm7 + a2 * b3;
3300  xmm8 = xmm8 + a2 * b4;
3301  }
3302 
3303  (~C)(i ,j ) -= sum( xmm1 );
3304  (~C)(i ,j+1UL) -= sum( xmm2 );
3305  (~C)(i ,j+2UL) -= sum( xmm3 );
3306  (~C)(i ,j+3UL) -= sum( xmm4 );
3307  (~C)(i+1UL,j ) -= sum( xmm5 );
3308  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
3309  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
3310  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
3311 
// Scalar handling of the k values in [kpos,kend).
3312  for( ; remainder && k<kend; ++k ) {
3313  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3314  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3315  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL);
3316  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL);
3317  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3318  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3319  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL);
3320  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL);
3321  }
3322  }
3323 
// 2x2 tile: rows i..i+1, columns j..j+1.
3324  for( ; (j+2UL) <= N; j+=2UL )
3325  {
3326  const size_t kbegin( ( IsUpper<MT4>::value )
3327  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3328  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3329  const size_t kend( ( IsLower<MT4>::value )
3330  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3331  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3332 
3333  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3334  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3335 
3336  SIMDType xmm1, xmm2, xmm3, xmm4;
3337  size_t k( kbegin );
3338 
3339  for( ; k<kpos; k+=SIMDSIZE ) {
3340  const SIMDType a1( A.load(i ,k) );
3341  const SIMDType a2( A.load(i+1UL,k) );
3342  const SIMDType b1( B.load(k,j ) );
3343  const SIMDType b2( B.load(k,j+1UL) );
3344  xmm1 = xmm1 + a1 * b1;
3345  xmm2 = xmm2 + a1 * b2;
3346  xmm3 = xmm3 + a2 * b1;
3347  xmm4 = xmm4 + a2 * b2;
3348  }
3349 
3350  (~C)(i ,j ) -= sum( xmm1 );
3351  (~C)(i ,j+1UL) -= sum( xmm2 );
3352  (~C)(i+1UL,j ) -= sum( xmm3 );
3353  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3354 
3355  for( ; remainder && k<kend; ++k ) {
3356  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3357  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3358  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3359  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3360  }
3361  }
3362 
// 2x1 tile: rows i..i+1, last remaining column j. kend depends only on the structure of A here.
3363  if( j < N )
3364  {
3365  const size_t kbegin( ( IsUpper<MT4>::value )
3366  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3367  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3368  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3369 
3370  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3371  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3372 
3373  SIMDType xmm1, xmm2;
3374  size_t k( kbegin );
3375 
3376  for( ; k<kpos; k+=SIMDSIZE ) {
3377  const SIMDType b1( B.load(k,j) );
3378  xmm1 = xmm1 + A.load(i ,k) * b1;
3379  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3380  }
3381 
3382  (~C)(i ,j) -= sum( xmm1 );
3383  (~C)(i+1UL,j) -= sum( xmm2 );
3384 
3385  for( ; remainder && k<kend; ++k ) {
3386  (~C)(i ,j) -= A(i ,k) * B(k,j);
3387  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3388  }
3389  }
3390  }
3391 
// ---- Last single row of C (only if M is odd) ---------------------------------------------
3392  if( i < M )
3393  {
3394  size_t j( 0UL );
3395 
// 1x4 tile: row i, columns j..j+3. kend depends only on the structure of B here.
3396  for( ; (j+4UL) <= N; j+=4UL )
3397  {
3398  const size_t kbegin( ( IsUpper<MT4>::value )
3399  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3400  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3401  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
3402 
3403  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3404  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3405 
3406  SIMDType xmm1, xmm2, xmm3, xmm4;
3407  size_t k( kbegin );
3408 
3409  for( ; k<kpos; k+=SIMDSIZE ) {
3410  const SIMDType a1( A.load(i,k) );
3411  xmm1 = xmm1 + a1 * B.load(k,j );
3412  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3413  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3414  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3415  }
3416 
3417  (~C)(i,j ) -= sum( xmm1 );
3418  (~C)(i,j+1UL) -= sum( xmm2 );
3419  (~C)(i,j+2UL) -= sum( xmm3 );
3420  (~C)(i,j+3UL) -= sum( xmm4 );
3421 
3422  for( ; remainder && k<kend; ++k ) {
3423  (~C)(i,j ) -= A(i,k) * B(k,j );
3424  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3425  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL);
3426  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL);
3427  }
3428  }
3429 
// 1x2 tile: row i, columns j..j+1.
3430  for( ; (j+2UL) <= N; j+=2UL )
3431  {
3432  const size_t kbegin( ( IsUpper<MT4>::value )
3433  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3434  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3435  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3436 
3437  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3438  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3439 
3440  SIMDType xmm1, xmm2;
3441  size_t k( kbegin );
3442 
3443  for( ; k<kpos; k+=SIMDSIZE ) {
3444  const SIMDType a1( A.load(i,k) );
3445  xmm1 = xmm1 + a1 * B.load(k,j );
3446  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3447  }
3448 
3449  (~C)(i,j ) -= sum( xmm1 );
3450  (~C)(i,j+1UL) -= sum( xmm2 );
3451 
3452  for( ; remainder && k<kend; ++k ) {
3453  (~C)(i,j ) -= A(i,k) * B(k,j );
3454  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3455  }
3456  }
3457 
// 1x1 tile: last row, last column. The upper k bound is always K in this case.
3458  if( j < N )
3459  {
3460  const size_t kbegin( ( IsUpper<MT4>::value )
3461  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3462  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3463 
3464  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3465  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3466 
3467  SIMDType xmm1;
3468  size_t k( kbegin );
3469 
3470  for( ; k<kpos; k+=SIMDSIZE ) {
3471  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3472  }
3473 
3474  (~C)(i,j) -= sum( xmm1 );
3475 
3476  for( ; remainder && k<K; ++k ) {
3477  (~C)(i,j) -= A(i,k) * B(k,j);
3478  }
3479  }
3480  }
3481  }
3483  //**********************************************************************************************
3484 
3485  //**Default subtraction assignment to column-major dense matrices (small matrices)**************
// Vectorized small-matrix kernel for C -= A * B on a column-major target, selected by the
// EnableIf_ condition when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C is processed in register tiles: groups of four rows with 2, then 1 column(s); then pairs of
// rows with 2, then 1 column(s); then a possible last single row with 2 and 1 column(s). For
// each tile the inner dimension k is restricted to [kbegin,kend) according to the
// IsUpper/IsLower traits of A and B. The SIMD loop accumulates A.load(i,k) * B.load(k,j)
// products; sum() reduces each accumulator to a scalar that is subtracted from the target
// element. A scalar loop handles the k values the SIMD loop did not cover.
//
// NOTE(review): the xmm accumulators are default-constructed and immediately read in
// 'xmm = xmm + ...'; this presumably relies on SIMDType default-initializing to zero - confirm.
//
// \param C  Target dense matrix (column-major), accessed through ~C.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3500  template< typename MT3 // Type of the left-hand side target matrix
3501  , typename MT4 // Type of the left-hand side matrix operand
3502  , typename MT5 > // Type of the right-hand side matrix operand
3503  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3504  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3505  {
3506  const size_t M( A.rows() );
3507  const size_t N( B.columns() );
3508  const size_t K( A.columns() );
3509 
// A scalar remainder loop is needed unless both operands are padded; if both are padded the
// SIMD loop runs all the way to kend.
3510  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
3511 
3512  size_t i( 0UL );
3513 
// ---- Four rows of C at a time ------------------------------------------------------------
3514  for( ; (i+4UL) <= M; i+=4UL )
3515  {
3516  size_t j( 0UL );
3517 
// 4x2 tile: rows i..i+3, columns j..j+1.
3518  for( ; (j+2UL) <= N; j+=2UL )
3519  {
// kbegin is rounded down to a multiple of SIMDSIZE via the size_t(-SIMDSIZE) mask.
3520  const size_t kbegin( ( IsUpper<MT4>::value )
3521  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3522  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3523  const size_t kend( ( IsLower<MT4>::value )
3524  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
3525  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3526 
// kpos: end of the SIMD loop (kend rounded down to a multiple of SIMDSIZE if a remainder
// loop is required, as verified by the assertion below).
3527  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3528  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3529 
3530  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3531  size_t k( kbegin );
3532 
// Accumulator pairs (xmm1,xmm2), (xmm3,xmm4), (xmm5,xmm6), (xmm7,xmm8) belong to rows
// i, i+1, i+2, i+3; within a pair the first is column j, the second column j+1.
3533  for( ; k<kpos; k+=SIMDSIZE ) {
3534  const SIMDType a1( A.load(i ,k) );
3535  const SIMDType a2( A.load(i+1UL,k) );
3536  const SIMDType a3( A.load(i+2UL,k) );
3537  const SIMDType a4( A.load(i+3UL,k) );
3538  const SIMDType b1( B.load(k,j ) );
3539  const SIMDType b2( B.load(k,j+1UL) );
3540  xmm1 = xmm1 + a1 * b1;
3541  xmm2 = xmm2 + a1 * b2;
3542  xmm3 = xmm3 + a2 * b1;
3543  xmm4 = xmm4 + a2 * b2;
3544  xmm5 = xmm5 + a3 * b1;
3545  xmm6 = xmm6 + a3 * b2;
3546  xmm7 = xmm7 + a4 * b1;
3547  xmm8 = xmm8 + a4 * b2;
3548  }
3549 
3550  (~C)(i ,j ) -= sum( xmm1 );
3551  (~C)(i ,j+1UL) -= sum( xmm2 );
3552  (~C)(i+1UL,j ) -= sum( xmm3 );
3553  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3554  (~C)(i+2UL,j ) -= sum( xmm5 );
3555  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
3556  (~C)(i+3UL,j ) -= sum( xmm7 );
3557  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
3558 
// Scalar handling of the k values in [kpos,kend).
3559  for( ; remainder && k<kend; ++k ) {
3560  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3561  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3562  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3563  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3564  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3565  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL);
3566  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3567  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL);
3568  }
3569  }
3570 
// 4x1 tile: rows i..i+3, last remaining column j. kend depends only on the structure of A here.
3571  if( j < N )
3572  {
3573  const size_t kbegin( ( IsUpper<MT4>::value )
3574  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3575  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3576  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
3577 
3578  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3579  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3580 
3581  SIMDType xmm1, xmm2, xmm3, xmm4;
3582  size_t k( kbegin );
3583 
3584  for( ; k<kpos; k+=SIMDSIZE ) {
3585  const SIMDType b1( B.load(k,j) );
3586  xmm1 = xmm1 + A.load(i ,k) * b1;
3587  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3588  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3589  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3590  }
3591 
3592  (~C)(i ,j) -= sum( xmm1 );
3593  (~C)(i+1UL,j) -= sum( xmm2 );
3594  (~C)(i+2UL,j) -= sum( xmm3 );
3595  (~C)(i+3UL,j) -= sum( xmm4 );
3596 
3597  for( ; remainder && k<kend; ++k ) {
3598  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3599  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3600  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j );
3601  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j );
3602  }
3603  }
3604  }
3605 
// ---- Two rows of C at a time (rows left over by the four-row loop) -----------------------
3606  for( ; (i+2UL) <= M; i+=2UL )
3607  {
3608  size_t j( 0UL );
3609 
// 2x2 tile: rows i..i+1, columns j..j+1.
3610  for( ; (j+2UL) <= N; j+=2UL )
3611  {
3612  const size_t kbegin( ( IsUpper<MT4>::value )
3613  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3614  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3615  const size_t kend( ( IsLower<MT4>::value )
3616  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3617  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3618 
3619  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3620  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3621 
3622  SIMDType xmm1, xmm2, xmm3, xmm4;
3623  size_t k( kbegin );
3624 
3625  for( ; k<kpos; k+=SIMDSIZE ) {
3626  const SIMDType a1( A.load(i ,k) );
3627  const SIMDType a2( A.load(i+1UL,k) );
3628  const SIMDType b1( B.load(k,j ) );
3629  const SIMDType b2( B.load(k,j+1UL) );
3630  xmm1 = xmm1 + a1 * b1;
3631  xmm2 = xmm2 + a1 * b2;
3632  xmm3 = xmm3 + a2 * b1;
3633  xmm4 = xmm4 + a2 * b2;
3634  }
3635 
3636  (~C)(i ,j ) -= sum( xmm1 );
3637  (~C)(i ,j+1UL) -= sum( xmm2 );
3638  (~C)(i+1UL,j ) -= sum( xmm3 );
3639  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3640 
3641  for( ; remainder && k<kend; ++k ) {
3642  (~C)(i ,j ) -= A(i ,k) * B(k,j );
3643  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL);
3644  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j );
3645  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL);
3646  }
3647  }
3648 
// 2x1 tile: rows i..i+1, last remaining column j.
3649  if( j < N )
3650  {
3651  const size_t kbegin( ( IsUpper<MT4>::value )
3652  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3653  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3654  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3655 
3656  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3657  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3658 
3659  SIMDType xmm1, xmm2;
3660  size_t k( kbegin );
3661 
3662  for( ; k<kpos; k+=SIMDSIZE ) {
3663  const SIMDType b1( B.load(k,j) );
3664  xmm1 = xmm1 + A.load(i ,k) * b1;
3665  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3666  }
3667 
3668  (~C)(i ,j) -= sum( xmm1 );
3669  (~C)(i+1UL,j) -= sum( xmm2 );
3670 
3671  for( ; remainder && k<kend; ++k ) {
3672  (~C)(i ,j) -= A(i ,k) * B(k,j);
3673  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j);
3674  }
3675  }
3676  }
// ---- Last single row of C (only if M is odd) ---------------------------------------------
3677  if( i < M )
3678  {
3679  size_t j( 0UL );
3680 
// 1x2 tile: row i, columns j..j+1. kend depends only on the structure of B here.
3681  for( ; (j+2UL) <= N; j+=2UL )
3682  {
3683  const size_t kbegin( ( IsUpper<MT4>::value )
3684  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3685  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3686  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3687 
3688  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
3689  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3690 
3691  SIMDType xmm1, xmm2;
3692  size_t k( kbegin );
3693 
3694  for( ; k<kpos; k+=SIMDSIZE ) {
3695  const SIMDType a1( A.load(i,k) );
3696  xmm1 = xmm1 + a1 * B.load(k,j );
3697  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3698  }
3699 
3700  (~C)(i,j ) -= sum( xmm1 );
3701  (~C)(i,j+1UL) -= sum( xmm2 );
3702 
3703  for( ; remainder && k<kend; ++k ) {
3704  (~C)(i,j ) -= A(i,k) * B(k,j );
3705  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL);
3706  }
3707  }
3708 
// 1x1 tile: last row, last column. The upper k bound is always K in this case.
3709  if( j < N )
3710  {
3711  const size_t kbegin( ( IsUpper<MT4>::value )
3712  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
3713  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
3714 
3715  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
3716  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
3717 
3718  SIMDType xmm1;
3719  size_t k( kbegin );
3720 
3721  for( ; k<kpos; k+=SIMDSIZE ) {
3722  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3723  }
3724 
3725  (~C)(i,j) -= sum( xmm1 );
3726 
3727  for( ; remainder && k<K; ++k ) {
3728  (~C)(i,j) -= A(i,k) * B(k,j);
3729  }
3730  }
3731  }
3732  }
3734  //**********************************************************************************************
3735 
3736  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Large-matrix subtraction assignment kernel, non-vectorized fallback. The DisableIf_ condition
// selects this overload when UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold; it forwards
// unchanged to selectDefaultSubAssignKernel.
//
// \param C  Target matrix.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3750  template< typename MT3 // Type of the left-hand side target matrix
3751  , typename MT4 // Type of the left-hand side matrix operand
3752  , typename MT5 > // Type of the right-hand side matrix operand
3753  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3754  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3755  {
3756  selectDefaultSubAssignKernel( ~C, A, B );
3757  }
3759  //**********************************************************************************************
3760 
3761  //**Default subtraction assignment to row-major dense matrices (large matrices)*****************
// Large-matrix subtraction assignment kernel for row-major targets, selected by the EnableIf_
// condition when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds. No dedicated large-matrix
// implementation exists yet (see the TODO); the call is forwarded to selectSmallSubAssignKernel.
//
// \param C  Target dense matrix (row-major).
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3776  template< typename MT3 // Type of the left-hand side target matrix
3777  , typename MT4 // Type of the left-hand side matrix operand
3778  , typename MT5 > // Type of the right-hand side matrix operand
3779  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3780  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3781  {
3782  // TODO
3783  selectSmallSubAssignKernel( ~C, A, B );
3784  }
3786  //**********************************************************************************************
3787 
3788  //**Default subtraction assignment to column-major dense matrices (large matrices)**************
// Large-matrix subtraction assignment kernel for column-major targets, selected by the
// EnableIf_ condition when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds. No dedicated
// large-matrix implementation exists yet (see the TODO); the call is forwarded to
// selectSmallSubAssignKernel.
//
// \param C  Target dense matrix (column-major).
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3803  template< typename MT3 // Type of the left-hand side target matrix
3804  , typename MT4 // Type of the left-hand side matrix operand
3805  , typename MT5 > // Type of the right-hand side matrix operand
3806  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5> >
3807  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3808  {
3809  // TODO
3810  selectSmallSubAssignKernel( ~C, A, B );
3811  }
3813  //**********************************************************************************************
3814 
3815  //**Default subtraction assignment to dense matrices********************************************
// Fallback of the BLAS kernel selection for the subtraction assignment. The DisableIf_ condition
// selects this overload when UseBlasKernel<MT3,MT4,MT5> does not hold; it forwards unchanged to
// selectLargeSubAssignKernel.
//
// \param C  Target matrix.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3829  template< typename MT3 // Type of the left-hand side target matrix
3830  , typename MT4 // Type of the left-hand side matrix operand
3831  , typename MT5 > // Type of the right-hand side matrix operand
3832  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5> >
3833  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3834  {
3835  selectLargeSubAssignKernel( C, A, B );
3836  }
3838  //**********************************************************************************************
3839 
3840  //**BLAS-based subtraction assignment to dense matrices*****************************************
3841 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
3842 
// BLAS-based kernel for C -= A * B, selected by the EnableIf_ condition when
// UseBlasKernel<MT3,MT4,MT5> holds. Three cases are distinguished at compile time:
//  - A triangular: B is copied into a temporary, trmm is applied with A from the left, and the
//    temporary is subtracted from C.
//  - B triangular: A is copied into a temporary, trmm is applied with B from the right, and the
//    temporary is subtracted from C.
//  - otherwise: a single gemm call updates C in place.
//
// \param C  Target matrix.
// \param A  Left-hand side operand.
// \param B  Right-hand side operand.
3855  template< typename MT3 // Type of the left-hand side target matrix
3856  , typename MT4 // Type of the left-hand side matrix operand
3857  , typename MT5 > // Type of the right-hand side matrix operand
3858  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5> >
3859  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3860  {
3861  typedef ElementType_<MT3> ET;
3862 
3863  if( IsTriangular<MT4>::value ) {
// The temporary has the result type of the target and is built from serial( B ).
3864  ResultType_<MT3> tmp( serial( B ) );
3865  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3866  subAssign( C, tmp );
3867  }
3868  else if( IsTriangular<MT5>::value ) {
3869  ResultType_<MT3> tmp( serial( A ) );
3870  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(1) );
3871  subAssign( C, tmp );
3872  }
3873  else {
// NOTE(review): ET(-1) and ET(1) are presumably the alpha and beta scaling factors of the gemm
// wrapper, i.e. C = -1*A*B + 1*C - confirm against blaze/math/blas/gemm.h.
3874  gemm( C, A, B, ET(-1), ET(1) );
3875  }
3876  }
3878 #endif
3879  //**********************************************************************************************
3880 
3881  //**Subtraction assignment to sparse matrices***************************************************
3882  // No special implementation for the subtraction assignment to sparse matrices.
3883  //**********************************************************************************************
3884 
3885  //**Multiplication assignment to dense matrices*************************************************
3886  // No special implementation for the multiplication assignment to dense matrices.
3887  //**********************************************************************************************
3888 
3889  //**Multiplication assignment to sparse matrices************************************************
3890  // No special implementation for the multiplication assignment to sparse matrices.
3891  //**********************************************************************************************
3892 
3893  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the multiplication expression to a dense matrix. The overload takes part
// in overload resolution only if IsEvaluationRequired<MT,MT1,MT2> holds.
// Both operands are evaluated into objects of type LT and RT (type aliases declared outside
// this block) and the product of the evaluated operands is handed to smpAssign again.
//
// \param lhs  Target dense matrix.
// \param rhs  Multiplication expression to be assigned.
3908  template< typename MT // Type of the target dense matrix
3909  , bool SO > // Storage order of the target dense matrix
3910  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
3911  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
3912  {
3914 
3915  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3916  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3917 
// Empty target: nothing to do. Empty inner dimension: the target is reset.
3918  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3919  return;
3920  }
3921  else if( rhs.lhs_.columns() == 0UL ) {
3922  reset( ~lhs );
3923  return;
3924  }
3925 
3926  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3927  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3928 
3929  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3930  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3931  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3932  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3933  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3934  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3935 
// NOTE(review): A * B builds a new expression from the evaluated operands; presumably this
// resolves to an smpAssign overload that needs no further evaluation - confirm.
3936  smpAssign( ~lhs, A * B );
3937  }
3939  //**********************************************************************************************
3940 
3941  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the multiplication expression to a sparse matrix. The overload takes part
// in overload resolution only if IsEvaluationRequired<MT,MT1,MT2> holds.
// The expression is first evaluated into a temporary, which is then assigned to the target.
//
// \param lhs  Target sparse matrix.
// \param rhs  Multiplication expression to be assigned.
3956  template< typename MT // Type of the target sparse matrix
3957  , bool SO > // Storage order of the target sparse matrix
3958  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
3959  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
3960  {
3962 
// Temporary type: OppositeType if SO is true, ResultType otherwise (both declared outside
// this block).
3963  typedef IfTrue_< SO, OppositeType, ResultType > TmpType;
3964 
3970  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( CompositeType_<TmpType> );
3971 
3972  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3973  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3974 
3975  const TmpType tmp( rhs );
3976  smpAssign( ~lhs, tmp );
3977  }
3979  //**********************************************************************************************
3980 
3981  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the multiplication expression to a dense matrix. The overload
// takes part in overload resolution only if IsEvaluationRequired<MT,MT1,MT2> holds.
// Both operands are evaluated into objects of type LT and RT (type aliases declared outside
// this block) and the product of the evaluated operands is handed to smpAddAssign again.
//
// \param lhs  Target dense matrix.
// \param rhs  Multiplication expression to be added.
3997  template< typename MT // Type of the target dense matrix
3998  , bool SO > // Storage order of the target dense matrix
3999  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
4000  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4001  {
4003 
4004  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4005  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4006 
// Empty target or empty inner dimension: the target is left unchanged.
4007  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4008  return;
4009  }
4010 
4011  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4012  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4013 
4014  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4015  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4016  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4017  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4018  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4019  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4020 
// NOTE(review): A * B builds a new expression from the evaluated operands; presumably this
// resolves to an smpAddAssign overload that needs no further evaluation - confirm.
4021  smpAddAssign( ~lhs, A * B );
4022  }
4024  //**********************************************************************************************
4025 
4026  //**SMP addition assignment to sparse matrices**************************************************
4027  // No special implementation for the SMP addition assignment to sparse matrices.
4028  //**********************************************************************************************
4029 
4030  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the multiplication expression to a dense matrix. The overload
// takes part in overload resolution only if IsEvaluationRequired<MT,MT1,MT2> holds.
// Both operands are evaluated into objects of type LT and RT (type aliases declared outside
// this block) and the product of the evaluated operands is handed to smpSubAssign again.
//
// \param lhs  Target dense matrix.
// \param rhs  Multiplication expression to be subtracted.
4046  template< typename MT // Type of the target dense matrix
4047  , bool SO > // Storage order of the target dense matrix
4048  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
4049  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4050  {
4052 
4053  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4054  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4055 
// Empty target or empty inner dimension: the target is left unchanged.
4056  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4057  return;
4058  }
4059 
4060  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4061  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4062 
4063  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4064  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4065  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4066  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4067  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4068  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4069 
// NOTE(review): A * B builds a new expression from the evaluated operands; presumably this
// resolves to an smpSubAssign overload that needs no further evaluation - confirm.
4070  smpSubAssign( ~lhs, A * B );
4071  }
4073  //**********************************************************************************************
4074 
4075  //**SMP subtraction assignment to sparse matrices***********************************************
4076  // No special implementation for the SMP subtraction assignment to sparse matrices.
4077  //**********************************************************************************************
4078 
4079  //**SMP multiplication assignment to dense matrices*********************************************
4080  // No special implementation for the SMP multiplication assignment to dense matrices.
4081  //**********************************************************************************************
4082 
4083  //**SMP multiplication assignment to sparse matrices********************************************
4084  // No special implementation for the SMP multiplication assignment to sparse matrices.
4085  //**********************************************************************************************
4086 
4087  //**Compile time checks*************************************************************************
4095  //**********************************************************************************************
4096 };
4097 //*************************************************************************************************
4098 
4099 
4100 
4101 
4102 //=================================================================================================
4103 //
4104 // DMATSCALARMULTEXPR SPECIALIZATION
4105 //
4106 //=================================================================================================
4107 
4108 //*************************************************************************************************
4116 template< typename MT1 // Type of the left-hand side dense matrix
4117  , typename MT2 // Type of the right-hand side dense matrix
4118  , typename ST > // Type of the right-hand side scalar value
4119 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
4120  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
4121  , private MatScalarMultExpr
4122  , private Computation
4123 {
4124  private:
4125  //**Type definitions****************************************************************************
4126  typedef DMatTDMatMultExpr<MT1,MT2> MMM;
4127  typedef ResultType_<MMM> RES;
4128  typedef ResultType_<MT1> RT1;
4129  typedef ResultType_<MT2> RT2;
4130  typedef ElementType_<RT1> ET1;
4131  typedef ElementType_<RT2> ET2;
4132  typedef CompositeType_<MT1> CT1;
4133  typedef CompositeType_<MT2> CT2;
4134  //**********************************************************************************************
4135 
4136  //**********************************************************************************************
4138  enum : bool { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4139  //**********************************************************************************************
4140 
4141  //**********************************************************************************************
      //! Compile-time flag for the right-hand side operand: true if MT2 is a computation or
      //! requires evaluation. It selects the type \c RT below (\c const \c RT2 when set,
      //! \c CT2 otherwise) and disables \c smpAssignable when set.
4143  enum : bool { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4144  //**********************************************************************************************
4145 
4146  //**********************************************************************************************
      //! Helper trait whose nested \c value is true if at least one of the two matrix operands
      //! has to be evaluated (\c evaluateLeft or \c evaluateRight).
      //! The template parameters T1, T2 and T3 do not influence the result; presumably they only
      //! make the trait dependent so that it can be used in SFINAE conditions (it is not
      //! referenced in the part of the class visible here).
4148 
4151  template< typename T1, typename T2, typename T3 >
4152  struct IsEvaluationRequired {
4153  enum : bool { value = ( evaluateLeft || evaluateRight ) };
4154  };
4155  //**********************************************************************************************
4156 
4157  //**********************************************************************************************
      //! Helper trait deciding whether a BLAS kernel may be used for the given combination of
      //! target matrix type (T1), operand matrix types (T2, T3) and scalar type (T4).
      //! The nested \c value is true only if all of the following hold:
      //!  - BLAS mode is enabled,
      //!  - the target offers mutable and both operands offer constant data access,
      //!  - neither operand is diagonal,
      //!  - all three matrix types are SIMD enabled,
      //!  - all three element types are BLAS compatible and identical,
      //!  - the combination "built-in element type with complex scalar" does not occur.
      //! It is not referenced in the part of the class visible here; presumably the BLAS-based
      //! kernels defined further down use it as their SFINAE condition.
      //
      // NOTE(review): the opening line of the enumeration (listing line 4163) was missing in this
      // listing, which left the struct body ill-formed. It has been restored here; the leading
      // BLAZE_BLAS_MODE operand should be confirmed against the upstream header.
4159 
4161  template< typename T1, typename T2, typename T3, typename T4 >
4162  struct UseBlasKernel {
4163  enum : bool { value = BLAZE_BLAS_MODE &&
4164  HasMutableDataAccess<T1>::value &&
4165  HasConstDataAccess<T2>::value &&
4166  HasConstDataAccess<T3>::value &&
4167  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4168  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4169  IsBLASCompatible< ElementType_<T1> >::value &&
4170  IsBLASCompatible< ElementType_<T2> >::value &&
4171  IsBLASCompatible< ElementType_<T3> >::value &&
4172  IsSame< ElementType_<T1>, ElementType_<T2> >::value &&
4173  IsSame< ElementType_<T1>, ElementType_<T3> >::value &&
4174  !( IsBuiltin< ElementType_<T1> >::value && IsComplex<T4>::value ) };
4175  };
4176  //**********************************************************************************************
4177 
4178  //**********************************************************************************************
      //! Helper trait deciding whether the vectorized default kernel may be used for the given
      //! combination of target matrix type (T1), operand matrix types (T2, T3) and scalar type (T4).
      //! The nested \c value is true only if optimized kernels are enabled, neither operand is
      //! diagonal, all three matrix types are SIMD enabled, the three element types and the scalar
      //! type are SIMD combinable, and SIMD addition and multiplication are available for the two
      //! operand element types.
      //! It is the SFINAE condition of the two \c selectSmallAssignKernel overloads visible below
      //! (EnableIf_ for the vectorized one, DisableIf_ for the fallback).
4180 
4182  template< typename T1, typename T2, typename T3, typename T4 >
4183  struct UseVectorizedDefaultKernel {
4184  enum : bool { value = useOptimizedKernels &&
4185  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4186  T1::simdEnabled && T2::simdEnabled && T3::simdEnabled &&
4187  AreSIMDCombinable< ElementType_<T1>
4188  , ElementType_<T2>
4189  , ElementType_<T3>
4190  , T4 >::value &&
4191  HasSIMDAdd< ElementType_<T2>, ElementType_<T3> >::value &&
4192  HasSIMDMult< ElementType_<T2>, ElementType_<T3> >::value };
4193  };
4194  //**********************************************************************************************
4195 
4196  public:
4197  //**Type definitions****************************************************************************
      // Public type interface of the scaled matrix multiplication expression.
4198  typedef DMatScalarMultExpr<MMM,ST,false> This;  //!< Type of this DMatScalarMultExpr instance.
4199  typedef MultTrait_<RES,ST> ResultType;  //!< Result type: product of the multiplication result type and the scalar type.
4200  typedef OppositeType_<ResultType> OppositeType;  //!< Opposite type derived from ResultType (OppositeType_ alias).
4201  typedef TransposeType_<ResultType> TransposeType;  //!< Transpose type derived from ResultType.
4202  typedef ElementType_<ResultType> ElementType;  //!< Element type of ResultType.
4203  typedef SIMDTrait_<ElementType> SIMDType;  //!< SIMD type belonging to ElementType; used as accumulator type in the vectorized kernels.
4204  typedef const ElementType ReturnType;  //!< Return type of the element access functions (returned by value).
4205  typedef const ResultType CompositeType;  //!< Type used when this expression is an operand of another expression.
4206 
      //! Type of the left-hand side operand: the matrix multiplication expression, held by value.
4208  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand;
4209 
      //! Type of the right-hand side operand: the scalar.
4211  typedef ST RightOperand;
4212 
      //! Type of the left-hand side matrix operand inside the kernels: a \c const \c RT1 temporary
      //! if the operand has to be evaluated (\c evaluateLeft), its composite type \c CT1 otherwise.
4214  typedef IfTrue_< evaluateLeft, const RT1, CT1 > LT;
4215 
      //! Type of the right-hand side matrix operand inside the kernels: a \c const \c RT2 temporary
      //! if the operand has to be evaluated (\c evaluateRight), its composite type \c CT2 otherwise.
4217  typedef IfTrue_< evaluateRight, const RT2, CT2 > RT;
4218  //**********************************************************************************************
4219 
4220  //**Compilation flags***************************************************************************
      //! Compilation flag for SIMD optimization: true if neither matrix operand is diagonal, both
      //! are SIMD enabled, the two element types and the scalar type are SIMD combinable, and SIMD
      //! addition and multiplication exist for the two element types.
4222  enum : bool { simdEnabled = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
4223  MT1::simdEnabled && MT2::simdEnabled &&
4224  AreSIMDCombinable<ET1,ET2,ST>::value &&
4225  HasSIMDAdd<ET1,ET2>::value &&
4226  HasSIMDMult<ET1,ET2>::value };
4227 
      //! Compilation flag for SMP assignments: true only if neither operand has to be evaluated
      //! into a temporary and both operand types are themselves SMP assignable.
4229  enum : bool { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4230  !evaluateRight && MT2::smpAssignable };
4231  //**********************************************************************************************
4232 
4233  //**SIMD properties*****************************************************************************
      //! Number of elements of type ElementType packed in one SIMD vector (SIMDTrait<ElementType>::size).
      //! The vectorized kernels use it as the step width of the k-loop and for rounding loop bounds.
4235  enum : size_t { SIMDSIZE = SIMDTrait<ElementType>::size };
4236  //**********************************************************************************************
4237 
4238  //**Constructor*********************************************************************************
      /*!\brief Constructor for the scaled matrix multiplication expression.
      //
      // \param matrix The left-hand side matrix multiplication expression.
      // \param scalar The right-hand side scalar of the multiplication expression.
      //
      // Both arguments are stored in the members \c matrix_ and \c scalar_. Since \c LeftOperand is
      // a (const) value type, the multiplication expression object itself is copied.
      */
4244  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4245  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4246  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4247  {}
4248  //**********************************************************************************************
4249 
4250  //**Access operator*****************************************************************************
      /*!\brief 2D-access to the elements of the scaled matrix product.
      //
      // \param i Row index, expected to be smaller than rows().
      // \param j Column index, expected to be smaller than columns().
      // \return The element (i,j) of the wrapped multiplication expression times the scalar.
      //
      // The indices are only checked via BLAZE_INTERNAL_ASSERT; use at() for an access that
      // reports invalid indices by throwing.
      */
4257  inline ReturnType operator()( size_t i, size_t j ) const {
4258  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4259  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4260  return matrix_(i,j) * scalar_;
4261  }
4262  //**********************************************************************************************
4263 
4264  //**At function*********************************************************************************
      /*!\brief Checked access to the elements of the scaled matrix product.
      //
      // \param i Row index.
      // \param j Column index.
      // \return The element (i,j), computed by operator().
      //
      // If \a i is not smaller than the number of rows or \a j is not smaller than the number of
      // columns, BLAZE_THROW_OUT_OF_RANGE is invoked with the corresponding message (presumably
      // throwing std::out_of_range; the macro is defined outside this file).
      */
4272  inline ReturnType at( size_t i, size_t j ) const {
4273  if( i >= matrix_.rows() ) {
4274  BLAZE_THROW_OUT_OF_RANGE( "Invalid row access index" );
4275  }
4276  if( j >= matrix_.columns() ) {
4277  BLAZE_THROW_OUT_OF_RANGE( "Invalid column access index" );
4278  }
4279  return (*this)(i,j);
4280  }
4281  //**********************************************************************************************
4282 
4283  //**Rows function*******************************************************************************
      /*!\brief Returns the current number of rows of the expression.
      //
      // \return The number of rows of the wrapped matrix multiplication expression.
      */
4288  inline size_t rows() const {
4289  return matrix_.rows();
4290  }
4291  //**********************************************************************************************
4292 
4293  //**Columns function****************************************************************************
      /*!\brief Returns the current number of columns of the expression.
      //
      // \return The number of columns of the wrapped matrix multiplication expression.
      */
4298  inline size_t columns() const {
4299  return matrix_.columns();
4300  }
4301  //**********************************************************************************************
4302 
4303  //**Left operand access*************************************************************************
      /*!\brief Returns the left-hand side operand, i.e. the matrix multiplication expression.
      //
      // \return A copy of the stored multiplication expression (LeftOperand is a value type).
      */
4308  inline LeftOperand leftOperand() const {
4309  return matrix_;
4310  }
4311  //**********************************************************************************************
4312 
4313  //**Right operand access************************************************************************
      /*!\brief Returns the right-hand side operand, i.e. the scalar.
      //
      // \return The stored scalar value.
      */
4318  inline RightOperand rightOperand() const {
4319  return scalar_;
4320  }
4321  //**********************************************************************************************
4322 
4323  //**********************************************************************************************
      /*!\brief Returns whether the expression can alias with the given address \a alias.
      //
      // \param alias The address to be checked.
      // \return The answer of the wrapped matrix multiplication expression; the scalar operand
      //         is not taken into account.
      */
4329  template< typename T >
4330  inline bool canAlias( const T* alias ) const {
4331  return matrix_.canAlias( alias );
4332  }
4333  //**********************************************************************************************
4334 
4335  //**********************************************************************************************
      /*!\brief Returns whether the expression is aliased with the given address \a alias.
      //
      // \param alias The address to be checked.
      // \return The answer of the wrapped matrix multiplication expression; the scalar operand
      //         is not taken into account.
      */
4341  template< typename T >
4342  inline bool isAliased( const T* alias ) const {
4343  return matrix_.isAliased( alias );
4344  }
4345  //**********************************************************************************************
4346 
4347  //**********************************************************************************************
      /*!\brief Returns whether the operands of the expression are properly aligned in memory.
      //
      // \return The answer of the wrapped matrix multiplication expression.
      */
4352  inline bool isAligned() const {
4353  return matrix_.isAligned();
4354  }
4355  //**********************************************************************************************
4356 
4357  //**********************************************************************************************
      /*!\brief Returns whether the expression can be used in SMP assignments.
      //
      // \return \a true if both of the following hold, \a false otherwise:
      //          - the BLAS library is not parallel, or the result has fewer than
      //            DMATTDMATMULT_THRESHOLD elements (the same limit below which
      //            selectAssignKernel() chooses the small kernels instead of the BLAS path);
      //          - the result has at least SMP_DMATTDMATMULT_THRESHOLD elements.
      //
      // The effect is that a product that would be handed to an already parallel BLAS is not
      // additionally split up by the SMP layer, and that tiny products are not parallelized.
      */
4362  inline bool canSMPAssign() const noexcept {
4363  return ( !BLAZE_BLAS_IS_PARALLEL ||
4364  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
4365  ( rows() * columns() >= SMP_DMATTDMATMULT_THRESHOLD );
4366  }
4367  //**********************************************************************************************
4368 
4369  private:
4370  //**Member variables****************************************************************************
4371  LeftOperand matrix_;  //!< Left-hand side operand: the matrix multiplication expression (stored by value).
4372  RightOperand scalar_;  //!< Right-hand side operand: the scalar applied to every element of the product.
4373  //**********************************************************************************************
4374 
4375  //**Assignment to dense matrices****************************************************************
      /*!\brief Assignment of a scaled matrix multiplication expression to a dense matrix
      //        (\f$ C=s*A*B \f$).
      //
      // \param lhs The target left-hand side dense matrix.
      // \param rhs The right-hand side scaled multiplication expression to be assigned.
      //
      // The function unwraps the two matrix operands of the multiplication, handles the
      // degenerate sizes, evaluates the operands if required (types LT and RT) and then passes
      // target, operands and scalar to selectAssignKernel(). The dimensions of \a lhs are
      // expected to match those of \a rhs (checked by internal assertion only).
      */
4387  template< typename MT // Type of the target dense matrix
4388  , bool SO > // Storage order of the target dense matrix
4389  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4390  {
4392 
4393  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4394  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4395 
      // Operands of the inner matrix multiplication A*B
4396  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
4397  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
4398 
      // Empty target: nothing to do. Empty inner dimension: every element is an empty sum, so the
      // target is reset instead of running a kernel (the kernels read A(i,kbegin) unconditionally).
4399  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4400  return;
4401  }
4402  else if( left.columns() == 0UL ) {
4403  reset( ~lhs );
4404  return;
4405  }
4406 
      // LT/RT are either const temporaries (operand is evaluated here) or the composite types of
      // the operands. Both are created from serial(...) -- presumably to keep a nested evaluation
      // from being parallelized again; serial() is defined outside this file.
4407  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4408  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4409 
4410  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4411  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4412  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4413  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4414  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4415  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4416 
4417  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4418  }
4419  //**********************************************************************************************
4420 
4421  //**Assignment to dense matrices (kernel selection)*********************************************
      /*!\brief Selection of the kernel for the assignment \f$ C=s*A*B \f$.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // The small-matrix kernel is chosen if one of the operands is diagonal or if the target has
      // fewer than DMATTDMATMULT_THRESHOLD elements; otherwise selectBlasAssignKernel() is called
      // (defined outside the part of the class visible here).
      */
4432  template< typename MT3 // Type of the left-hand side target matrix
4433  , typename MT4 // Type of the left-hand side matrix operand
4434  , typename MT5 // Type of the right-hand side matrix operand
4435  , typename ST2 > // Type of the scalar value
4436  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4437  {
4438  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
4439  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
4440  selectSmallAssignKernel( C, A, B, scalar );
4441  else
4442  selectBlasAssignKernel( C, A, B, scalar );
4443  }
4444  //**********************************************************************************************
4445 
4446  //**Default assignment to row-major dense matrices (general/general)****************************
      /*!\brief Default (scalar, non-vectorized) kernel for \f$ C=s*A*B \f$ with a row-major target
      //        and two non-diagonal operands.
      //
      // \param C The target left-hand side dense matrix (row-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // Every element of C is written exactly once: either it is reset, because the triangular
      // structure of A and/or B (IsLower/IsUpper/IsStrictly... traits) makes it structurally zero,
      // or it is computed as the dot product over the k-range in which both A(i,k) and B(k,j) can
      // be non-zero and then multiplied by \a scalar. The target does not have to be initialized.
      */
4460  template< typename MT3 // Type of the left-hand side target matrix
4461  , typename MT4 // Type of the left-hand side matrix operand
4462  , typename MT5 // Type of the right-hand side matrix operand
4463  , typename ST2 > // Type of the scalar value
4464  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
4465  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4466  {
4467  const size_t M( A.rows() );
4468  const size_t N( B.columns() );
4469  const size_t K( A.columns() );
4470 
      // Row range [ibegin,iend) that can contain non-zero results. A strictly lower A has a zero
      // first row (two zero rows of the product if B is strictly lower as well); a strictly upper
      // A has a zero last row (two if B is strictly upper as well).
4471  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
4472  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
4473  :( 0UL ) );
4474  const size_t iend( ( IsStrictlyUpper<MT4>::value )
4475  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
4476  :( M ) );
4477  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4478 
      // Leading structurally zero rows
4479  for( size_t i=0UL; i<ibegin; ++i ) {
4480  for( size_t j=0UL; j<N; ++j ) {
4481  reset( (~C)(i,j) );
4482  }
4483  }
4484  for( size_t i=ibegin; i<iend; ++i )
4485  {
      // Column range [jbegin,jend) of row i that can contain non-zero results. If both operands
      // are upper (lower), the product is upper (lower) as well and the range depends on i;
      // otherwise only a strictly triangular B removes the first or last column.
4486  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4487  ?( ( IsStrictlyUpper<MT4>::value )
4488  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
4489  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
4490  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
4491  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4492  ?( ( IsStrictlyLower<MT4>::value )
4493  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
4494  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
4495  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
4496  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4497 
4498  for( size_t j=0UL; j<jbegin; ++j ) {
4499  reset( (~C)(i,j) );
4500  }
4501  for( size_t j=jbegin; j<jend; ++j )
4502  {
      // Summation range [kbegin,kend): the intersection of the non-zero part of row i of A
      // and the non-zero part of column j of B.
4503  const size_t kbegin( ( IsUpper<MT4>::value )
4504  ?( ( IsLower<MT5>::value )
4505  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4506  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4507  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4508  :( ( IsLower<MT5>::value )
4509  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4510  :( 0UL ) ) );
4511  const size_t kend( ( IsLower<MT4>::value )
4512  ?( ( IsUpper<MT5>::value )
4513  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4514  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4515  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4516  :( ( IsUpper<MT5>::value )
4517  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4518  :( K ) ) );
4519  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4520 
      // The first product initializes the element (no prior reset needed), the remaining
      // products are accumulated and the scalar is applied once at the end.
4521  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4522  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4523  (~C)(i,j) += A(i,k) * B(k,j);
4524  }
4525  (~C)(i,j) *= scalar;
4526  }
4527  for( size_t j=jend; j<N; ++j ) {
4528  reset( (~C)(i,j) );
4529  }
4530  }
      // Trailing structurally zero rows
4531  for( size_t i=iend; i<M; ++i ) {
4532  for( size_t j=0UL; j<N; ++j ) {
4533  reset( (~C)(i,j) );
4534  }
4535  }
4536  }
4537  //**********************************************************************************************
4538 
4539  //**Default assignment to column-major dense matrices (general/general)*************************
      /*!\brief Default (scalar, non-vectorized) kernel for \f$ C=s*A*B \f$ with a column-major
      //        target and two non-diagonal operands.
      //
      // \param C The target left-hand side dense matrix (column-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // Column-wise counterpart of the row-major kernel: the outer loop runs over the columns j of
      // C, the inner loop over the rows i. Every element of C is written exactly once, either by
      // a reset (structurally zero) or by the dot product over the relevant k-range times \a scalar.
      */
4553  template< typename MT3 // Type of the left-hand side target matrix
4554  , typename MT4 // Type of the left-hand side matrix operand
4555  , typename MT5 // Type of the right-hand side matrix operand
4556  , typename ST2 > // Type of the scalar value
4557  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
4558  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4559  {
4560  const size_t M( A.rows() );
4561  const size_t N( B.columns() );
4562  const size_t K( A.columns() );
4563 
      // Column range [jbegin,jend) that can contain non-zero results. A strictly upper B has a
      // zero first column (two zero columns of the product if A is strictly upper as well); a
      // strictly lower B has a zero last column (two if A is strictly lower as well).
4564  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
4565  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
4566  :( 0UL ) );
4567  const size_t jend( ( IsStrictlyLower<MT5>::value )
4568  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
4569  :( N ) );
4570  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4571 
      // Leading structurally zero columns
4572  for( size_t j=0UL; j<jbegin; ++j ) {
4573  for( size_t i=0UL; i<M; ++i ) {
4574  reset( (~C)(i,j) );
4575  }
4576  }
4577  for( size_t j=jbegin; j<jend; ++j )
4578  {
      // Row range [ibegin,iend) of column j that can contain non-zero results. If both operands
      // are lower (upper), the product is lower (upper) as well and the range depends on j;
      // otherwise only a strictly triangular A removes the first or last row.
4579  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4580  ?( ( IsStrictlyLower<MT4>::value )
4581  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
4582  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4583  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
4584  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4585  ?( ( IsStrictlyUpper<MT4>::value )
4586  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
4587  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
4588  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
4589  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4590 
4591  for( size_t i=0UL; i<ibegin; ++i ) {
4592  reset( (~C)(i,j) );
4593  }
4594  for( size_t i=ibegin; i<iend; ++i )
4595  {
      // Summation range [kbegin,kend): the intersection of the non-zero part of row i of A
      // and the non-zero part of column j of B.
4596  const size_t kbegin( ( IsUpper<MT4>::value )
4597  ?( ( IsLower<MT5>::value )
4598  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4599  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4600  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4601  :( ( IsLower<MT5>::value )
4602  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4603  :( 0UL ) ) );
4604  const size_t kend( ( IsLower<MT4>::value )
4605  ?( ( IsUpper<MT5>::value )
4606  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4607  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4608  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4609  :( ( IsUpper<MT5>::value )
4610  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4611  :( K ) ) );
4612  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4613 
      // The first product initializes the element (no prior reset needed), the remaining
      // products are accumulated and the scalar is applied once at the end.
4614  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4615  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4616  (~C)(i,j) += A(i,k) * B(k,j);
4617  }
4618  (~C)(i,j) *= scalar;
4619  }
4620  for( size_t i=iend; i<M; ++i ) {
4621  reset( (~C)(i,j) );
4622  }
4623  }
      // Trailing structurally zero columns
4624  for( size_t j=jend; j<N; ++j ) {
4625  for( size_t i=0UL; i<M; ++i ) {
4626  reset( (~C)(i,j) );
4627  }
4628  }
4629  }
4630  //**********************************************************************************************
4631 
4632  //**Default assignment to row-major dense matrices (general/diagonal)***************************
      /*!\brief Default kernel for \f$ C=s*A*B \f$ with a row-major target, a non-diagonal
      //        left-hand side operand and a diagonal right-hand side operand.
      //
      // \param C The target left-hand side dense matrix (row-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side diagonal multiplication operand.
      // \param scalar The scaling factor.
      //
      // Because B is diagonal the dot product collapses to a single term:
      // C(i,j) = A(i,j) * B(j,j) * scalar. Elements outside the triangular part of A are reset.
      */
4646  template< typename MT3 // Type of the left-hand side target matrix
4647  , typename MT4 // Type of the left-hand side matrix operand
4648  , typename MT5 // Type of the right-hand side matrix operand
4649  , typename ST2 > // Type of the scalar value
4650  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4651  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4652  {
4653  const size_t M( A.rows() );
4654  const size_t N( B.columns() );
4655 
4656  for( size_t i=0UL; i<M; ++i )
4657  {
      // Column range of row i in which A (and therefore the product) can be non-zero
4658  const size_t jbegin( ( IsUpper<MT4>::value )
4659  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4660  :( 0UL ) );
4661  const size_t jend( ( IsLower<MT4>::value )
4662  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4663  :( N ) );
4664  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4665 
      // The guards are compile-time constants; without them the reset loops would simply be
      // empty (jbegin is 0 for a non-upper A, jend is N for a non-lower A).
4666  if( IsUpper<MT4>::value ) {
4667  for( size_t j=0UL; j<jbegin; ++j ) {
4668  reset( (~C)(i,j) );
4669  }
4670  }
4671  for( size_t j=jbegin; j<jend; ++j ) {
4672  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4673  }
4674  if( IsLower<MT4>::value ) {
4675  for( size_t j=jend; j<N; ++j ) {
4676  reset( (~C)(i,j) );
4677  }
4678  }
4679  }
4680  }
4681  //**********************************************************************************************
4682 
4683  //**Default assignment to column-major dense matrices (general/diagonal)************************
      /*!\brief Default kernel for \f$ C=s*A*B \f$ with a column-major target, a non-diagonal
      //        left-hand side operand and a diagonal right-hand side operand.
      //
      // \param C The target left-hand side dense matrix (column-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side diagonal multiplication operand.
      // \param scalar The scaling factor.
      //
      // Computes C(i,j) = A(i,j) * B(j,j) * scalar. The index space is traversed in square tiles
      // of BLOCK_SIZE x BLOCK_SIZE elements (BLOCK_SIZE is defined outside this file) --
      // presumably to improve cache locality, since C is column-major while A is read as A(i,j).
      // Within each tile, elements outside the triangular part of A are reset.
      */
4697  template< typename MT3 // Type of the left-hand side target matrix
4698  , typename MT4 // Type of the left-hand side matrix operand
4699  , typename MT5 // Type of the right-hand side matrix operand
4700  , typename ST2 > // Type of the scalar value
4701  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
4702  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4703  {
4704  const size_t M( A.rows() );
4705  const size_t N( B.columns() );
4706 
4707  const size_t block( BLOCK_SIZE );
4708 
      // Tile [ii,iend) x [jj,jend); the last tile in each direction is clipped to M and N
4709  for( size_t jj=0UL; jj<N; jj+=block ) {
4710  const size_t jend( min( N, jj+block ) );
4711  for( size_t ii=0UL; ii<M; ii+=block ) {
4712  const size_t iend( min( M, ii+block ) );
4713  for( size_t j=jj; j<jend; ++j )
4714  {
      // Row range [ibegin,ipos) of column j, restricted to the current tile, in which A can be
      // non-zero. NOTE(review): for tiles lying completely in the zero part of A, ibegin may
      // exceed iend (or ipos may be smaller than ii); the computation loop is then empty and
      // the reset loops are bounded by ii/iend, so only elements of the tile are touched.
4715  const size_t ibegin( ( IsLower<MT4>::value )
4716  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
4717  :( ii ) );
4718  const size_t ipos( ( IsUpper<MT4>::value )
4719  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
4720  :( iend ) );
4721 
4722  if( IsLower<MT4>::value ) {
4723  for( size_t i=ii; i<ibegin; ++i ) {
4724  reset( (~C)(i,j) );
4725  }
4726  }
4727  for( size_t i=ibegin; i<ipos; ++i ) {
4728  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4729  }
4730  if( IsUpper<MT4>::value ) {
4731  for( size_t i=ipos; i<iend; ++i ) {
4732  reset( (~C)(i,j) );
4733  }
4734  }
4735  }
4736  }
4737  }
4738  }
4739  //**********************************************************************************************
4740 
4741  //**Default assignment to row-major dense matrices (diagonal/general)***************************
      /*!\brief Default kernel for \f$ C=s*A*B \f$ with a row-major target, a diagonal left-hand
      //        side operand and a non-diagonal right-hand side operand.
      //
      // \param C The target left-hand side dense matrix (row-major).
      // \param A The left-hand side diagonal multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // Computes C(i,j) = A(i,i) * B(i,j) * scalar. The index space is traversed in square tiles
      // of BLOCK_SIZE x BLOCK_SIZE elements (BLOCK_SIZE is defined outside this file) --
      // presumably to improve cache locality, since C is row-major while B is read as B(i,j).
      // Within each tile, elements outside the triangular part of B are reset.
      */
4755  template< typename MT3 // Type of the left-hand side target matrix
4756  , typename MT4 // Type of the left-hand side matrix operand
4757  , typename MT5 // Type of the right-hand side matrix operand
4758  , typename ST2 > // Type of the scalar value
4759  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
4760  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4761  {
4762  const size_t M( A.rows() );
4763  const size_t N( B.columns() );
4764 
4765  const size_t block( BLOCK_SIZE );
4766 
      // Tile [ii,iend) x [jj,jend); the last tile in each direction is clipped to M and N
4767  for( size_t ii=0UL; ii<M; ii+=block ) {
4768  const size_t iend( min( M, ii+block ) );
4769  for( size_t jj=0UL; jj<N; jj+=block ) {
4770  const size_t jend( min( N, jj+block ) );
4771  for( size_t i=ii; i<iend; ++i )
4772  {
      // Column range [jbegin,jpos) of row i, restricted to the current tile, in which B can be
      // non-zero. For tiles completely inside the zero part of B the computation loop is empty
      // and the reset loops (bounded by jj/jend) clear the elements of the tile.
4773  const size_t jbegin( ( IsUpper<MT5>::value )
4774  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
4775  :( jj ) );
4776  const size_t jpos( ( IsLower<MT5>::value )
4777  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
4778  :( jend ) );
4779 
4780  if( IsUpper<MT5>::value ) {
4781  for( size_t j=jj; j<jbegin; ++j ) {
4782  reset( (~C)(i,j) );
4783  }
4784  }
4785  for( size_t j=jbegin; j<jpos; ++j ) {
4786  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
4787  }
4788  if( IsLower<MT5>::value ) {
4789  for( size_t j=jpos; j<jend; ++j ) {
4790  reset( (~C)(i,j) );
4791  }
4792  }
4793  }
4794  }
4795  }
4796  }
4797  //**********************************************************************************************
4798 
4799  //**Default assignment to column-major dense matrices (diagonal/general)************************
      /*!\brief Default kernel for \f$ C=s*A*B \f$ with a column-major target, a diagonal left-hand
      //        side operand and a non-diagonal right-hand side operand.
      //
      // \param C The target left-hand side dense matrix (column-major).
      // \param A The left-hand side diagonal multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // Because A is diagonal the dot product collapses to a single term:
      // C(i,j) = A(i,i) * B(i,j) * scalar. Elements outside the triangular part of B are reset.
      */
4813  template< typename MT3 // Type of the left-hand side target matrix
4814  , typename MT4 // Type of the left-hand side matrix operand
4815  , typename MT5 // Type of the right-hand side matrix operand
4816  , typename ST2 > // Type of the scalar value
4817  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
4818  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4819  {
4820  const size_t M( A.rows() );
4821  const size_t N( B.columns() );
4822 
4823  for( size_t j=0UL; j<N; ++j )
4824  {
      // Row range of column j in which B (and therefore the product) can be non-zero
4825  const size_t ibegin( ( IsLower<MT5>::value )
4826  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4827  :( 0UL ) );
4828  const size_t iend( ( IsUpper<MT5>::value )
4829  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4830  :( M ) );
4831  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4832 
      // The guards are compile-time constants; without them the reset loops would simply be
      // empty (ibegin is 0 for a non-lower B, iend is M for a non-upper B).
4833  if( IsLower<MT5>::value ) {
4834  for( size_t i=0UL; i<ibegin; ++i ) {
4835  reset( (~C)(i,j) );
4836  }
4837  }
4838  for( size_t i=ibegin; i<iend; ++i ) {
4839  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
4840  }
4841  if( IsUpper<MT5>::value ) {
4842  for( size_t i=iend; i<M; ++i ) {
4843  reset( (~C)(i,j) );
4844  }
4845  }
4846  }
4847  }
4848  //**********************************************************************************************
4849 
4850  //**Default assignment to dense matrices (diagonal/diagonal)************************************
      /*!\brief Default kernel for \f$ C=s*A*B \f$ with two diagonal operands.
      //
      // \param C The target left-hand side dense matrix (any storage order).
      // \param A The left-hand side diagonal multiplication operand.
      // \param B The right-hand side diagonal multiplication operand.
      // \param scalar The scaling factor.
      //
      // The whole target is reset first; afterwards only the diagonal elements are written as
      // C(i,i) = A(i,i) * B(i,i) * scalar.
      */
4864  template< typename MT3 // Type of the left-hand side target matrix
4865  , typename MT4 // Type of the left-hand side matrix operand
4866  , typename MT5 // Type of the right-hand side matrix operand
4867  , typename ST2 > // Type of the scalar value
4868  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
4869  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4870  {
4871  reset( C );
4872 
4873  for( size_t i=0UL; i<A.rows(); ++i ) {
4874  C(i,i) = A(i,i) * B(i,i) * scalar;
4875  }
4876  }
4877  //**********************************************************************************************
4878 
4879  //**Default assignment to dense matrices (small matrices)***************************************
      /*!\brief Small-matrix kernel for \f$ C=s*A*B \f$ when vectorization is not applicable.
      //
      // \param C The target left-hand side dense matrix.
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // This overload is selected if UseVectorizedDefaultKernel is false for the given types; it
      // forwards to the matching selectDefaultAssignKernel() overload.
      */
4893  template< typename MT3 // Type of the left-hand side target matrix
4894  , typename MT4 // Type of the left-hand side matrix operand
4895  , typename MT5 // Type of the right-hand side matrix operand
4896  , typename ST2 > // Type of the scalar value
4897  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
4898  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4899  {
4900  selectDefaultAssignKernel( C, A, B, scalar );
4901  }
4902  //**********************************************************************************************
4903 
4904  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
      /*!\brief Vectorized small-matrix kernel for \f$ C=s*A*B \f$ with a row-major target.
      //
      // \param C The target left-hand side dense matrix (row-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      // \param scalar The scaling factor.
      //
      // This overload is selected if UseVectorizedDefaultKernel is true for the given types. Each
      // element of C is a dot product along k that is accumulated in SIMD registers: A.load(i,k)
      // and B.load(k,j) each fetch one SIMD pack starting at index k, the packs are multiplied
      // and added to an accumulator, and sum() reduces the accumulator to a scalar, which is
      // multiplied by \a scalar and stored.
      //
      // The target is processed in register tiles: two rows at a time, and within these four,
      // two and finally one column at a time; a possible last single row is handled the same way.
      // Every element of C is written, so the target does not have to be initialized.
      //
      // NOTE(review): the accumulators are default-constructed and never explicitly zeroed; the
      // kernel relies on SIMDType's default constructor producing a zero vector -- confirm.
      */
4919  template< typename MT3 // Type of the left-hand side target matrix
4920  , typename MT4 // Type of the left-hand side matrix operand
4921  , typename MT5 // Type of the right-hand side matrix operand
4922  , typename ST2 > // Type of the scalar value
4923  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
4924  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4925  {
4926  const size_t M( A.rows() );
4927  const size_t N( B.columns() );
4928  const size_t K( A.columns() );
4929 
      // If at least one operand is not padded, the SIMD loop must stop at a multiple of SIMDSIZE
      // and a scalar loop has to process the remaining k. If both are padded, the SIMD loop runs
      // up to kend and may read into the padding (presumably zero-filled -- confirm).
4930  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
4931 
4932  size_t i( 0UL );
4933 
      // Main part: two rows of C per iteration
4934  for( ; (i+2UL) <= M; i+=2UL )
4935  {
4936  size_t j( 0UL );
4937 
      // 2x4 tile: rows i, i+1 and columns j..j+3 (eight accumulators)
4938  for( ; (j+4UL) <= N; j+=4UL )
4939  {
      // k-range in which the tile can receive non-zero contributions, derived from the
      // triangular structure of A and B. The lower bound is rounded down to a multiple of
      // SIMDSIZE by the mask size_t(-SIMDSIZE) (valid for power-of-two SIMDSIZE) so that the
      // SIMD loop starts on a pack boundary.
4940  const size_t kbegin( ( IsUpper<MT4>::value )
4941  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
4942  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
4943  const size_t kend( ( IsLower<MT4>::value )
4944  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
4945  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
4946 
      // End of the SIMD loop: kend rounded down to a multiple of SIMDSIZE if a scalar
      // remainder loop is required, kend itself otherwise
4947  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
4948  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
4949 
4950  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4951  size_t k( kbegin );
4952 
      // Each pack of A and B is loaded once and reused for all eight products of the tile
4953  for( ; k<kpos; k+=SIMDSIZE ) {
4954  const SIMDType a1( A.load(i ,k) );
4955  const SIMDType a2( A.load(i+1UL,k) );
4956  const SIMDType b1( B.load(k,j ) );
4957  const SIMDType b2( B.load(k,j+1UL) );
4958  const SIMDType b3( B.load(k,j+2UL) );
4959  const SIMDType b4( B.load(k,j+3UL) );
4960  xmm1 = xmm1 + a1 * b1;
4961  xmm2 = xmm2 + a1 * b2;
4962  xmm3 = xmm3 + a1 * b3;
4963  xmm4 = xmm4 + a1 * b4;
4964  xmm5 = xmm5 + a2 * b1;
4965  xmm6 = xmm6 + a2 * b2;
4966  xmm7 = xmm7 + a2 * b3;
4967  xmm8 = xmm8 + a2 * b4;
4968  }
4969 
      // Horizontal reduction, scaling and store (plain assignment: initializes the elements)
4970  (~C)(i ,j ) = sum( xmm1 ) * scalar;
4971  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
4972  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
4973  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
4974  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
4975  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
4976  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
4977  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
4978 
      // Scalar remainder for k in [kpos,kend); each term is scaled individually because the
      // stored elements are already scaled
4979  for( ; remainder && k<kend; ++k ) {
4980  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
4981  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
4982  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
4983  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
4984  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
4985  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
4986  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
4987  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
4988  }
4989  }
4990 
      // 2x2 tile: rows i, i+1 and columns j, j+1 (four accumulators)
4991  for( ; (j+2UL) <= N; j+=2UL )
4992  {
4993  const size_t kbegin( ( IsUpper<MT4>::value )
4994  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
4995  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
4996  const size_t kend( ( IsLower<MT4>::value )
4997  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
4998  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
4999 
5000  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5001  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5002 
5003  SIMDType xmm1, xmm2, xmm3, xmm4;
5004  size_t k( kbegin );
5005 
5006  for( ; k<kpos; k+=SIMDSIZE ) {
5007  const SIMDType a1( A.load(i ,k) );
5008  const SIMDType a2( A.load(i+1UL,k) );
5009  const SIMDType b1( B.load(k,j ) );
5010  const SIMDType b2( B.load(k,j+1UL) );
5011  xmm1 = xmm1 + a1 * b1;
5012  xmm2 = xmm2 + a1 * b2;
5013  xmm3 = xmm3 + a2 * b1;
5014  xmm4 = xmm4 + a2 * b2;
5015  }
5016 
5017  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5018  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5019  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5020  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5021 
5022  for( ; remainder && k<kend; ++k ) {
5023  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5024  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5025  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5026  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5027  }
5028  }
5029 
      // 2x1 tile: rows i, i+1 and the last remaining column j. The upper bound only takes the
      // structure of A into account here (an upper B is not used to shorten the range).
5030  if( j < N )
5031  {
5032  const size_t kbegin( ( IsUpper<MT4>::value )
5033  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5034  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5035  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5036 
5037  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5038  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5039 
5040  SIMDType xmm1, xmm2;
5041  size_t k( kbegin );
5042 
5043  for( ; k<kpos; k+=SIMDSIZE ) {
5044  const SIMDType b1( B.load(k,j) );
5045  xmm1 = xmm1 + A.load(i ,k) * b1;
5046  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5047  }
5048 
5049  (~C)(i ,j) = sum( xmm1 ) * scalar;
5050  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5051 
5052  for( ; remainder && k<kend; ++k ) {
5053  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5054  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5055  }
5056  }
5057  }
5058 
      // Last single row (only reached if M is odd)
5059  if( i < M )
5060  {
5061  size_t j( 0UL );
5062 
      // 1x4 tile: row i and columns j..j+3. The upper bound only takes the structure of B into
      // account here (a lower A is not used to shorten the range).
5063  for( ; (j+4UL) <= N; j+=4UL )
5064  {
5065  const size_t kbegin( ( IsUpper<MT4>::value )
5066  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5067  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5068  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
5069 
5070  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5071  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5072 
5073  SIMDType xmm1, xmm2, xmm3, xmm4;
5074  size_t k( kbegin );
5075 
5076  for( ; k<kpos; k+=SIMDSIZE ) {
5077  const SIMDType a1( A.load(i,k) );
5078  xmm1 = xmm1 + a1 * B.load(k,j );
5079  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5080  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
5081  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
5082  }
5083 
5084  (~C)(i,j ) = sum( xmm1 ) * scalar;
5085  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5086  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
5087  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
5088 
5089  for( ; remainder && k<kend; ++k ) {
5090  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5091  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5092  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
5093  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
5094  }
5095  }
5096 
      // 1x2 tile: row i and columns j, j+1
5097  for( ; (j+2UL) <= N; j+=2UL )
5098  {
5099  const size_t kbegin( ( IsUpper<MT4>::value )
5100  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5101  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5102  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5103 
5104  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5105  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5106 
5107  SIMDType xmm1, xmm2;
5108  size_t k( kbegin );
5109 
5110  for( ; k<kpos; k+=SIMDSIZE ) {
5111  const SIMDType a1( A.load(i,k) );
5112  xmm1 = xmm1 + a1 * B.load(k,j );
5113  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5114  }
5115 
5116  (~C)(i,j ) = sum( xmm1 ) * scalar;
5117  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5118 
5119  for( ; remainder && k<kend; ++k ) {
5120  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5121  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5122  }
5123  }
5124 
      // Last single element (row i, column j): the summation always runs up to K
5125  if( j < N )
5126  {
5127  const size_t kbegin( ( IsUpper<MT4>::value )
5128  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5129  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5130 
5131  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5132  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5133 
5134  SIMDType xmm1;
5135  size_t k( kbegin );
5136 
5137  for( ; k<kpos; k+=SIMDSIZE ) {
5138  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
5139  }
5140 
5141  (~C)(i,j) = sum( xmm1 ) * scalar;
5142 
5143  for( ; remainder && k<K; ++k ) {
5144  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5145  }
5146  }
5147  }
5148  }
5149  //**********************************************************************************************
5150 
5151  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
/*!\brief Vectorized default assignment of a scaled dense matrix product to a column-major
//        dense matrix (small matrices).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Every visited element of \a C is overwritten with the sum over k of A(i,k)*B(k,j), multiplied
// by \a scalar. The sum is accumulated with SIMD loads along k from both operands, followed by
// a scalar tail loop when required. The target is processed in tiles of 4x2, 4x1, 2x2, 2x1,
// 1x2 and 1x1 elements (rows x columns). For every tile the k range is narrowed according to
// the IsUpper/IsLower traits of the two operands.
// NOTE(review): both A.load(i,k) and B.load(k,j) advance along k, which suggests a row-major
// A and a column-major B -- confirm against the enclosing expression class.
*/
5166  template< typename MT3 // Type of the left-hand side target matrix
5167  , typename MT4 // Type of the left-hand side matrix operand
5168  , typename MT5 // Type of the right-hand side matrix operand
5169  , typename ST2 > // Type of the scalar value
5170  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5171  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5172  {
5173  const size_t M( A.rows() );
5174  const size_t N( B.columns() );
5175  const size_t K( A.columns() );
5176
// A scalar tail loop is needed whenever at least one operand is not padded.
5177  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5178
5179  size_t i( 0UL );
5180
// ---- Row tiles of height 4 ----
5181  for( ; (i+4UL) <= M; i+=4UL )
5182  {
5183  size_t j( 0UL );
5184
// 4x2 tile: four rows of A against two columns of B.
5185  for( ; (j+2UL) <= N; j+=2UL )
5186  {
// First k index of the tile, rounded down to a multiple of SIMDSIZE via the bit mask;
// it depends on whether A is upper and/or B is lower.
5187  const size_t kbegin( ( IsUpper<MT4>::value )
5188  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5189  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
// One-past-last k index of the tile; it depends on whether A is lower and/or B is upper.
5190  const size_t kend( ( IsLower<MT4>::value )
5191  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
5192  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5193
// End of the SIMD loop: kend rounded down to a multiple of SIMDSIZE when a tail loop is
// required, otherwise kend itself.
5194  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5195  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5196
// One SIMD accumulator per target element of the tile.
// NOTE(review): relies on default-constructed SIMDType starting at zero -- confirm.
5197  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5198  size_t k( kbegin );
5199
5200  for( ; k<kpos; k+=SIMDSIZE ) {
5201  const SIMDType a1( A.load(i ,k) );
5202  const SIMDType a2( A.load(i+1UL,k) );
5203  const SIMDType a3( A.load(i+2UL,k) );
5204  const SIMDType a4( A.load(i+3UL,k) );
5205  const SIMDType b1( B.load(k,j ) );
5206  const SIMDType b2( B.load(k,j+1UL) );
5207  xmm1 = xmm1 + a1 * b1;
5208  xmm2 = xmm2 + a1 * b2;
5209  xmm3 = xmm3 + a2 * b1;
5210  xmm4 = xmm4 + a2 * b2;
5211  xmm5 = xmm5 + a3 * b1;
5212  xmm6 = xmm6 + a3 * b2;
5213  xmm7 = xmm7 + a4 * b1;
5214  xmm8 = xmm8 + a4 * b2;
5215  }
5216
// Reduce each accumulator and overwrite (not accumulate into) the target element.
5217  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5218  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5219  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5220  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5221  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
5222  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
5223  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
5224  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
5225
// Scalar tail for the k indices in [kpos,kend) that the SIMD loop did not cover.
5226  for( ; remainder && k<kend; ++k ) {
5227  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5228  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5229  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5230  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5231  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
5232  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
5233  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
5234  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
5235  }
5236  }
5237
// 4x1 tile: the single column left over when N is odd.
5238  if( j < N )
5239  {
5240  const size_t kbegin( ( IsUpper<MT4>::value )
5241  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5242  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
// Here the upper bound is narrowed only by the structure of A.
5243  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
5244
5245  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5246  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5247
5248  SIMDType xmm1, xmm2, xmm3, xmm4;
5249  size_t k( kbegin );
5250
5251  for( ; k<kpos; k+=SIMDSIZE ) {
5252  const SIMDType b1( B.load(k,j) );
5253  xmm1 = xmm1 + A.load(i ,k) * b1;
5254  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5255  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
5256  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
5257  }
5258
5259  (~C)(i ,j) = sum( xmm1 ) * scalar;
5260  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5261  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
5262  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
5263
5264  for( ; remainder && k<kend; ++k ) {
5265  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5266  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5267  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
5268  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
5269  }
5270  }
5271  }
5272
// ---- Row tiles of height 2 (at most one iteration remains after the 4-row loop) ----
5273  for( ; (i+2UL) <= M; i+=2UL )
5274  {
5275  size_t j( 0UL );
5276
// 2x2 tile.
5277  for( ; (j+2UL) <= N; j+=2UL )
5278  {
5279  const size_t kbegin( ( IsUpper<MT4>::value )
5280  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5281  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5282  const size_t kend( ( IsLower<MT4>::value )
5283  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5284  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5285
5286  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5287  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5288
5289  SIMDType xmm1, xmm2, xmm3, xmm4;
5290  size_t k( kbegin );
5291
5292  for( ; k<kpos; k+=SIMDSIZE ) {
5293  const SIMDType a1( A.load(i ,k) );
5294  const SIMDType a2( A.load(i+1UL,k) );
5295  const SIMDType b1( B.load(k,j ) );
5296  const SIMDType b2( B.load(k,j+1UL) );
5297  xmm1 = xmm1 + a1 * b1;
5298  xmm2 = xmm2 + a1 * b2;
5299  xmm3 = xmm3 + a2 * b1;
5300  xmm4 = xmm4 + a2 * b2;
5301  }
5302
5303  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5304  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5305  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5306  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5307
5308  for( ; remainder && k<kend; ++k ) {
5309  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5310  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5311  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5312  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
5313  }
5314  }
5315
// 2x1 tile: the single column left over when N is odd.
5316  if( j < N )
5317  {
5318  const size_t kbegin( ( IsUpper<MT4>::value )
5319  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5320  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5321  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5322
5323  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5324  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5325
5326  SIMDType xmm1, xmm2;
5327  size_t k( kbegin );
5328
5329  for( ; k<kpos; k+=SIMDSIZE ) {
5330  const SIMDType b1( B.load(k,j) );
5331  xmm1 = xmm1 + A.load(i ,k) * b1;
5332  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5333  }
5334
5335  (~C)(i ,j) = sum( xmm1 ) * scalar;
5336  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5337
5338  for( ; remainder && k<kend; ++k ) {
5339  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
5340  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
5341  }
5342  }
5343  }
5344
// ---- Final single row when M is odd ----
5345  if( i < M )
5346  {
5347  size_t j( 0UL );
5348
// 1x2 tile.
5349  for( ; (j+2UL) <= N; j+=2UL )
5350  {
5351  const size_t kbegin( ( IsUpper<MT4>::value )
5352  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5353  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
// Here the upper bound is narrowed only by the structure of B.
5354  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5355
5356  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5357  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5358
5359  SIMDType xmm1, xmm2;
5360  size_t k( kbegin );
5361
5362  for( ; k<kpos; k+=SIMDSIZE ) {
5363  const SIMDType a1( A.load(i,k) );
5364  xmm1 = xmm1 + a1 * B.load(k,j );
5365  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5366  }
5367
5368  (~C)(i,j ) = sum( xmm1 ) * scalar;
5369  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5370
5371  for( ; remainder && k<kend; ++k ) {
5372  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
5373  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
5374  }
5375  }
5376
// 1x1 tile: last element; the k range always extends up to K here.
5377  if( j < N )
5378  {
5379  const size_t kbegin( ( IsUpper<MT4>::value )
5380  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5381  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5382
5383  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
5384  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5385
5386  SIMDType xmm1;
5387  size_t k( kbegin );
5388
5389  for( ; k<kpos; k+=SIMDSIZE ) {
5390  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
5391  }
5392
5393  (~C)(i,j) = sum( xmm1 ) * scalar;
5394
5395  for( ; remainder && k<K; ++k ) {
5396  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
5397  }
5398  }
5399  }
5400  }
5401  //**********************************************************************************************
5402 
5403  //**Default assignment to dense matrices (large matrices)***************************************
5417  template< typename MT3 // Type of the left-hand side target matrix
5418  , typename MT4 // Type of the left-hand side matrix operand
5419  , typename MT5 // Type of the right-hand side matrix operand
5420  , typename ST2 > // Type of the scalar value
5421  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5422  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5423  {
5424  selectDefaultAssignKernel( C, A, B, scalar );
5425  }
5426  //**********************************************************************************************
5427 
5428  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
5443  template< typename MT3 // Type of the left-hand side target matrix
5444  , typename MT4 // Type of the left-hand side matrix operand
5445  , typename MT5 // Type of the right-hand side matrix operand
5446  , typename ST2 > // Type of the scalar value
5447  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5448  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5449  {
5450  // TODO
5451  selectSmallAssignKernel( ~C, A, B, scalar );
5452  }
5453  //**********************************************************************************************
5454 
5455  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
5470  template< typename MT3 // Type of the left-hand side target matrix
5471  , typename MT4 // Type of the left-hand side matrix operand
5472  , typename MT5 // Type of the right-hand side matrix operand
5473  , typename ST2 > // Type of the scalar value
5474  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5475  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5476  {
5477  // TODO
5478  selectSmallAssignKernel( ~C, A, B, scalar );
5479  }
5480  //**********************************************************************************************
5481 
5482  //**BLAS-based assignment to dense matrices (default)*******************************************
5496  template< typename MT3 // Type of the left-hand side target matrix
5497  , typename MT4 // Type of the left-hand side matrix operand
5498  , typename MT5 // Type of the right-hand side matrix operand
5499  , typename ST2 > // Type of the scalar value
5500  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
5501  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5502  {
5503  selectLargeAssignKernel( C, A, B, scalar );
5504  }
5505  //**********************************************************************************************
5506 
5507  //**BLAS-based assignment to dense matrices*****************************************************
5508 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
5509 
5522  template< typename MT3 // Type of the left-hand side target matrix
5523  , typename MT4 // Type of the left-hand side matrix operand
5524  , typename MT5 // Type of the right-hand side matrix operand
5525  , typename ST2 > // Type of the scalar value
5526  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
5527  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5528  {
5529  typedef ElementType_<MT3> ET;
5530 
5531  if( IsTriangular<MT4>::value ) {
5532  assign( C, B );
5533  trmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5534  }
5535  else if( IsTriangular<MT5>::value ) {
5536  assign( C, A );
5537  trmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
5538  }
5539  else {
5540  gemm( C, A, B, ET(scalar), ET(0) );
5541  }
5542  }
5543 #endif
5544  //**********************************************************************************************
5545 
5546  //**Assignment to sparse matrices***************************************************************
5558  template< typename MT // Type of the target sparse matrix
5559  , bool SO > // Storage order of the target sparse matrix
5560  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5561  {
5563 
5564  typedef IfTrue_< SO, OppositeType, ResultType > TmpType;
5565 
5571  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( CompositeType_<TmpType> );
5572 
5573  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5574  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5575 
5576  const TmpType tmp( serial( rhs ) );
5577  assign( ~lhs, tmp );
5578  }
5579  //**********************************************************************************************
5580 
5581  //**Addition assignment to dense matrices*******************************************************
5593  template< typename MT // Type of the target dense matrix
5594  , bool SO > // Storage order of the target dense matrix
5595  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5596  {
5598 
5599  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5600  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5601 
5602  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
5603  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
5604 
5605  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5606  return;
5607  }
5608 
5609  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5610  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5611 
5612  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5613  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5614  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5615  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5616  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5617  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5618 
5619  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5620  }
5621  //**********************************************************************************************
5622 
5623  //**Addition assignment to dense matrices (kernel selection)************************************
5634  template< typename MT3 // Type of the left-hand side target matrix
5635  , typename MT4 // Type of the left-hand side matrix operand
5636  , typename MT5 // Type of the right-hand side matrix operand
5637  , typename ST2 > // Type of the scalar value
5638  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5639  {
5640  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
5641  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
5642  selectSmallAddAssignKernel( C, A, B, scalar );
5643  else
5644  selectBlasAddAssignKernel( C, A, B, scalar );
5645  }
5646  //**********************************************************************************************
5647 
5648  //**Default addition assignment to dense matrices (general/general)*****************************
5662  template< typename MT3 // Type of the left-hand side target matrix
5663  , typename MT4 // Type of the left-hand side matrix operand
5664  , typename MT5 // Type of the right-hand side matrix operand
5665  , typename ST2 > // Type of the scalar value
5666  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
5667  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5668  {
5669  const ResultType tmp( serial( A * B * scalar ) );
5670  addAssign( C, tmp );
5671  }
5672  //**********************************************************************************************
5673 
5674  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
5688  template< typename MT3 // Type of the left-hand side target matrix
5689  , typename MT4 // Type of the left-hand side matrix operand
5690  , typename MT5 // Type of the right-hand side matrix operand
5691  , typename ST2 > // Type of the scalar value
5692  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
5693  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5694  {
5695  const size_t M( A.rows() );
5696  const size_t N( B.columns() );
5697 
5698  for( size_t i=0UL; i<M; ++i )
5699  {
5700  const size_t jbegin( ( IsUpper<MT4>::value )
5701  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5702  :( 0UL ) );
5703  const size_t jend( ( IsLower<MT4>::value )
5704  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
5705  :( N ) );
5706  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5707 
5708  const size_t jnum( jend - jbegin );
5709  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5710 
5711  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5712  (~C)(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5713  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5714  }
5715  if( jpos < jend ) {
5716  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
5717  }
5718  }
5719  }
5720  //**********************************************************************************************
5721 
5722  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
5736  template< typename MT3 // Type of the left-hand side target matrix
5737  , typename MT4 // Type of the left-hand side matrix operand
5738  , typename MT5 // Type of the right-hand side matrix operand
5739  , typename ST2 > // Type of the scalar value
5740  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
5741  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5742  {
5743  const size_t M( A.rows() );
5744  const size_t N( B.columns() );
5745 
5746  const size_t block( BLOCK_SIZE );
5747 
5748  for( size_t jj=0UL; jj<N; jj+=block ) {
5749  const size_t jend( min( N, jj+block ) );
5750  for( size_t ii=0UL; ii<M; ii+=block ) {
5751  const size_t iend( min( M, ii+block ) );
5752  for( size_t j=jj; j<jend; ++j )
5753  {
5754  const size_t ibegin( ( IsLower<MT4>::value )
5755  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
5756  :( ii ) );
5757  const size_t ipos( ( IsUpper<MT4>::value )
5758  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
5759  :( iend ) );
5760 
5761  for( size_t i=ibegin; i<ipos; ++i ) {
5762  (~C)(i,j) += A(i,j) * B(j,j) * scalar;
5763  }
5764  }
5765  }
5766  }
5767  }
5768  //**********************************************************************************************
5769 
5770  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
5784  template< typename MT3 // Type of the left-hand side target matrix
5785  , typename MT4 // Type of the left-hand side matrix operand
5786  , typename MT5 // Type of the right-hand side matrix operand
5787  , typename ST2 > // Type of the scalar value
5788  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5789  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5790  {
5791  const size_t M( A.rows() );
5792  const size_t N( B.columns() );
5793 
5794  const size_t block( BLOCK_SIZE );
5795 
5796  for( size_t ii=0UL; ii<M; ii+=block ) {
5797  const size_t iend( min( M, ii+block ) );
5798  for( size_t jj=0UL; jj<N; jj+=block ) {
5799  const size_t jend( min( N, jj+block ) );
5800  for( size_t i=ii; i<iend; ++i )
5801  {
5802  const size_t jbegin( ( IsUpper<MT5>::value )
5803  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
5804  :( jj ) );
5805  const size_t jpos( ( IsLower<MT5>::value )
5806  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
5807  :( jend ) );
5808 
5809  for( size_t j=jbegin; j<jpos; ++j ) {
5810  (~C)(i,j) += A(i,i) * B(i,j) * scalar;
5811  }
5812  }
5813  }
5814  }
5815  }
5816  //**********************************************************************************************
5817 
5818  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
5832  template< typename MT3 // Type of the left-hand side target matrix
5833  , typename MT4 // Type of the left-hand side matrix operand
5834  , typename MT5 // Type of the right-hand side matrix operand
5835  , typename ST2 > // Type of the scalar value
5836  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
5837  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5838  {
5839  const size_t M( A.rows() );
5840  const size_t N( B.columns() );
5841 
5842  for( size_t j=0UL; j<N; ++j )
5843  {
5844  const size_t ibegin( ( IsLower<MT5>::value )
5845  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5846  :( 0UL ) );
5847  const size_t iend( ( IsUpper<MT5>::value )
5848  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5849  :( M ) );
5850  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5851 
5852  const size_t inum( iend - ibegin );
5853  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5854 
5855  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5856  (~C)(i ,j) += A(i ,i ) * B(i ,j) * scalar;
5857  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
5858  }
5859  if( ipos < iend ) {
5860  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
5861  }
5862  }
5863  }
5864  //**********************************************************************************************
5865 
5866  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
5880  template< typename MT3 // Type of the left-hand side target matrix
5881  , typename MT4 // Type of the left-hand side matrix operand
5882  , typename MT5 // Type of the right-hand side matrix operand
5883  , typename ST2 > // Type of the scalar value
5884  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
5885  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5886  {
5887  for( size_t i=0UL; i<A.rows(); ++i ) {
5888  C(i,i) += A(i,i) * B(i,i) * scalar;
5889  }
5890  }
5891  //**********************************************************************************************
5892 
5893  //**Default addition assignment to dense matrices (small matrices)******************************
5907  template< typename MT3 // Type of the left-hand side target matrix
5908  , typename MT4 // Type of the left-hand side matrix operand
5909  , typename MT5 // Type of the right-hand side matrix operand
5910  , typename ST2 > // Type of the scalar value
5911  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5912  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5913  {
5914  selectDefaultAddAssignKernel( C, A, B, scalar );
5915  }
5916  //**********************************************************************************************
5917 
5918  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
5933  template< typename MT3 // Type of the left-hand side target matrix
5934  , typename MT4 // Type of the left-hand side matrix operand
5935  , typename MT5 // Type of the right-hand side matrix operand
5936  , typename ST2 > // Type of the scalar value
5937  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
5938  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5939  {
5940  const size_t M( A.rows() );
5941  const size_t N( B.columns() );
5942  const size_t K( A.columns() );
5943 
5944  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
5945 
5946  size_t i( 0UL );
5947 
5948  for( ; (i+2UL) <= M; i+=2UL )
5949  {
5950  size_t j( 0UL );
5951 
5952  for( ; (j+4UL) <= N; j+=4UL )
5953  {
5954  const size_t kbegin( ( IsUpper<MT4>::value )
5955  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
5956  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
5957  const size_t kend( ( IsLower<MT4>::value )
5958  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
5959  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
5960 
5961  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
5962  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
5963 
5964  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5965  size_t k( kbegin );
5966 
5967  for( ; k<kpos; k+=SIMDSIZE ) {
5968  const SIMDType a1( A.load(i ,k) );
5969  const SIMDType a2( A.load(i+1UL,k) );
5970  const SIMDType b1( B.load(k,j ) );
5971  const SIMDType b2( B.load(k,j+1UL) );
5972  const SIMDType b3( B.load(k,j+2UL) );
5973  const SIMDType b4( B.load(k,j+3UL) );
5974  xmm1 = xmm1 + a1 * b1;
5975  xmm2 = xmm2 + a1 * b2;
5976  xmm3 = xmm3 + a1 * b3;
5977  xmm4 = xmm4 + a1 * b4;
5978  xmm5 = xmm5 + a2 * b1;
5979  xmm6 = xmm6 + a2 * b2;
5980  xmm7 = xmm7 + a2 * b3;
5981  xmm8 = xmm8 + a2 * b4;
5982  }
5983 
5984  (~C)(i ,j ) += sum( xmm1 ) * scalar;
5985  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
5986  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
5987  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
5988  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
5989  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
5990  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
5991  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
5992 
5993  for( ; remainder && k<kend; ++k ) {
5994  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
5995  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
5996  (~C)(i ,j+2UL) += A(i ,k) * B(k,j+2UL) * scalar;
5997  (~C)(i ,j+3UL) += A(i ,k) * B(k,j+3UL) * scalar;
5998  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
5999  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6000  (~C)(i+1UL,j+2UL) += A(i+1UL,k) * B(k,j+2UL) * scalar;
6001  (~C)(i+1UL,j+3UL) += A(i+1UL,k) * B(k,j+3UL) * scalar;
6002  }
6003  }
6004 
6005  for( ; (j+2UL) <= N; j+=2UL )
6006  {
6007  const size_t kbegin( ( IsUpper<MT4>::value )
6008  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6009  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6010  const size_t kend( ( IsLower<MT4>::value )
6011  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6012  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6013 
6014  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6015  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6016 
6017  SIMDType xmm1, xmm2, xmm3, xmm4;
6018  size_t k( kbegin );
6019 
6020  for( ; k<kpos; k+=SIMDSIZE ) {
6021  const SIMDType a1( A.load(i ,k) );
6022  const SIMDType a2( A.load(i+1UL,k) );
6023  const SIMDType b1( B.load(k,j ) );
6024  const SIMDType b2( B.load(k,j+1UL) );
6025  xmm1 = xmm1 + a1 * b1;
6026  xmm2 = xmm2 + a1 * b2;
6027  xmm3 = xmm3 + a2 * b1;
6028  xmm4 = xmm4 + a2 * b2;
6029  }
6030 
6031  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6032  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6033  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6034  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6035 
6036  for( ; remainder && k<kend; ++k ) {
6037  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6038  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6039  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6040  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6041  }
6042  }
6043 
6044  if( j < N )
6045  {
6046  const size_t kbegin( ( IsUpper<MT4>::value )
6047  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6048  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6049  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6050 
6051  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6052  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6053 
6054  SIMDType xmm1, xmm2;
6055  size_t k( kbegin );
6056 
6057  for( ; k<kpos; k+=SIMDSIZE ) {
6058  const SIMDType b1( B.load(k,j) );
6059  xmm1 = xmm1 + A.load(i ,k) * b1;
6060  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6061  }
6062 
6063  (~C)(i ,j) += sum( xmm1 ) * scalar;
6064  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6065 
6066  for( ; remainder && k<kend; ++k ) {
6067  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6068  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6069  }
6070  }
6071  }
6072 
6073  if( i < M )
6074  {
6075  size_t j( 0UL );
6076 
6077  for( ; (j+4UL) <= N; j+=4UL )
6078  {
6079  const size_t kbegin( ( IsUpper<MT4>::value )
6080  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6081  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6082  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
6083 
6084  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6085  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6086 
6087  SIMDType xmm1, xmm2, xmm3, xmm4;
6088  size_t k( kbegin );
6089 
6090  for( ; k<kpos; k+=SIMDSIZE ) {
6091  const SIMDType a1( A.load(i,k) );
6092  xmm1 = xmm1 + a1 * B.load(k,j );
6093  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6094  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
6095  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
6096  }
6097 
6098  (~C)(i,j ) += sum( xmm1 ) * scalar;
6099  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6100  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
6101  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
6102 
6103  for( ; remainder && k<kend; ++k ) {
6104  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6105  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6106  (~C)(i,j+2UL) += A(i,k) * B(k,j+2UL) * scalar;
6107  (~C)(i,j+3UL) += A(i,k) * B(k,j+3UL) * scalar;
6108  }
6109  }
6110 
6111  for( ; (j+2UL) <= N; j+=2UL )
6112  {
6113  const size_t kbegin( ( IsUpper<MT4>::value )
6114  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6115  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6116  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6117 
6118  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6119  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6120 
6121  SIMDType xmm1, xmm2;
6122  size_t k( kbegin );
6123 
6124  for( ; k<kpos; k+=SIMDSIZE ) {
6125  const SIMDType a1( A.load(i,k) );
6126  xmm1 = xmm1 + a1 * B.load(k,j );
6127  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6128  }
6129 
6130  (~C)(i,j ) += sum( xmm1 ) * scalar;
6131  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6132 
6133  for( ; remainder && k<kend; ++k ) {
6134  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6135  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6136  }
6137  }
6138 
6139  if( j < N )
6140  {
6141  const size_t kbegin( ( IsUpper<MT4>::value )
6142  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6143  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6144 
6145  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6146  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6147 
6148  SIMDType xmm1;
6149  size_t k( kbegin );
6150 
6151  for( ; k<kpos; k+=SIMDSIZE ) {
6152  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
6153  }
6154 
6155  (~C)(i,j) += sum( xmm1 ) * scalar;
6156 
6157  for( ; remainder && k<K; ++k ) {
6158  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6159  }
6160  }
6161  }
6162  }
6163  //**********************************************************************************************
6164 
6165  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized small-matrix kernel for the scaled addition assignment to a column-major target:
// every element C(i,j) is incremented by scalar times the dot product over k of A(i,k)*B(k,j).
//
// Parameters:
//   C      - column-major dense target matrix (accessed through operator~)
//   A      - left-hand side matrix operand (read via A.load(i,k) and A(i,k))
//   B      - right-hand side matrix operand (read via B.load(k,j) and B(k,j))
//   scalar - factor applied to every accumulated dot product before it is added to C
//
// The target is processed in tiles of 4x2, 4x1, 2x2, 2x1, 1x2 and finally 1x1 elements. For each
// tile the k range [kbegin,kend) is narrowed according to the IsUpper/IsLower traits of A and B,
// the part [kbegin,kpos) is accumulated with SIMD loads in steps of SIMDSIZE, and the rest
// [kpos,kend) is handled by a scalar loop when 'remainder' is set.
//
// NOTE(review): the SIMD accumulators (xmm1, ...) are only default-constructed before being
// summed into; this relies on SIMDType's default constructor producing zero -- confirm.
// This overload participates only if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds (EnableIf_).
6180  template< typename MT3 // Type of the left-hand side target matrix
6181  , typename MT4 // Type of the left-hand side matrix operand
6182  , typename MT5 // Type of the right-hand side matrix operand
6183  , typename ST2 > // Type of the scalar value
6184  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6185  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6186  {
6187  const size_t M( A.rows() );
6188  const size_t N( B.columns() );
6189  const size_t K( A.columns() );
6190 
      // A scalar tail loop is required as soon as one of the two operands is not padded.
6191  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6192 
6193  size_t i( 0UL );
6194 
      // Four rows of C at a time.
6195  for( ; (i+4UL) <= M; i+=4UL )
6196  {
6197  size_t j( 0UL );
6198 
      // 4x2 tiles.
6199  for( ; (j+2UL) <= N; j+=2UL )
6200  {
      // First k index of the tile: bounded below by i for an upper A and by j for a lower B,
      // rounded down to a multiple of SIMDSIZE.
6201  const size_t kbegin( ( IsUpper<MT4>::value )
6202  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6203  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // One-past-last k index: bounded above by the last row of the tile (i+4) for a lower A
      // and by the last column of the tile (j+2) for an upper B; K otherwise.
6204  const size_t kend( ( IsLower<MT4>::value )
6205  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
6206  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6207 
      // End of the SIMD part: kend rounded down to a multiple of SIMDSIZE if a scalar tail
      // is needed (the assertion checks the rounding), kend itself otherwise.
6208  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6209  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6210 
6211  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6212  size_t k( kbegin );
6213 
6214  for( ; k<kpos; k+=SIMDSIZE ) {
6215  const SIMDType a1( A.load(i ,k) );
6216  const SIMDType a2( A.load(i+1UL,k) );
6217  const SIMDType a3( A.load(i+2UL,k) );
6218  const SIMDType a4( A.load(i+3UL,k) );
6219  const SIMDType b1( B.load(k,j ) );
6220  const SIMDType b2( B.load(k,j+1UL) );
6221  xmm1 = xmm1 + a1 * b1;
6222  xmm2 = xmm2 + a1 * b2;
6223  xmm3 = xmm3 + a2 * b1;
6224  xmm4 = xmm4 + a2 * b2;
6225  xmm5 = xmm5 + a3 * b1;
6226  xmm6 = xmm6 + a3 * b2;
6227  xmm7 = xmm7 + a4 * b1;
6228  xmm8 = xmm8 + a4 * b2;
6229  }
6230 
      // Horizontal reduction of each accumulator, scaled and added to its target element.
6231  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6232  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6233  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6234  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6235  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
6236  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
6237  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
6238  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
6239 
      // Scalar tail for the k indices in [kpos,kend).
6240  for( ; remainder && k<kend; ++k ) {
6241  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6242  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6243  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6244  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6245  (~C)(i+2UL,j ) += A(i+2UL,k) * B(k,j ) * scalar;
6246  (~C)(i+2UL,j+1UL) += A(i+2UL,k) * B(k,j+1UL) * scalar;
6247  (~C)(i+3UL,j ) += A(i+3UL,k) * B(k,j ) * scalar;
6248  (~C)(i+3UL,j+1UL) += A(i+3UL,k) * B(k,j+1UL) * scalar;
6249  }
6250  }
6251 
      // Left-over single column: 4x1 tile. Only the structure of A limits kend here.
6252  if( j < N )
6253  {
6254  const size_t kbegin( ( IsUpper<MT4>::value )
6255  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6256  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6257  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
6258 
6259  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6260  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6261 
6262  SIMDType xmm1, xmm2, xmm3, xmm4;
6263  size_t k( kbegin );
6264 
6265  for( ; k<kpos; k+=SIMDSIZE ) {
6266  const SIMDType b1( B.load(k,j) );
6267  xmm1 = xmm1 + A.load(i ,k) * b1;
6268  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6269  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
6270  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
6271  }
6272 
6273  (~C)(i ,j) += sum( xmm1 ) * scalar;
6274  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6275  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
6276  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
6277 
6278  for( ; remainder && k<kend; ++k ) {
6279  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6280  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6281  (~C)(i+2UL,j) += A(i+2UL,k) * B(k,j) * scalar;
6282  (~C)(i+3UL,j) += A(i+3UL,k) * B(k,j) * scalar;
6283  }
6284  }
6285  }
6286 
      // Two rows of C at a time (continues at the row index reached by the 4-row loop).
6287  for( ; (i+2UL) <= M; i+=2UL )
6288  {
6289  size_t j( 0UL );
6290 
      // 2x2 tiles.
6291  for( ; (j+2UL) <= N; j+=2UL )
6292  {
6293  const size_t kbegin( ( IsUpper<MT4>::value )
6294  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6295  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6296  const size_t kend( ( IsLower<MT4>::value )
6297  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6298  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6299 
6300  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6301  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6302 
6303  SIMDType xmm1, xmm2, xmm3, xmm4;
6304  size_t k( kbegin );
6305 
6306  for( ; k<kpos; k+=SIMDSIZE ) {
6307  const SIMDType a1( A.load(i ,k) );
6308  const SIMDType a2( A.load(i+1UL,k) );
6309  const SIMDType b1( B.load(k,j ) );
6310  const SIMDType b2( B.load(k,j+1UL) );
6311  xmm1 = xmm1 + a1 * b1;
6312  xmm2 = xmm2 + a1 * b2;
6313  xmm3 = xmm3 + a2 * b1;
6314  xmm4 = xmm4 + a2 * b2;
6315  }
6316 
6317  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6318  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6319  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6320  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6321 
6322  for( ; remainder && k<kend; ++k ) {
6323  (~C)(i ,j ) += A(i ,k) * B(k,j ) * scalar;
6324  (~C)(i ,j+1UL) += A(i ,k) * B(k,j+1UL) * scalar;
6325  (~C)(i+1UL,j ) += A(i+1UL,k) * B(k,j ) * scalar;
6326  (~C)(i+1UL,j+1UL) += A(i+1UL,k) * B(k,j+1UL) * scalar;
6327  }
6328  }
6329 
      // Left-over single column: 2x1 tile.
6330  if( j < N )
6331  {
6332  const size_t kbegin( ( IsUpper<MT4>::value )
6333  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6334  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6335  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6336 
6337  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6338  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6339 
6340  SIMDType xmm1, xmm2;
6341  size_t k( kbegin );
6342 
6343  for( ; k<kpos; k+=SIMDSIZE ) {
6344  const SIMDType b1( B.load(k,j) );
6345  xmm1 = xmm1 + A.load(i ,k) * b1;
6346  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6347  }
6348 
6349  (~C)(i ,j) += sum( xmm1 ) * scalar;
6350  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6351 
6352  for( ; remainder && k<kend; ++k ) {
6353  (~C)(i ,j) += A(i ,k) * B(k,j) * scalar;
6354  (~C)(i+1UL,j) += A(i+1UL,k) * B(k,j) * scalar;
6355  }
6356  }
6357  }
6358 
      // Left-over single row of C.
6359  if( i < M )
6360  {
6361  size_t j( 0UL );
6362 
      // 1x2 tiles. Only the structure of B limits kend here.
6363  for( ; (j+2UL) <= N; j+=2UL )
6364  {
6365  const size_t kbegin( ( IsUpper<MT4>::value )
6366  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6367  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6368  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6369 
6370  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6371  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6372 
6373  SIMDType xmm1, xmm2;
6374  size_t k( kbegin );
6375 
6376  for( ; k<kpos; k+=SIMDSIZE ) {
6377  const SIMDType a1( A.load(i,k) );
6378  xmm1 = xmm1 + a1 * B.load(k,j );
6379  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6380  }
6381 
6382  (~C)(i,j ) += sum( xmm1 ) * scalar;
6383  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6384 
6385  for( ; remainder && k<kend; ++k ) {
6386  (~C)(i,j ) += A(i,k) * B(k,j ) * scalar;
6387  (~C)(i,j+1UL) += A(i,k) * B(k,j+1UL) * scalar;
6388  }
6389  }
6390 
      // Final 1x1 element. The k range always runs up to K in this case.
6391  if( j < N )
6392  {
6393  const size_t kbegin( ( IsUpper<MT4>::value )
6394  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6395  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6396 
6397  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
6398  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6399 
6400  SIMDType xmm1;
6401  size_t k( kbegin );
6402 
6403  for( ; k<kpos; k+=SIMDSIZE ) {
6404  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
6405  }
6406 
6407  (~C)(i,j) += sum( xmm1 ) * scalar;
6408 
6409  for( ; remainder && k<K; ++k ) {
6410  (~C)(i,j) += A(i,k) * B(k,j) * scalar;
6411  }
6412  }
6413  }
6414  }
6415  //**********************************************************************************************
6416 
6417  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment kernel for the non-vectorized case.
// Participates in overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// does not hold (DisableIf_) and simply forwards all arguments to selectDefaultAddAssignKernel().
//
//   C      - target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
6431  template< typename MT3 // Type of the left-hand side target matrix
6432  , typename MT4 // Type of the left-hand side matrix operand
6433  , typename MT5 // Type of the right-hand side matrix operand
6434  , typename ST2 > // Type of the scalar value
6435  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6436  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6437  {
6438  selectDefaultAddAssignKernel( C, A, B, scalar );
6439  }
6440  //**********************************************************************************************
6441 
6442  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Vectorized large-matrix addition assignment kernel for row-major target matrices.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds (EnableIf_).
// No dedicated large-matrix implementation exists yet (see the TODO below); the call is
// forwarded unchanged to selectSmallAddAssignKernel().
//
//   C      - row-major dense target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
6457  template< typename MT3 // Type of the left-hand side target matrix
6458  , typename MT4 // Type of the left-hand side matrix operand
6459  , typename MT5 // Type of the right-hand side matrix operand
6460  , typename ST2 > // Type of the scalar value
6461  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6462  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6463  {
6464  // TODO
6465  selectSmallAddAssignKernel( ~C, A, B, scalar );
6466  }
6467  //**********************************************************************************************
6468 
6469  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
// Vectorized large-matrix addition assignment kernel for column-major target matrices.
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds (EnableIf_).
// No dedicated large-matrix implementation exists yet (see the TODO below); the call is
// forwarded unchanged to selectSmallAddAssignKernel().
//
//   C      - column-major dense target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
6484  template< typename MT3 // Type of the left-hand side target matrix
6485  , typename MT4 // Type of the left-hand side matrix operand
6486  , typename MT5 // Type of the right-hand side matrix operand
6487  , typename ST2 > // Type of the scalar value
6488  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6489  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6490  {
6491  // TODO
6492  selectSmallAddAssignKernel( ~C, A, B, scalar );
6493  }
6494  //**********************************************************************************************
6495 
6496  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment kernel.
// Participates in overload resolution only when UseBlasKernel<MT3,MT4,MT5,ST2> does not hold
// (DisableIf_) and forwards all arguments to selectLargeAddAssignKernel().
//
//   C      - target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
6510  template< typename MT3 // Type of the left-hand side target matrix
6511  , typename MT4 // Type of the left-hand side matrix operand
6512  , typename MT5 // Type of the right-hand side matrix operand
6513  , typename ST2 > // Type of the scalar value
6514  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
6515  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6516  {
6517  selectLargeAddAssignKernel( C, A, B, scalar );
6518  }
6519  //**********************************************************************************************
6520 
6521  //**BLAS-based addition assignment to dense matrices********************************************
6522 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
6523 
6536  template< typename MT3 // Type of the left-hand side target matrix
6537  , typename MT4 // Type of the left-hand side matrix operand
6538  , typename MT5 // Type of the right-hand side matrix operand
6539  , typename ST2 > // Type of the scalar value
6540  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
6541  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6542  {
6543  typedef ElementType_<MT3> ET;
6544 
6545  if( IsTriangular<MT4>::value ) {
6546  ResultType_<MT3> tmp( serial( B ) );
6547  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6548  addAssign( C, tmp );
6549  }
6550  else if( IsTriangular<MT5>::value ) {
6551  ResultType_<MT3> tmp( serial( A ) );
6552  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
6553  addAssign( C, tmp );
6554  }
6555  else {
6556  gemm( C, A, B, ET(scalar), ET(1) );
6557  }
6558  }
6559 #endif
6560  //**********************************************************************************************
6561 
6562  //**Addition assignment to sparse matrices******************************************************
6563  // No special implementation for the addition assignment to sparse matrices.
6564  //**********************************************************************************************
6565 
6566  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled dense matrix product to a dense matrix.
//
//   lhs - target dense matrix (either storage order) the scaled product is subtracted from
//   rhs - scaled multiplication expression; its operands are taken from rhs.matrix_ and its
//         scaling factor from rhs.scalar_
//
// Nothing is done if the target has no rows or no columns, or if the left operand has no
// columns. Otherwise both operands are evaluated via serial() and the work is handed to
// selectSubAssignKernel().
6578  template< typename MT // Type of the target dense matrix
6579  , bool SO > // Storage order of the target dense matrix
6580  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6581  {
6583 
6584  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6585  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6586 
      // Operands of the wrapped matrix multiplication expression.
6587  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
6588  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
6589 
      // Empty target or empty inner dimension: there is nothing to subtract.
6590  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6591  return;
6592  }
6593 
6594  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6595  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6596 
      // The evaluated operands must keep the dimensions of the originals and match the target.
6597  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6598  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6599  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6600  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6601  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6602  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6603 
6604  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6605  }
6606  //**********************************************************************************************
6607 
6608  //**Subtraction assignment to dense matrices (kernel selection)*********************************
6619  template< typename MT3 // Type of the left-hand side target matrix
6620  , typename MT4 // Type of the left-hand side matrix operand
6621  , typename MT5 // Type of the right-hand side matrix operand
6622  , typename ST2 > // Type of the scalar value
6623  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6624  {
6625  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
6626  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
6627  selectSmallSubAssignKernel( C, A, B, scalar );
6628  else
6629  selectBlasSubAssignKernel( C, A, B, scalar );
6630  }
6631  //**********************************************************************************************
6632 
6633  //**Default subtraction assignment to dense matrices (general/general)**************************
// Default subtraction assignment kernel for two non-diagonal operands.
// Enabled only when neither MT4 nor MT5 is diagonal (EnableIf_).
//
//   C      - target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
//
// The scaled product A * B * scalar is evaluated via serial() into a temporary of type
// ResultType, which is then subtracted from C with subAssign().
6647  template< typename MT3 // Type of the left-hand side target matrix
6648  , typename MT4 // Type of the left-hand side matrix operand
6649  , typename MT5 // Type of the right-hand side matrix operand
6650  , typename ST2 > // Type of the scalar value
6651  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >
6652  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6653  {
6654  const ResultType tmp( serial( A * B * scalar ) );
6655  subAssign( C, tmp );
6656  }
6657  //**********************************************************************************************
6658 
6659  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
6673  template< typename MT3 // Type of the left-hand side target matrix
6674  , typename MT4 // Type of the left-hand side matrix operand
6675  , typename MT5 // Type of the right-hand side matrix operand
6676  , typename ST2 > // Type of the scalar value
6677  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6678  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6679  {
6680  const size_t M( A.rows() );
6681  const size_t N( B.columns() );
6682 
6683  for( size_t i=0UL; i<M; ++i )
6684  {
6685  const size_t jbegin( ( IsUpper<MT4>::value )
6686  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6687  :( 0UL ) );
6688  const size_t jend( ( IsLower<MT4>::value )
6689  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6690  :( N ) );
6691  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6692 
6693  const size_t jnum( jend - jbegin );
6694  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6695 
6696  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6697  (~C)(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6698  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6699  }
6700  if( jpos < jend ) {
6701  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
6702  }
6703  }
6704  }
6705  //**********************************************************************************************
6706 
6707  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
6721  template< typename MT3 // Type of the left-hand side target matrix
6722  , typename MT4 // Type of the left-hand side matrix operand
6723  , typename MT5 // Type of the right-hand side matrix operand
6724  , typename ST2 > // Type of the scalar value
6725  static inline EnableIf_< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >
6726  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6727  {
6728  const size_t M( A.rows() );
6729  const size_t N( B.columns() );
6730 
6731  const size_t block( BLOCK_SIZE );
6732 
6733  for( size_t jj=0UL; jj<N; jj+=block ) {
6734  const size_t jend( min( N, jj+block ) );
6735  for( size_t ii=0UL; ii<M; ii+=block ) {
6736  const size_t iend( min( M, ii+block ) );
6737  for( size_t j=jj; j<jend; ++j )
6738  {
6739  const size_t ibegin( ( IsLower<MT4>::value )
6740  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
6741  :( ii ) );
6742  const size_t ipos( ( IsUpper<MT4>::value )
6743  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
6744  :( iend ) );
6745 
6746  for( size_t i=ibegin; i<ipos; ++i ) {
6747  (~C)(i,j) -= A(i,j) * B(j,j) * scalar;
6748  }
6749  }
6750  }
6751  }
6752  }
6753  //**********************************************************************************************
6754 
6755  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
6770  template< typename MT3 // Type of the left-hand side target matrix
6771  , typename MT4 // Type of the left-hand side matrix operand
6772  , typename MT5 // Type of the right-hand side matrix operand
6773  , typename ST2 > // Type of the scalar value
6774  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6775  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6776  {
6777  const size_t M( A.rows() );
6778  const size_t N( B.columns() );
6779 
6780  const size_t block( BLOCK_SIZE );
6781 
6782  for( size_t ii=0UL; ii<M; ii+=block ) {
6783  const size_t iend( min( M, ii+block ) );
6784  for( size_t jj=0UL; jj<N; jj+=block ) {
6785  const size_t jend( min( N, jj+block ) );
6786  for( size_t i=ii; i<iend; ++i )
6787  {
6788  const size_t jbegin( ( IsUpper<MT5>::value )
6789  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
6790  :( jj ) );
6791  const size_t jpos( ( IsLower<MT5>::value )
6792  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
6793  :( jend ) );
6794 
6795  for( size_t j=jbegin; j<jpos; ++j ) {
6796  (~C)(i,j) -= A(i,i) * B(i,j) * scalar;
6797  }
6798  }
6799  }
6800  }
6801  }
6802  //**********************************************************************************************
6803 
6804  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
6819  template< typename MT3 // Type of the left-hand side target matrix
6820  , typename MT4 // Type of the left-hand side matrix operand
6821  , typename MT5 // Type of the right-hand side matrix operand
6822  , typename ST2 > // Type of the scalar value
6823  static inline EnableIf_< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >
6824  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6825  {
6826  const size_t M( A.rows() );
6827  const size_t N( B.columns() );
6828 
6829  for( size_t j=0UL; j<N; ++j )
6830  {
6831  const size_t ibegin( ( IsLower<MT5>::value )
6832  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
6833  :( 0UL ) );
6834  const size_t iend( ( IsUpper<MT5>::value )
6835  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
6836  :( M ) );
6837  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6838 
6839  const size_t inum( iend - ibegin );
6840  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6841 
6842  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6843  (~C)(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
6844  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
6845  }
6846  if( ipos < iend ) {
6847  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
6848  }
6849  }
6850  }
6851  //**********************************************************************************************
6852 
6853  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
6867  template< typename MT3 // Type of the left-hand side target matrix
6868  , typename MT4 // Type of the left-hand side matrix operand
6869  , typename MT5 // Type of the right-hand side matrix operand
6870  , typename ST2 > // Type of the scalar value
6871  static inline EnableIf_< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >
6872  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6873  {
6874  for( size_t i=0UL; i<A.rows(); ++i ) {
6875  C(i,i) -= A(i,i) * B(i,i) * scalar;
6876  }
6877  }
6878  //**********************************************************************************************
6879 
6880  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction assignment kernel for the non-vectorized case.
// Participates in overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// does not hold (DisableIf_) and forwards all arguments to selectDefaultSubAssignKernel().
//
//   C      - target matrix
//   A      - left-hand side matrix operand
//   B      - right-hand side matrix operand
//   scalar - scaling factor
6894  template< typename MT3 // Type of the left-hand side target matrix
6895  , typename MT4 // Type of the left-hand side matrix operand
6896  , typename MT5 // Type of the right-hand side matrix operand
6897  , typename ST2 > // Type of the scalar value
6898  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6899  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6900  {
6901  selectDefaultSubAssignKernel( C, A, B, scalar );
6902  }
6903  //**********************************************************************************************
6904 
6905  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
6920  template< typename MT3 // Type of the left-hand side target matrix
6921  , typename MT4 // Type of the left-hand side matrix operand
6922  , typename MT5 // Type of the right-hand side matrix operand
6923  , typename ST2 > // Type of the scalar value
6924  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
6925  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6926  {
6927  const size_t M( A.rows() );
6928  const size_t N( B.columns() );
6929  const size_t K( A.columns() );
6930 
6931  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
6932 
6933  size_t i( 0UL );
6934 
6935  for( ; (i+2UL) <= M; i+=2UL )
6936  {
6937  size_t j( 0UL );
6938 
6939  for( ; (j+4UL) <= N; j+=4UL )
6940  {
6941  const size_t kbegin( ( IsUpper<MT4>::value )
6942  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6943  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6944  const size_t kend( ( IsLower<MT4>::value )
6945  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
6946  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
6947 
6948  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
6949  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
6950 
6951  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6952  size_t k( kbegin );
6953 
6954  for( ; k<kpos; k+=SIMDSIZE ) {
6955  const SIMDType a1( A.load(i ,k) );
6956  const SIMDType a2( A.load(i+1UL,k) );
6957  const SIMDType b1( B.load(k,j ) );
6958  const SIMDType b2( B.load(k,j+1UL) );
6959  const SIMDType b3( B.load(k,j+2UL) );
6960  const SIMDType b4( B.load(k,j+3UL) );
6961  xmm1 = xmm1 + a1 * b1;
6962  xmm2 = xmm2 + a1 * b2;
6963  xmm3 = xmm3 + a1 * b3;
6964  xmm4 = xmm4 + a1 * b4;
6965  xmm5 = xmm5 + a2 * b1;
6966  xmm6 = xmm6 + a2 * b2;
6967  xmm7 = xmm7 + a2 * b3;
6968  xmm8 = xmm8 + a2 * b4;
6969  }
6970 
6971  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
6972  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
6973  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
6974  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
6975  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
6976  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
6977  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
6978  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
6979 
6980  for( ; remainder && k<kend; ++k ) {
6981  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
6982  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
6983  (~C)(i ,j+2UL) -= A(i ,k) * B(k,j+2UL) * scalar;
6984  (~C)(i ,j+3UL) -= A(i ,k) * B(k,j+3UL) * scalar;
6985  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
6986  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
6987  (~C)(i+1UL,j+2UL) -= A(i+1UL,k) * B(k,j+2UL) * scalar;
6988  (~C)(i+1UL,j+3UL) -= A(i+1UL,k) * B(k,j+3UL) * scalar;
6989  }
6990  }
6991 
6992  for( ; (j+2UL) <= N; j+=2UL )
6993  {
6994  const size_t kbegin( ( IsUpper<MT4>::value )
6995  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
6996  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
6997  const size_t kend( ( IsLower<MT4>::value )
6998  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6999  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7000 
7001  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7002  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7003 
7004  SIMDType xmm1, xmm2, xmm3, xmm4;
7005  size_t k( kbegin );
7006 
7007  for( ; k<kpos; k+=SIMDSIZE ) {
7008  const SIMDType a1( A.load(i ,k) );
7009  const SIMDType a2( A.load(i+1UL,k) );
7010  const SIMDType b1( B.load(k,j ) );
7011  const SIMDType b2( B.load(k,j+1UL) );
7012  xmm1 = xmm1 + a1 * b1;
7013  xmm2 = xmm2 + a1 * b2;
7014  xmm3 = xmm3 + a2 * b1;
7015  xmm4 = xmm4 + a2 * b2;
7016  }
7017 
7018  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7019  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7020  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7021  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7022 
7023  for( ; remainder && k<kend; ++k ) {
7024  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7025  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7026  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7027  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7028  }
7029  }
7030 
7031  if( j < N )
7032  {
7033  const size_t kbegin( ( IsUpper<MT4>::value )
7034  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7035  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7036  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7037 
7038  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7039  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7040 
7041  SIMDType xmm1, xmm2;
7042  size_t k( kbegin );
7043 
7044  for( ; k<kpos; k+=SIMDSIZE ) {
7045  const SIMDType b1( B.load(k,j) );
7046  xmm1 = xmm1 + A.load(i ,k) * b1;
7047  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7048  }
7049 
7050  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7051  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7052 
7053  for( ; remainder && k<kend; ++k ) {
7054  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7055  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7056  }
7057  }
7058  }
7059 
7060  if( i < M )
7061  {
7062  size_t j( 0UL );
7063 
7064  for( ; (j+4UL) <= N; j+=4UL )
7065  {
7066  const size_t kbegin( ( IsUpper<MT4>::value )
7067  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7068  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7069  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
7070 
7071  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7072  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7073 
7074  SIMDType xmm1, xmm2, xmm3, xmm4;
7075  size_t k( kbegin );
7076 
7077  for( ; k<kpos; k+=SIMDSIZE ) {
7078  const SIMDType a1( A.load(i,k) );
7079  xmm1 = xmm1 + a1 * B.load(k,j );
7080  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7081  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
7082  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
7083  }
7084 
7085  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7086  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7087  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
7088  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
7089 
7090  for( ; remainder && k<kend; ++k ) {
7091  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7092  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7093  (~C)(i,j+2UL) -= A(i,k) * B(k,j+2UL) * scalar;
7094  (~C)(i,j+3UL) -= A(i,k) * B(k,j+3UL) * scalar;
7095  }
7096  }
7097 
7098  for( ; (j+2UL) <= N; j+=2UL )
7099  {
7100  const size_t kbegin( ( IsUpper<MT4>::value )
7101  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7102  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7103  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7104 
7105  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7106  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7107 
7108  SIMDType xmm1, xmm2;
7109  size_t k( kbegin );
7110 
7111  for( ; k<kpos; k+=SIMDSIZE ) {
7112  const SIMDType a1( A.load(i,k) );
7113  xmm1 = xmm1 + a1 * B.load(k,j );
7114  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7115  }
7116 
7117  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7118  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7119 
7120  for( ; remainder && k<kend; ++k ) {
7121  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7122  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7123  }
7124  }
7125 
7126  if( j < N )
7127  {
7128  const size_t kbegin( ( IsUpper<MT4>::value )
7129  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7130  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7131 
7132  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7133  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7134 
7135  SIMDType xmm1;
7136  size_t k( kbegin );
7137 
7138  for( ; k<kpos; k+=SIMDSIZE ) {
7139  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
7140  }
7141 
7142  (~C)(i,j) -= sum( xmm1 ) * scalar;
7143 
7144  for( ; remainder && k<K; ++k ) {
7145  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7146  }
7147  }
7148  }
7149  }
7150  //**********************************************************************************************
7151 
7152  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
// Vectorized small-matrix kernel for the scaled subtraction assignment to a column-major
// dense target: every element (~C)(i,j) is decreased by scalar times the dot product of
// row i of A and column j of B.
//
// C      - target matrix (DenseMatrix<MT3,true>), updated in place through (~C)(i,j)
// A      - left-hand side operand, read through A(i,k) and A.load(i,k)
// B      - right-hand side operand, read through B(k,j) and B.load(k,j)
// scalar - factor applied to every accumulated dot product before it is subtracted
//
// The overload is only enabled (EnableIf_) if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds. The target is tiled into 4x2, 4x1, 2x2, 2x1, 1x2 and 1x1 blocks. For every block
// the k-range [kbegin,kend) is narrowed by means of the IsUpper/IsLower traits of the
// operands, traversed SIMDSIZE elements at a time up to kpos, and completed by a scalar
// loop in case the operands are not both padded.
7167  template< typename MT3 // Type of the left-hand side target matrix
7168  , typename MT4 // Type of the left-hand side matrix operand
7169  , typename MT5 // Type of the right-hand side matrix operand
7170  , typename ST2 > // Type of the scalar value
7171  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
7172  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7173  {
7174  const size_t M( A.rows() );
7175  const size_t N( B.columns() );
7176  const size_t K( A.columns() );
7177 
      // A scalar tail loop is required unless both operands are padded.
7178  const bool remainder( !IsPadded<MT4>::value || !IsPadded<MT5>::value );
7179 
7180  size_t i( 0UL );
7181 
      // Four rows of the target at a time.
7182  for( ; (i+4UL) <= M; i+=4UL )
7183  {
7184  size_t j( 0UL );
7185 
      // 4x2 block: eight accumulators, one per target element.
7186  for( ; (j+2UL) <= N; j+=2UL )
7187  {
      // Start of the k-range: max(i,j), i or j (masked with size_t(-SIMDSIZE)) depending on
      // whether A is upper and/or B is lower, 0 otherwise. The mask is presumably meant to
      // round down to a multiple of SIMDSIZE (assumes SIMDSIZE is a power of two).
7188  const size_t kbegin( ( IsUpper<MT4>::value )
7189  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7190  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // End of the k-range: bounded by i+4 if A is lower and/or by j+2 if B is upper, K otherwise.
7191  const size_t kend( ( IsLower<MT4>::value )
7192  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
7193  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7194 
      // End of the vectorized loop; with a scalar tail it is kend rounded down to a multiple
      // of SIMDSIZE (which is what the assertion checks).
7195  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7196  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7197 
      // NOTE(review): the accumulators are default constructed; this relies on SIMDType
      // default construction yielding zero - confirm.
7198  SIMDType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7199  size_t k( kbegin );
7200 
7201  for( ; k<kpos; k+=SIMDSIZE )
7202  {
7203  const SIMDType a1( A.load(i ,k) );
7204  const SIMDType a2( A.load(i+1UL,k) );
7205  const SIMDType a3( A.load(i+2UL,k) );
7206  const SIMDType a4( A.load(i+3UL,k) );
7207  const SIMDType b1( B.load(k,j ) );
7208  const SIMDType b2( B.load(k,j+1UL) );
7209  xmm1 = xmm1 + a1 * b1;
7210  xmm2 = xmm2 + a1 * b2;
7211  xmm3 = xmm3 + a2 * b1;
7212  xmm4 = xmm4 + a2 * b2;
7213  xmm5 = xmm5 + a3 * b1;
7214  xmm6 = xmm6 + a3 * b2;
7215  xmm7 = xmm7 + a4 * b1;
7216  xmm8 = xmm8 + a4 * b2;
7217  }
7218 
      // Reduce every accumulator via sum(), scale it and subtract it from the target element.
7219  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7220  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7221  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7222  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7223  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
7224  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
7225  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
7226  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
7227 
      // Scalar tail for the k values left over by the vectorized loop.
7228  for( ; remainder && k<kend; ++k ) {
7229  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7230  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7231  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7232  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7233  (~C)(i+2UL,j ) -= A(i+2UL,k) * B(k,j ) * scalar;
7234  (~C)(i+2UL,j+1UL) -= A(i+2UL,k) * B(k,j+1UL) * scalar;
7235  (~C)(i+3UL,j ) -= A(i+3UL,k) * B(k,j ) * scalar;
7236  (~C)(i+3UL,j+1UL) -= A(i+3UL,k) * B(k,j+1UL) * scalar;
7237  }
7238  }
7239 
      // 4x1 block: the single column left over by the two-column loop.
7240  if( j < N )
7241  {
7242  const size_t kbegin( ( IsUpper<MT4>::value )
7243  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7244  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // Only the structure of A narrows the end of the k-range here.
7245  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
7246 
7247  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7248  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7249 
7250  SIMDType xmm1, xmm2, xmm3, xmm4;
7251  size_t k( kbegin );
7252 
7253  for( ; k<kpos; k+=SIMDSIZE ) {
7254  const SIMDType b1( B.load(k,j) );
7255  xmm1 = xmm1 + A.load(i ,k) * b1;
7256  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7257  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
7258  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
7259  }
7260 
7261  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7262  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7263  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
7264  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
7265 
7266  for( ; remainder && k<kend; ++k ) {
7267  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7268  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7269  (~C)(i+2UL,j) -= A(i+2UL,k) * B(k,j) * scalar;
7270  (~C)(i+3UL,j) -= A(i+3UL,k) * B(k,j) * scalar;
7271  }
7272  }
7273  }
7274 
      // Two rows of the target at a time (rows left over by the four-row loop).
7275  for( ; (i+2UL) <= M; i+=2UL )
7276  {
7277  size_t j( 0UL );
7278 
      // 2x2 block.
7279  for( ; (j+2UL) <= N; j+=2UL )
7280  {
7281  const size_t kbegin( ( IsUpper<MT4>::value )
7282  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7283  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7284  const size_t kend( ( IsLower<MT4>::value )
7285  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7286  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7287 
7288  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7289  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7290 
7291  SIMDType xmm1, xmm2, xmm3, xmm4;
7292  size_t k( kbegin );
7293 
7294  for( ; k<kpos; k+=SIMDSIZE ) {
7295  const SIMDType a1( A.load(i ,k) );
7296  const SIMDType a2( A.load(i+1UL,k) );
7297  const SIMDType b1( B.load(k,j ) );
7298  const SIMDType b2( B.load(k,j+1UL) );
7299  xmm1 = xmm1 + a1 * b1;
7300  xmm2 = xmm2 + a1 * b2;
7301  xmm3 = xmm3 + a2 * b1;
7302  xmm4 = xmm4 + a2 * b2;
7303  }
7304 
7305  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7306  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7307  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7308  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7309 
7310  for( ; remainder && k<kend; ++k ) {
7311  (~C)(i ,j ) -= A(i ,k) * B(k,j ) * scalar;
7312  (~C)(i ,j+1UL) -= A(i ,k) * B(k,j+1UL) * scalar;
7313  (~C)(i+1UL,j ) -= A(i+1UL,k) * B(k,j ) * scalar;
7314  (~C)(i+1UL,j+1UL) -= A(i+1UL,k) * B(k,j+1UL) * scalar;
7315  }
7316  }
7317 
      // 2x1 block: the single column left over by the two-column loop.
7318  if( j < N )
7319  {
7320  const size_t kbegin( ( IsUpper<MT4>::value )
7321  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7322  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7323  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7324 
7325  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7326  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7327 
7328  SIMDType xmm1, xmm2;
7329  size_t k( kbegin );
7330 
7331  for( ; k<kpos; k+=SIMDSIZE ) {
7332  const SIMDType b1( B.load(k,j) );
7333  xmm1 = xmm1 + A.load(i ,k) * b1;
7334  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7335  }
7336 
7337  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7338  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7339 
7340  for( ; remainder && k<kend; ++k ) {
7341  (~C)(i ,j) -= A(i ,k) * B(k,j) * scalar;
7342  (~C)(i+1UL,j) -= A(i+1UL,k) * B(k,j) * scalar;
7343  }
7344  }
7345  }
7346 
      // The single row left over by the two-row loop.
7347  if( i < M )
7348  {
7349  size_t j( 0UL );
7350 
      // 1x2 block.
7351  for( ; (j+2UL) <= N; j+=2UL )
7352  {
7353  const size_t kbegin( ( IsUpper<MT4>::value )
7354  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7355  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
      // Only the structure of B narrows the end of the k-range here.
7356  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7357 
7358  const size_t kpos( remainder ? ( kend & size_t(-SIMDSIZE) ) : kend );
7359  BLAZE_INTERNAL_ASSERT( !remainder || ( kend - ( kend % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7360 
7361  SIMDType xmm1, xmm2;
7362  size_t k( kbegin );
7363 
7364  for( ; k<kpos; k+=SIMDSIZE ) {
7365  const SIMDType a1( A.load(i,k) );
7366  xmm1 = xmm1 + a1 * B.load(k,j );
7367  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7368  }
7369 
7370  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7371  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7372 
7373  for( ; remainder && k<kend; ++k ) {
7374  (~C)(i,j ) -= A(i,k) * B(k,j ) * scalar;
7375  (~C)(i,j+1UL) -= A(i,k) * B(k,j+1UL) * scalar;
7376  }
7377  }
7378 
      // 1x1 block: the end of the k-range is not narrowed, the loops run up to K.
7379  if( j < N )
7380  {
7381  const size_t kbegin( ( IsUpper<MT4>::value )
7382  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-SIMDSIZE) )
7383  :( IsLower<MT5>::value ? ( j & size_t(-SIMDSIZE) ) : 0UL ) );
7384 
7385  const size_t kpos( remainder ? ( K & size_t(-SIMDSIZE) ) : K );
7386  BLAZE_INTERNAL_ASSERT( !remainder || ( K - ( K % (SIMDSIZE) ) ) == kpos, "Invalid end calculation" );
7387 
7388  SIMDType xmm1;
7389  size_t k( kbegin );
7390 
7391  for( ; k<kpos; k+=SIMDSIZE ) {
7392  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
7393  }
7394 
7395  (~C)(i,j) -= sum( xmm1 ) * scalar;
7396 
7397  for( ; remainder && k<K; ++k ) {
7398  (~C)(i,j) -= A(i,k) * B(k,j) * scalar;
7399  }
7400  }
7401  }
7402  }
7403  //**********************************************************************************************
7404 
7405  //**Default subtraction assignment to dense matrices (large matrices)***************************
// Fallback large-matrix subtraction assignment kernel. This overload is selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf_) and simply
// forwards all four arguments to selectDefaultSubAssignKernel().
7419  template< typename MT3 // Type of the left-hand side target matrix
7420  , typename MT4 // Type of the left-hand side matrix operand
7421  , typename MT5 // Type of the right-hand side matrix operand
7422  , typename ST2 > // Type of the scalar value
7423  static inline DisableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
7424  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7425  {
7426  selectDefaultSubAssignKernel( C, A, B, scalar );
7427  }
7428  //**********************************************************************************************
7429 
7430  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
// Large-matrix subtraction assignment kernel for row-major dense targets
// (DenseMatrix<MT3,false>), enabled if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// No dedicated large-matrix implementation exists yet (see the TODO marker): the call is
// forwarded to selectSmallSubAssignKernel() with the unwrapped target ~C.
7445  template< typename MT3 // Type of the left-hand side target matrix
7446  , typename MT4 // Type of the left-hand side matrix operand
7447  , typename MT5 // Type of the right-hand side matrix operand
7448  , typename ST2 > // Type of the scalar value
7449  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
7450  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7451  {
7452  // TODO
7453  selectSmallSubAssignKernel( ~C, A, B, scalar );
7454  }
7455  //**********************************************************************************************
7456 
7457  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
// Large-matrix subtraction assignment kernel for column-major dense targets
// (DenseMatrix<MT3,true>), enabled if UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// No dedicated large-matrix implementation exists yet (see the TODO marker): the call is
// forwarded to selectSmallSubAssignKernel() with the unwrapped target ~C.
7472  template< typename MT3 // Type of the left-hand side target matrix
7473  , typename MT4 // Type of the left-hand side matrix operand
7474  , typename MT5 // Type of the right-hand side matrix operand
7475  , typename ST2 > // Type of the scalar value
7476  static inline EnableIf_< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >
7477  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7478  {
7479  // TODO
7480  selectSmallSubAssignKernel( ~C, A, B, scalar );
7481  }
7482  //**********************************************************************************************
7483 
7484  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback for the BLAS-based subtraction assignment. This overload is selected when
// UseBlasKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf_) and forwards all four
// arguments to selectLargeSubAssignKernel().
7498  template< typename MT3 // Type of the left-hand side target matrix
7499  , typename MT4 // Type of the left-hand side matrix operand
7500  , typename MT5 // Type of the right-hand side matrix operand
7501  , typename ST2 > // Type of the scalar value
7502  static inline DisableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
7503  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7504  {
7505  selectLargeSubAssignKernel( C, A, B, scalar );
7506  }
7507  //**********************************************************************************************
7508 
7509  //**BLAS-based subtraction assignment to dense matrices*****************************************
7510 #if BLAZE_BLAS_MODE && BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
7511 
// BLAS-based subtraction assignment kernel, compiled only if both BLAZE_BLAS_MODE and
// BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION are enabled and selected if
// UseBlasKernel<MT3,MT4,MT5,ST2> holds (EnableIf_).
//
// C      - target matrix the scaled product is subtracted from
// A      - left-hand side operand
// B      - right-hand side operand
// scalar - scaling factor, converted to the element type of the target
7524  template< typename MT3 // Type of the left-hand side target matrix
7525  , typename MT4 // Type of the left-hand side matrix operand
7526  , typename MT5 // Type of the right-hand side matrix operand
7527  , typename ST2 > // Type of the scalar value
7528  static inline EnableIf_< UseBlasKernel<MT3,MT4,MT5,ST2> >
7529  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7530  {
7531  typedef ElementType_<MT3> ET;
7532 
      // Triangular left operand: copy B into a temporary, let trmm() process the temporary
      // with A from the left (presumably tmp = scalar * A * tmp - confirm against the trmm
      // wrapper), then subtract the temporary from the target.
7533  if( IsTriangular<MT4>::value ) {
7534  ResultType_<MT3> tmp( serial( B ) );
7535  trmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7536  subAssign( C, tmp );
7537  }
      // Triangular right operand: same scheme with a copy of A processed by B from the right.
7538  else if( IsTriangular<MT5>::value ) {
7539  ResultType_<MT3> tmp( serial( A ) );
7540  trmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), ET(scalar) );
7541  subAssign( C, tmp );
7542  }
      // General case: a single gemm() call with the negated scalar and a factor of 1 for C
      // (presumably C = -scalar * A * B + C - confirm against the gemm wrapper).
7543  else {
7544  gemm( C, A, B, ET(-scalar), ET(1) );
7545  }
7546  }
7547 #endif
7548  //**********************************************************************************************
7549 
7550  //**Subtraction assignment to sparse matrices***************************************************
7551  // No special implementation for the subtraction assignment to sparse matrices.
7552  //**********************************************************************************************
7553 
7554  //**Multiplication assignment to dense matrices*************************************************
7555  // No special implementation for the multiplication assignment to dense matrices.
7556  //**********************************************************************************************
7557 
7558  //**Multiplication assignment to sparse matrices************************************************
7559  // No special implementation for the multiplication assignment to sparse matrices.
7560  //**********************************************************************************************
7561 
7562  //**SMP assignment to dense matrices************************************************************
// SMP assignment of a scaled dense matrix product to a dense matrix. The overload is only
// enabled if IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf_).
//
// lhs - target dense matrix
// rhs - scaled multiplication expression; its matrix_ member provides the two operands and
//       its scalar_ member the scaling factor
//
// Both operands are evaluated into the types LT and RT (declared outside this function)
// before the assignment is forwarded as smpAssign( ~lhs, A * B * rhs.scalar_ ).
7577  template< typename MT // Type of the target dense matrix
7578  , bool SO > // Storage order of the target dense matrix
7579  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
7580  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7581  {
7583 
7584  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7585  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7586 
7587  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7588  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7589 
      // Nothing to do for an empty target; for an empty inner dimension the target is
      // reset instead of being computed.
7590  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7591  return;
7592  }
7593  else if( left.columns() == 0UL ) {
7594  reset( ~lhs );
7595  return;
7596  }
7597 
7598  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7599  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7600 
7601  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7602  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7603  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7604  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7605  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7606  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7607 
7608  smpAssign( ~lhs, A * B * rhs.scalar_ );
7609  }
7610  //**********************************************************************************************
7611 
7612  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of a scaled dense matrix product to a sparse matrix. The overload is only
// enabled if IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf_).
//
// lhs - target sparse matrix
// rhs - scaled multiplication expression to be assigned
//
// The expression is first evaluated into a temporary of type TmpType, which is then
// assigned to the target via smpAssign().
7627  template< typename MT // Type of the target sparse matrix
7628  , bool SO > // Storage order of the target sparse matrix
7629  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
7630  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7631  {
7633 
      // Type of the temporary: chosen between OppositeType and ResultType based on the
      // storage order SO of the target (presumably OppositeType for SO == true - confirm
      // against IfTrue_).
7634  typedef IfTrue_< SO, OppositeType, ResultType > TmpType;
7635 
7641  BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE( CompositeType_<TmpType> );
7642 
7643  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7644  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7645 
7646  const TmpType tmp( rhs );
7647  smpAssign( ~lhs, tmp );
7648  }
7649  //**********************************************************************************************
7650 
7651  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of a scaled dense matrix product to a dense matrix. The overload
// is only enabled if IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf_).
//
// lhs - target dense matrix
// rhs - scaled multiplication expression; its matrix_ member provides the two operands and
//       its scalar_ member the scaling factor
//
// Both operands are evaluated into the types LT and RT (declared outside this function)
// before the operation is forwarded as smpAddAssign( ~lhs, A * B * rhs.scalar_ ).
7666  template< typename MT // Type of the target dense matrix
7667  , bool SO > // Storage order of the target dense matrix
7668  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
7669  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7670  {
7672 
7673  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7674  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7675 
7676  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7677  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7678 
      // The target is left untouched if it is empty or if the inner dimension is zero.
7679  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7680  return;
7681  }
7682 
7683  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7684  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7685 
7686  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7687  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7688  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7689  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7690  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7691  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7692 
7693  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
7694  }
7695  //**********************************************************************************************
7696 
7697  //**SMP addition assignment to sparse matrices**************************************************
7698  // No special implementation for the SMP addition assignment to sparse matrices.
7699  //**********************************************************************************************
7700 
7701  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of a scaled dense matrix product to a dense matrix. The
// overload is only enabled if IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf_).
//
// lhs - target dense matrix
// rhs - scaled multiplication expression; its matrix_ member provides the two operands and
//       its scalar_ member the scaling factor
//
// Both operands are evaluated into the types LT and RT (declared outside this function)
// before the operation is forwarded as smpSubAssign( ~lhs, A * B * rhs.scalar_ ).
7716  template< typename MT // Type of the target dense matrix
7717  , bool SO > // Storage order of the target dense matrix
7718  friend inline EnableIf_< IsEvaluationRequired<MT,MT1,MT2> >
7719  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7720  {
7722 
7723  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7724  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7725 
7726  LeftOperand_<MMM> left ( rhs.matrix_.leftOperand() );
7727  RightOperand_<MMM> right( rhs.matrix_.rightOperand() );
7728 
      // The target is left untouched if it is empty or if the inner dimension is zero.
7729  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7730  return;
7731  }
7732 
7733  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7734  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7735 
7736  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7737  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7738  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7739  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7740  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7741  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7742 
7743  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
7744  }
7745  //**********************************************************************************************
7746 
7747  //**SMP subtraction assignment to sparse matrices***********************************************
7748  // No special implementation for the SMP subtraction assignment to sparse matrices.
7749  //**********************************************************************************************
7750 
7751  //**SMP multiplication assignment to dense matrices*********************************************
7752  // No special implementation for the SMP multiplication assignment to dense matrices.
7753  //**********************************************************************************************
7754 
7755  //**SMP multiplication assignment to sparse matrices********************************************
7756  // No special implementation for the SMP multiplication assignment to sparse matrices.
7757  //**********************************************************************************************
7758 
7759  //**Compile time checks*************************************************************************
// Compile-time constraint on the class: judging by the macro name, ST and RightOperand are
// required to be the same type (NOTE(review): confirm against the macro definition).
7767  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
7768  //**********************************************************************************************
7769 };
7771 //*************************************************************************************************
7772 
7773 
7774 
7775 
7776 //=================================================================================================
7777 //
7778 // GLOBAL BINARY ARITHMETIC OPERATORS
7779 //
7780 //=================================================================================================
7781 
7782 //*************************************************************************************************
// Multiplication operator for the product of a row-major dense matrix and a column-major
// dense matrix (lhs * rhs).
//
// lhs - left-hand side dense matrix (row-major, DenseMatrix<T1,false>)
// rhs - right-hand side dense matrix (column-major, DenseMatrix<T2,true>)
//
// Returns a DMatTDMatMultExpr<T1,T2> expression object constructed from the two unwrapped
// operands. If the number of columns of lhs differs from the number of rows of rhs, the
// error is reported via BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" ).
7811 template< typename T1 // Type of the left-hand side dense matrix
7812  , typename T2 > // Type of the right-hand side dense matrix
7813 inline const DMatTDMatMultExpr<T1,T2>
7814  operator*( const DenseMatrix<T1,false>& lhs, const DenseMatrix<T2,true>& rhs )
7815 {
7817 
      // The inner dimensions of the two operands have to agree.
7818  if( (~lhs).columns() != (~rhs).rows() ) {
7819  BLAZE_THROW_INVALID_ARGUMENT( "Matrix sizes do not match" );
7820  }
7821 
7822  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
7823 }
7824 //*************************************************************************************************
7825 
7826 
7827 
7828 
7829 //=================================================================================================
7830 //
7831 // ROWS SPECIALIZATIONS
7832 //
7833 //=================================================================================================
7834 
7835 //*************************************************************************************************
// Rows trait of the multiplication expression: derived from the Rows trait of the
// left-hand side operand type MT1.
7837 template< typename MT1, typename MT2 >
7838 struct Rows< DMatTDMatMultExpr<MT1,MT2> > : public Rows<MT1>
7839 {};
7841 //*************************************************************************************************
7842 
7843 
7844 
7845 
7846 //=================================================================================================
7847 //
7848 // COLUMNS SPECIALIZATIONS
7849 //
7850 //=================================================================================================
7851 
7852 //*************************************************************************************************
// Columns trait of the multiplication expression: derived from the Columns trait of the
// right-hand side operand type MT2.
7854 template< typename MT1, typename MT2 >
7855 struct Columns< DMatTDMatMultExpr<MT1,MT2> > : public Columns<MT2>
7856 {};
7858 //*************************************************************************************************
7859 
7860 
7861 
7862 
7863 //=================================================================================================
7864 //
7865 // ISALIGNED SPECIALIZATIONS
7866 //
7867 //=================================================================================================
7868 
7869 //*************************************************************************************************
// IsAligned trait of the multiplication expression: the conjunction (And) of the IsAligned
// traits of both operand types.
7871 template< typename MT1, typename MT2 >
7872 struct IsAligned< DMatTDMatMultExpr<MT1,MT2> >
7873  : public BoolConstant< And< IsAligned<MT1>, IsAligned<MT2> >::value >
7874 {};
7876 //*************************************************************************************************
7877 
7878 
7879 
7880 
7881 //=================================================================================================
7882 //
7883 // ISLOWER SPECIALIZATIONS
7884 //
7885 //=================================================================================================
7886 
7887 //*************************************************************************************************
// IsLower trait of the multiplication expression: the conjunction (And) of the IsLower
// traits of both operand types.
7889 template< typename MT1, typename MT2 >
7890 struct IsLower< DMatTDMatMultExpr<MT1,MT2> >
7891  : public BoolConstant< And< IsLower<MT1>, IsLower<MT2> >::value >
7892 {};
7894 //*************************************************************************************************
7895 
7896 
7897 
7898 
7899 //=================================================================================================
7900 //
7901 // ISUNILOWER SPECIALIZATIONS
7902 //
7903 //=================================================================================================
7904 
7905 //*************************************************************************************************
// IsUniLower trait of the multiplication expression: the conjunction (And) of the
// IsUniLower traits of both operand types.
7907 template< typename MT1, typename MT2 >
7908 struct IsUniLower< DMatTDMatMultExpr<MT1,MT2> >
7909  : public BoolConstant< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
7910 {};
7912 //*************************************************************************************************
7913 
7914 
7915 
7916 
7917 //=================================================================================================
7918 //
7919 // ISSTRICTLYLOWER SPECIALIZATIONS
7920 //
7921 //=================================================================================================
7922 
7923 //*************************************************************************************************
// IsStrictlyLower trait of the multiplication expression: holds if one operand type is
// strictly lower and the other operand type is lower (either way round).
7925 template< typename MT1, typename MT2 >
7926 struct IsStrictlyLower< DMatTDMatMultExpr<MT1,MT2> >
7927  : public BoolConstant< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7928  , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
7929 {};
7931 //*************************************************************************************************
7932 
7933 
7934 
7935 
7936 //=================================================================================================
7937 //
7938 // ISUPPER SPECIALIZATIONS
7939 //
7940 //=================================================================================================
7941 
7942 //*************************************************************************************************
// IsUpper trait of the multiplication expression: the conjunction (And) of the IsUpper
// traits of both operand types.
7944 template< typename MT1, typename MT2 >
7945 struct IsUpper< DMatTDMatMultExpr<MT1,MT2> >
7946  : public BoolConstant< And< IsUpper<MT1>, IsUpper<MT2> >::value >
7947 {};
7949 //*************************************************************************************************
7950 
7951 
7952 
7953 
7954 //=================================================================================================
7955 //
7956 // ISUNIUPPER SPECIALIZATIONS
7957 //
7958 //=================================================================================================
7959 
7960 //*************************************************************************************************
// IsUniUpper trait of the multiplication expression: the conjunction (And) of the
// IsUniUpper traits of both operand types.
7962 template< typename MT1, typename MT2 >
7963 struct IsUniUpper< DMatTDMatMultExpr<MT1,MT2> >
7964  : public BoolConstant< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
7965 {};
7967 //*************************************************************************************************
7968 
7969 
7970 
7971 
7972 //=================================================================================================
7973 //
7974 // ISSTRICTLYUPPER SPECIALIZATIONS
7975 //
7976 //=================================================================================================
7977 
7978 //*************************************************************************************************
// IsStrictlyUpper trait of the multiplication expression: holds if one operand type is
// strictly upper and the other operand type is upper (either way round).
7980 template< typename MT1, typename MT2 >
7981 struct IsStrictlyUpper< DMatTDMatMultExpr<MT1,MT2> >
7982  : public BoolConstant< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
7983  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
7984 {};
7986 //*************************************************************************************************
7987 
7988 
7989 
7990 
7991 //=================================================================================================
7992 //
7993 // EXPRESSION TRAIT SPECIALIZATIONS
7994 //
7995 //=================================================================================================
7996 
7997 //*************************************************************************************************
// Expression trait for multiplying a DMatTDMatMultExpr with a dense vector. If MT1 is a
// row-major dense matrix, MT2 a column-major dense matrix and VT a dense column vector,
// Type is the type of MT1 times (MT2 times VT), i.e. the product is typed with the
// matrix/vector multiplication nested innermost; otherwise Type is INVALID_TYPE.
7999 template< typename MT1, typename MT2, typename VT >
8000 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
8001 {
8002  public:
8003  //**********************************************************************************************
8004  using Type = If_< And< IsDenseMatrix<MT1>, IsRowMajorMatrix<MT1>
8005  , IsDenseMatrix<MT2>, IsColumnMajorMatrix<MT2>
8006  , IsDenseVector<VT>, IsColumnVector<VT> >
8007  , DMatDVecMultExprTrait_< MT1, TDMatDVecMultExprTrait_<MT2,VT> >
8008  , INVALID_TYPE >;
8009  //**********************************************************************************************
8010 };
8012 //*************************************************************************************************
8013 
8014 
8015 //*************************************************************************************************
8017 template< typename MT1, typename MT2, typename VT >
8018 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
8019 {
8020  public:
8021  //**********************************************************************************************
8022  using Type = If_< And< IsDenseMatrix<MT1>, IsRowMajorMatrix<MT1>
8023  , IsDenseMatrix<MT2>, IsColumnMajorMatrix<MT2>
8024  , IsSparseVector<VT>, IsColumnVector<VT> >
8025  , DMatDVecMultExprTrait_< MT1, TDMatSVecMultExprTrait_<MT2,VT> >
8026  , INVALID_TYPE >;
8027  //**********************************************************************************************
8028 };
8030 //*************************************************************************************************
8031 
8032 
8033 //*************************************************************************************************
8035 template< typename VT, typename MT1, typename MT2 >
8036 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
8037 {
8038  public:
8039  //**********************************************************************************************
8040  using Type = If_< And< IsDenseVector<VT>, IsRowVector<VT>
8041  , IsDenseMatrix<MT1>, IsRowMajorMatrix<MT1>
8042  , IsDenseMatrix<MT2>, IsColumnMajorMatrix<MT2> >
8043  , TDVecTDMatMultExprTrait_< TDVecDMatMultExprTrait_<VT,MT1>, MT2 >
8044  , INVALID_TYPE >;
8045  //**********************************************************************************************
8046 };
8048 //*************************************************************************************************
8049 
8050 
8051 //*************************************************************************************************
8053 template< typename VT, typename MT1, typename MT2 >
8054 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
8055 {
8056  public:
8057  //**********************************************************************************************
8058  using Type = If_< And< IsSparseVector<VT>, IsRowVector<VT>
8059  , IsDenseMatrix<MT1>, IsRowMajorMatrix<MT1>
8060  , IsDenseMatrix<MT2>, IsColumnMajorMatrix<MT2> >
8061  , TDVecTDMatMultExprTrait_< TSVecDMatMultExprTrait_<VT,MT1>, MT2 >
8062  , INVALID_TYPE >;
8063  //**********************************************************************************************
8064 };
8066 //*************************************************************************************************
8067 
8068 
8069 //*************************************************************************************************
8071 template< typename MT1, typename MT2, bool AF >
8072 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2>, AF >
8073 {
8074  public:
8075  //**********************************************************************************************
8076  using Type = MultExprTrait_< SubmatrixExprTrait_<const MT1,AF>
8077  , SubmatrixExprTrait_<const MT2,AF> >;
8078  //**********************************************************************************************
8079 };
8081 //*************************************************************************************************
8082 
8083 
8084 //*************************************************************************************************
8086 template< typename MT1, typename MT2 >
8087 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
8088 {
8089  public:
8090  //**********************************************************************************************
8091  using Type = MultExprTrait_< RowExprTrait_<const MT1>, MT2 >;
8092  //**********************************************************************************************
8093 };
8095 //*************************************************************************************************
8096 
8097 
8098 //*************************************************************************************************
8100 template< typename MT1, typename MT2 >
8101 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
8102 {
8103  public:
8104  //**********************************************************************************************
8105  using Type = MultExprTrait_< MT1, ColumnExprTrait_<const MT2> >;
8106  //**********************************************************************************************
8107 };
8109 //*************************************************************************************************
8110 
8111 } // namespace blaze
8112 
8113 #endif
#define BLAZE_THROW_INVALID_ARGUMENT(MESSAGE)
Macro for the emission of a std::invalid_argument exception.This macro encapsulates the default way o...
Definition: Exception.h:235
OppositeType_< ResultType > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:229
IfTrue_< evaluateLeft, const RT1, CT1 > LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:243
Header file for auxiliary alias declarations.
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:72
Constraint on the data type.
Header file for kernel specific block sizes.
Header file for mathematical functions.
constexpr bool useOptimizedKernels
Configuration switch for optimized kernels.This configuration switch enables/disables all optimized c...
Definition: Optimizations.h:84
If_< IsExpression< MT2 >, const MT2, const MT2 & > RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:240
Header file for the Rows type trait.
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B * C$ ).
Definition: DMatDMatMultExpr.h:7800
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:87
Header file for basic type definitions.
BLAZE_ALWAYS_INLINE const complex< int8_t > sum(const SIMDcint8 &a) noexcept
Returns the sum of all elements in the 8-bit integral complex SIMD vector.
Definition: Reduction.h:63
EnableIf_< IsDenseMatrix< MT1 > > smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the serial shim.
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:287
LeftOperand leftOperand() const noexcept
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:371
Header file for the IsDiagonal type trait.
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector) noexcept
Returns the current size/dimension of the vector.
Definition: Vector.h:258
size_t rows() const noexcept
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:351
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:61
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
BLAZE_ALWAYS_INLINE MT::Iterator begin(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator to the first element of row/column i.
Definition: Matrix.h:188
Availability of a SIMD multiplication for the given data types.Depending on the available instruction...
Definition: HasSIMDMult.h:162
typename SIMDTrait< T >::Type SIMDTrait_
Auxiliary alias declaration for the SIMDTrait class template.The SIMDTrait_ alias declaration provide...
Definition: SIMDTrait.h:315
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:533
Header file for the IsRowVector type trait.
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:227
Header file for the And class template.
const ElementType_< MT > min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1669
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:88
ReturnType at(size_t i, size_t j) const
Checked access to the matrix elements.
Definition: DMatTDMatMultExpr.h:335
Availability of a SIMD addition for the given data types.Depending on the available instruction set (...
Definition: HasSIMDAdd.h:162
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:723
typename MultTrait< T1, T2 >::Type MultTrait_
Auxiliary alias declaration for the MultTrait class template.The MultTrait_ alias declaration provide...
Definition: MultTrait.h:245
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:88
Constraints on the storage order of matrix types.
bool isAliased(const T *alias) const noexcept
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:405
Header file for the RequiresEvaluation type trait.
ElementType_< ResultType > ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:231
System settings for performance optimizations.
ResultType_< MT2 > RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:154
Header file for the IsUniLower type trait.
Header file for the IsBLASCompatible type trait.
typename T::ResultType ResultType_
Alias declaration for nested ResultType type definitions.The ResultType_ alias declaration provides a...
Definition: Aliases.h:323
const ElementType_< MT > max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1716
EnableIf_< IsDenseMatrix< MT1 > > smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT > >, ColumnExprTrait_< MT > > column(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific column of the given matrix.
Definition: Column.h:126
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
typename IfTrue< Condition, T1, T2 >::Type IfTrue_
Auxiliary alias declaration for the IfTrue class template.The IfTrue_ alias declaration provides a co...
Definition: If.h:109
Header file for the IsComplexDouble type trait.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:72
SubvectorExprTrait_< VT, unaligned > subvector(Vector< VT, TF > &vector, size_t index, size_t size)
Creating a view on a specific subvector of the given vector.
Definition: Subvector.h:152
size_t columns() const noexcept
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:361
typename T::CompositeType CompositeType_
Alias declaration for nested CompositeType type definitions.The CompositeType_ alias declaration prov...
Definition: Aliases.h:83
SIMDTrait_< ElementType > SIMDType
Resulting SIMD element type.
Definition: DMatTDMatMultExpr.h:232
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:234
ElementType_< RT1 > ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:155
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
TransposeType_< ResultType > TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:230
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
Header file for the If class template.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: ColumnMajorMatrix.h:61
EnableIf_< IsDenseMatrix< MT1 > > smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Header file for the Or class template.
Header file for the TDMatSVecMultExprTrait class template.
bool canSMPAssign() const noexcept
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:425
#define BLAZE_THROW_OUT_OF_RANGE(MESSAGE)
Macro for the emission of a std::out_of_range exception.This macro encapsulates the default way of Bl...
Definition: Exception.h:331
Header file for the HasSIMDAdd type trait.
Header file for the DenseMatrix base class.
Header file for the Columns type trait.
Header file for the Not class template.
typename T::ElementType ElementType_
Alias declaration for nested ElementType type definitions.The ElementType_ alias declaration provides...
Definition: Aliases.h:163
Header file for all SIMD functionality.
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:71
Header file for the IsLower type trait.
Header file for the IsAligned type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:90
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:60
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Header file for the exception macros of the math module.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:254
MultTrait_< RT1, RT2 > ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:228
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
#define BLAZE_USE_BLAS_MATRIX_MATRIX_MULTIPLICATION
Compilation switch for the BLAS matrix/matrix multiplication kernels (gemm). This compilation switch e...
Definition: BLAS.h:93
If_< IsExpression< MT1 >, const MT1, const MT1 & > LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:237
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:109
IfTrue_< evaluateRight, const RT2, CT2 > RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:246
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
DisableIf_< Or< IsComputation< MT >, IsTransExpr< MT > >, RowExprTrait_< MT > > row(Matrix< MT, SO > &matrix, size_t index)
Creating a view on a specific row of the given matrix.
Definition: Row.h:126
System settings for the BLAS mode.
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
bool isAligned() const noexcept
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:415
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: RowMajorMatrix.h:61
Header file for the HasSIMDMult type trait.
CompositeType_< MT1 > CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:157
Header file for the MatScalarMultExpr base class.
Header file for run time assertion macros.
Utility type for generic codes.
CompositeType_< MT2 > CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:158
typename If< T1, T2, T3 >::Type If_
Auxiliary alias declaration for the If class template.The If_ alias declaration provides a convenient...
Definition: If.h:160
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:61
Header file for the reset shim.
Constraints on the storage order of matrix types.
Header file for the HasMutableDataAccess type trait.
bool canAlias(const T *alias) const noexcept
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:393
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:113
IntegralConstant< bool, B > BoolConstant
Generic wrapper for a compile time constant boolean value.The BoolConstant class template represents ...
Definition: IntegralConstant.h:100
Header file for BLAS triangular matrix/matrix multiplication functions (trmm)
typename T::OppositeType OppositeType_
Alias declaration for nested OppositeType type definitions.The OppositeType_ alias declaration provid...
Definition: Aliases.h:243
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:84
Header file for the IsDenseVector type trait.
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
Header file for the AreSIMDCombinable type trait.
Header file for the IsRowMajorMatrix type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:55
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs) noexcept
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:272
Header file for the IsComputation type trait class.
Header file for the IsBuiltin type trait.
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:233
Header file for the TDMatDVecMultExprTrait class template.
ResultType_< MT1 > RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:153
Header file for BLAS general matrix/matrix multiplication functions (gemm)
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
Header file for the IsComplexFloat type trait.
Header file for the IntegralConstant class template.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:434
Header file for the complex data type.
RightOperand rightOperand() const noexcept
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:381
Expression object for dense matrix-transpose dense matrix multiplications.The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:147
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:435
typename T::TransposeType TransposeType_
Alias declaration for nested TransposeType type definitions.The TransposeType_ alias declaration prov...
Definition: Aliases.h:403
Header file for the IsUpper type trait.
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
ElementType_< RT2 > ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:156