TDMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level3.h>
54 #include <blaze/math/Functions.h>
55 #include <blaze/math/Intrinsics.h>
56 #include <blaze/math/shims/Reset.h>
91 #include <blaze/system/BLAS.h>
93 #include <blaze/util/Assert.h>
94 #include <blaze/util/Complex.h>
98 #include <blaze/util/DisableIf.h>
99 #include <blaze/util/EnableIf.h>
100 #include <blaze/util/InvalidType.h>
102 #include <blaze/util/mpl/And.h>
103 #include <blaze/util/mpl/Not.h>
104 #include <blaze/util/mpl/Or.h>
105 #include <blaze/util/SelectType.h>
106 #include <blaze/util/Types.h>
113 
114 
115 namespace blaze {
116 
117 //=================================================================================================
118 //
119 // CLASS TDMATTDMATMULTEXPR
120 //
121 //=================================================================================================
122 
123 //*************************************************************************************************
130 template< typename MT1 // Type of the left-hand side dense matrix
131  , typename MT2 > // Type of the right-hand side dense matrix
132 class TDMatTDMatMultExpr : public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
133  , private MatMatMultExpr
134  , private Computation
135 {
136  private:
137  //**Type definitions****************************************************************************
// Shorthand aliases for nested types of the two operand types MT1 (left) and MT2 (right).
138  typedef typename MT1::ResultType RT1;  // ResultType of the left-hand side operand
139  typedef typename MT2::ResultType RT2;  // ResultType of the right-hand side operand
140  typedef typename RT1::ElementType ET1;  // ElementType of RT1
141  typedef typename RT2::ElementType ET2;  // ElementType of RT2
142  typedef typename MT1::CompositeType CT1;  // CompositeType of the left-hand side operand
143  typedef typename MT2::CompositeType CT2;  // CompositeType of the right-hand side operand
144  //**********************************************************************************************
145 
146  //**********************************************************************************************
// Compile-time flag: nonzero if the left-hand side operand is itself a computation or
// requires an intermediate evaluation. Mirrors evaluateRight below; both flags are read by
// IsEvaluationRequired and by the smpAssignable compilation flag.
148  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
149  //**********************************************************************************************
150 
151  //**********************************************************************************************
// Compile-time flag: nonzero if the right-hand side operand is itself a computation or
// requires an intermediate evaluation.
153  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
154  //**********************************************************************************************
155 
156  //**********************************************************************************************
158 
// Compile-time predicate over (T1, T2, T3). In assign() below it is instantiated as
// <target, left operand, right operand>. 'value' is nonzero when T1 is a row-major matrix
// and at least one of T2/T3 is symmetric.
164  template< typename T1, typename T2, typename T3 >
165  struct CanExploitSymmetry {
166  enum { value = IsRowMajorMatrix<T1>::value &&
167  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
168  };
170  //**********************************************************************************************
171 
172  //**********************************************************************************************
174 
// Compile-time predicate: 'value' is nonzero when at least one operand has to be evaluated
// (evaluateLeft || evaluateRight) AND CanExploitSymmetry<T1,T2,T3> holds.
178  template< typename T1, typename T2, typename T3 >
179  struct IsEvaluationRequired {
180  enum { value = ( evaluateLeft || evaluateRight ) &&
181  CanExploitSymmetry<T1,T2,T3>::value };
182  };
184  //**********************************************************************************************
185 
186  //**********************************************************************************************
188 
// Compile-time predicate for the single-precision kernel. 'value' is nonzero only if ALL hold:
//  - BLAZE_BLAS_MODE is enabled,
//  - T1 offers mutable data access, T2 and T3 offer const data access,
//  - neither T2 nor T3 is diagonal,
//  - T1, T2 and T3 are all vectorizable,
//  - the element types of T1, T2 and T3 all satisfy IsFloat.
// NOTE(review): the kernel gated by this predicate is not part of the visible code.
191  template< typename T1, typename T2, typename T3 >
192  struct UseSinglePrecisionKernel {
193  enum { value = BLAZE_BLAS_MODE &&
194  HasMutableDataAccess<T1>::value &&
195  HasConstDataAccess<T2>::value &&
196  HasConstDataAccess<T3>::value &&
197  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
198  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
199  IsFloat<typename T1::ElementType>::value &&
200  IsFloat<typename T2::ElementType>::value &&
201  IsFloat<typename T3::ElementType>::value };
202  };
204  //**********************************************************************************************
205 
206  //**********************************************************************************************
208 
// Compile-time predicate for the double-precision kernel. Same structural requirements as
// UseSinglePrecisionKernel (BLAS mode, data access, non-diagonal operands, vectorizable),
// but the element types of T1, T2 and T3 must all satisfy IsDouble.
// NOTE(review): the kernel gated by this predicate is not part of the visible code.
211  template< typename T1, typename T2, typename T3 >
212  struct UseDoublePrecisionKernel {
213  enum { value = BLAZE_BLAS_MODE &&
214  HasMutableDataAccess<T1>::value &&
215  HasConstDataAccess<T2>::value &&
216  HasConstDataAccess<T3>::value &&
217  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
218  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
219  IsDouble<typename T1::ElementType>::value &&
220  IsDouble<typename T2::ElementType>::value &&
221  IsDouble<typename T3::ElementType>::value };
222  };
224  //**********************************************************************************************
225 
226  //**********************************************************************************************
228 
// Compile-time predicate for the single-precision complex kernel. Same structural
// requirements as UseSinglePrecisionKernel, but the element types of T1, T2 and T3 must all
// be exactly complex<float> (checked via IsSame against the local 'Type' alias).
// NOTE(review): the kernel gated by this predicate is not part of the visible code.
232  template< typename T1, typename T2, typename T3 >
233  struct UseSinglePrecisionComplexKernel {
234  typedef complex<float> Type;
235  enum { value = BLAZE_BLAS_MODE &&
236  HasMutableDataAccess<T1>::value &&
237  HasConstDataAccess<T2>::value &&
238  HasConstDataAccess<T3>::value &&
239  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
240  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
241  IsSame<typename T1::ElementType,Type>::value &&
242  IsSame<typename T2::ElementType,Type>::value &&
243  IsSame<typename T3::ElementType,Type>::value };
244  };
246  //**********************************************************************************************
247 
248  //**********************************************************************************************
250 
// Compile-time predicate for the double-precision complex kernel. Same structural
// requirements as UseSinglePrecisionKernel, but the element types of T1, T2 and T3 must all
// be exactly complex<double> (checked via IsSame against the local 'Type' alias).
// NOTE(review): the kernel gated by this predicate is not part of the visible code.
254  template< typename T1, typename T2, typename T3 >
255  struct UseDoublePrecisionComplexKernel {
256  typedef complex<double> Type;
257  enum { value = BLAZE_BLAS_MODE &&
258  HasMutableDataAccess<T1>::value &&
259  HasConstDataAccess<T2>::value &&
260  HasConstDataAccess<T3>::value &&
261  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
262  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
263  IsSame<typename T1::ElementType,Type>::value &&
264  IsSame<typename T2::ElementType,Type>::value &&
265  IsSame<typename T3::ElementType,Type>::value };
266  };
268  //**********************************************************************************************
269 
270  //**********************************************************************************************
272 
// Compile-time predicate: 'value' is nonzero when BLAZE_BLAS_MODE is disabled, or when none
// of the four precision-specific predicates above (single, double, single complex,
// double complex) holds for (T1, T2, T3).
275  template< typename T1, typename T2, typename T3 >
276  struct UseDefaultKernel {
277  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
278  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
279  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
280  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
281  };
283  //**********************************************************************************************
284 
285  //**********************************************************************************************
287 
// Compile-time predicate selecting between the scalar and the vectorized overloads of
// selectSmallAssignKernel / selectLargeAssignKernel. 'value' is nonzero only if ALL hold:
//  - T2 (the left operand in those overloads) is not diagonal,
//  - T1, T2 and T3 are all vectorizable,
//  - T1, T2 and T3 share the same element type,
//  - IntrinsicTrait of that element type reports addition, subtraction and multiplication.
290  template< typename T1, typename T2, typename T3 >
291  struct UseVectorizedDefaultKernel {
292  enum { value = !IsDiagonal<T2>::value &&
293  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
294  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
295  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
296  IntrinsicTrait<typename T1::ElementType>::addition &&
297  IntrinsicTrait<typename T1::ElementType>::subtraction &&
298  IntrinsicTrait<typename T1::ElementType>::multiplication };
299  };
301  //**********************************************************************************************
302 
303  public:
304  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are used here but their typedefs are not among
// the visible lines; they are presumably declared immediately above - confirm.
// Return type of the element access operator: a const ElementType value.
311  typedef const ElementType ReturnType;
// Type used when this expression is itself an operand: a const ResultType value.
312  typedef const ResultType CompositeType;
313 
// Storage type of the left operand: held by value if MT1 is an expression,
// otherwise held by const reference.
315  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
316 
// Storage type of the right operand: held by value if MT2 is an expression,
// otherwise held by const reference.
318  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
319 
322 
325  //**********************************************************************************************
326 
327  //**Compilation flags***************************************************************************
// Compilation flag 'vectorizable'.
// NOTE(review): this enum is cut off after the second line below - its remaining conditions
// and the closing '};' are not among the visible lines. Only the first two conditions
// (MT1 is not diagonal; MT1 and MT2 are both vectorizable) can be read here. Recover the
// rest from the original header before relying on this definition.
329  enum { vectorizable = !IsDiagonal<MT1>::value &&
330  MT1::vectorizable && MT2::vectorizable &&
334 
// Compilation flag 'smpAssignable': nonzero only if neither operand needs an intermediate
// evaluation and both operand types are themselves smpAssignable.
336  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
337  !evaluateRight && MT2::smpAssignable };
338  //**********************************************************************************************
339 
340  //**Constructor*********************************************************************************
// Constructor: stores the two operands of the multiplication expression.
//  lhs - left-hand side operand
//  rhs - right-hand side operand
// The inner dimensions must agree (lhs.columns() == rhs.rows()); this is only checked via
// BLAZE_INTERNAL_ASSERT.
346  explicit inline TDMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
347  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
348  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
349  {
350  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
351  }
352  //**********************************************************************************************
353 
354  //**Access operator*****************************************************************************
// 2D element access: computes element (i,j) of the product as the sum over k of
// lhs_(i,k) * rhs_(k,j).
//  i - row index, must be < lhs_.rows() (checked via BLAZE_INTERNAL_ASSERT only)
//  j - column index, must be < rhs_.columns() (checked via BLAZE_INTERNAL_ASSERT only)
// Returns a default-constructed ElementType if the inner dimension is zero or if the
// triangular structure of the operands leaves an empty k range.
361  inline ReturnType operator()( size_t i, size_t j ) const {
362  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
363  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
364 
// First k to visit: an upper lhs_ contributes nothing before column i (i+1 if strictly
// upper), a lower rhs_ contributes nothing before row j (j+1 if strictly lower).
365  const size_t kbegin( ( IsUpper<MT1>::value )
366  ?( ( IsLower<MT2>::value )
367  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
368  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
369  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
370  :( ( IsLower<MT2>::value )
371  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
372  :( 0UL ) ) );
// One past the last k to visit: a lower lhs_ ends at column i (exclusive if strictly lower),
// an upper rhs_ ends at row j (exclusive if strictly upper).
373  const size_t kend( ( IsLower<MT1>::value )
374  ?( ( IsUpper<MT2>::value )
375  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
376  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
377  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
378  :( ( IsUpper<MT2>::value )
379  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
380  :( lhs_.columns() ) ) );
381 
382  if( lhs_.columns() == 0UL ||
383  ( ( IsTriangular<MT1>::value || IsTriangular<MT2>::value ) && kbegin >= kend ) )
384  return ElementType();
385 
// Diagonal left operand: only k == i contributes (same product as the diagonal/general
// assignment kernel). Without this guard the return below would be unconditional and the
// general summation unreachable.
386  if( IsDiagonal<MT1>::value )
387  return lhs_(i,i) * rhs_(i,j);
388 
// Diagonal right operand: only k == j contributes (same product as the general/diagonal
// assignment kernel).
389  if( IsDiagonal<MT2>::value )
390  return lhs_(i,j) * rhs_(j,j);
391 
// General case: the first term initializes tmp, the loop adds the remaining terms two at a
// time, and kpos marks a possible single leftover term.
392  const size_t knum( kend - kbegin );
393  const size_t kpos( kbegin + ( ( knum - 1UL ) & size_t(-2) ) + 1UL );
394 
395  ElementType tmp( lhs_(i,kbegin) * rhs_(kbegin,j) );
396 
397  for( size_t k=kbegin+1UL; k<kpos; k+=2UL ) {
398  tmp += lhs_(i,k ) * rhs_(k ,j);
399  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
400  }
401  if( kpos < kend ) {
402  tmp += lhs_(i,kpos) * rhs_(kpos,j);
403  }
404 
405  return tmp;
406  }
407  //**********************************************************************************************
408 
409  //**Rows function*******************************************************************************
// Returns the number of rows of the product, i.e. the number of rows of the left operand.
414  inline size_t rows() const {
415  return lhs_.rows();
416  }
417  //**********************************************************************************************
418 
419  //**Columns function****************************************************************************
// Returns the number of columns of the product, i.e. the number of columns of the right operand.
424  inline size_t columns() const {
425  return rhs_.columns();
426  }
427  //**********************************************************************************************
428 
429  //**Left operand access*************************************************************************
// Returns the stored left-hand side operand (by value or const reference, per LeftOperand).
434  inline LeftOperand leftOperand() const {
435  return lhs_;
436  }
437  //**********************************************************************************************
438 
439  //**Right operand access************************************************************************
// Returns the stored right-hand side operand (by value or const reference, per RightOperand).
444  inline RightOperand rightOperand() const {
445  return rhs_;
446  }
447  //**********************************************************************************************
448 
449  //**********************************************************************************************
// Reports whether the expression can alias with the given address.
//  alias - address to check
// Returns true if either operand reports isAliased( alias ). Note that this forwards to the
// operands' isAliased(), not to a canAlias() of the operands.
455  template< typename T >
456  inline bool canAlias( const T* alias ) const {
457  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
458  }
459  //**********************************************************************************************
460 
461  //**********************************************************************************************
// Reports whether the expression is aliased with the given address.
//  alias - address to check
// Returns true if either operand reports isAliased( alias ).
467  template< typename T >
468  inline bool isAliased( const T* alias ) const {
469  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
470  }
471  //**********************************************************************************************
472 
473  //**********************************************************************************************
// Returns true only if both operands report isAligned().
478  inline bool isAligned() const {
479  return lhs_.isAligned() && rhs_.isAligned();
480  }
481  //**********************************************************************************************
482 
483  //**********************************************************************************************
// Reports whether the expression can be used in SMP assignments.
// NOTE(review): the return expression is cut off after the trailing '&&' - its final operand
// and the terminating ';' are not among the visible lines. The visible part requires that
// BLAS is not parallel, or that rows()*columns() is below TDMATTDMATMULT_THRESHOLD.
// Recover the missing operand from the original header before relying on this function.
488  inline bool canSMPAssign() const {
489  return ( !BLAZE_BLAS_IS_PARALLEL ||
490  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
492  }
493  //**********************************************************************************************
494 
495  private:
496  //**Member variables****************************************************************************
497  LeftOperand lhs_;  // Left-hand side operand of the multiplication expression
498  RightOperand rhs_;  // Right-hand side operand of the multiplication expression
499  //**********************************************************************************************
500 
501  //**Assignment to dense matrices****************************************************************
// Assignment of the multiplication expression to a dense matrix.
//  lhs - target dense matrix; its dimensions must already match the expression
//        (checked via BLAZE_INTERNAL_ASSERT only)
//  rhs - the multiplication expression to evaluate
// Disabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the LT and RT types used below are not declared among the visible lines;
// they are presumably class-level typedefs for the evaluated operand types - confirm.
514  template< typename MT // Type of the target dense matrix
515  , bool SO > // Storage order of the target dense matrix
516  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
517  assign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
518  {
520 
521  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
522  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
523 
// Empty target: nothing to do. Empty inner dimension: every element is an empty sum,
// so the target is reset.
524  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
525  return;
526  }
527  else if( rhs.lhs_.columns() == 0UL ) {
528  reset( ~lhs );
529  return;
530  }
531 
532  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
533  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
534 
535  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
536  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
537  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
538  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
539  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
540  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
541 
// Hand the evaluated operands to the kernel dispatcher.
542  TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
543  }
545  //**********************************************************************************************
546 
547  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel dispatcher for C = A * B.
//  C - target matrix, A - left operand, B - right operand
// Uses selectSmallAssignKernel when A is diagonal or when the target has fewer than
// TDMATTDMATMULT_THRESHOLD elements (rows * columns); otherwise selectBlasAssignKernel.
// NOTE(review): selectBlasAssignKernel is not defined among the visible lines.
558  template< typename MT3 // Type of the left-hand side target matrix
559  , typename MT4 // Type of the left-hand side matrix operand
560  , typename MT5 > // Type of the right-hand side matrix operand
561  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
562  {
563  if( ( IsDiagonal<MT4>::value ) ||
564  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
565  selectSmallAssignKernel( C, A, B );
566  else
567  selectBlasAssignKernel( C, A, B );
568  }
570  //**********************************************************************************************
571 
572  //**Default assignment to dense matrices (general/general)**************************************
// Default (scalar) kernel for C = A * B when neither A nor B is diagonal.
//  C - target matrix, A - left operand (M x K), B - right operand (K x N)
// C is built column by column. For each column j the k range is narrowed by the triangular
// structure of B, and for each k the row range is narrowed by the triangular structure of A.
// The first k of a column assigns (or resets) elements; later k values accumulate into them.
586  template< typename MT3 // Type of the left-hand side target matrix
587  , typename MT4 // Type of the left-hand side matrix operand
588  , typename MT5 > // Type of the right-hand side matrix operand
589  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
590  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
591  {
592  const size_t M( A.rows() );
593  const size_t N( B.columns() );
594  const size_t K( A.columns() );
595 
596  for( size_t j=0UL; j<N; ++j )
597  {
598  const size_t kbegin( ( IsLower<MT5>::value )
599  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
600  :( 0UL ) );
601  const size_t kend( ( IsUpper<MT5>::value )
602  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
603  :( K ) );
604  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
605 
// Strictly triangular B with an empty k range: column j of B contributes nothing,
// so the whole column of C is reset.
606  if( IsStrictlyTriangular<MT5>::value && kbegin == kend ) {
607  for( size_t i=0UL; i<M; ++i ) {
608  reset( (~C)(i,j) );
609  }
610  continue;
611  }
612 
// First k (k == kbegin): assign the rows A can contribute to, and reset rows that
// would otherwise not be written by this pass.
613  {
614  const size_t ibegin( ( IsLower<MT4>::value )
615  ?( IsStrictlyLower<MT4>::value ? kbegin+1UL : kbegin )
616  :( 0UL ) );
617  const size_t iend( ( IsUpper<MT4>::value )
618  ?( IsStrictlyUpper<MT4>::value ? kbegin : kbegin+1UL )
619  :( M ) );
620  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
621 
622  if( IsLower<MT4>::value && IsLower<MT5>::value ) {
623  for( size_t i=0UL; i<ibegin; ++i ) {
624  reset( C(i,j) );
625  }
626  }
627  else if( IsStrictlyLower<MT4>::value ) {
628  reset( C(0UL,j) );
629  }
630  for( size_t i=ibegin; i<iend; ++i ) {
631  C(i,j) = A(i,kbegin) * B(kbegin,j);
632  }
633  if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
634  for( size_t i=iend; i<M; ++i ) {
635  reset( C(i,j) );
636  }
637  }
638  else if( IsStrictlyUpper<MT4>::value ) {
639  reset( C(M-1UL,j) );
640  }
641  }
642 
// Remaining k: accumulate into rows [ibegin,iend). For an upper A, row 'iend' receives
// its first contribution at this k and is therefore assigned rather than accumulated.
643  for( size_t k=kbegin+1UL; k<kend; ++k )
644  {
645  const size_t ibegin( ( IsLower<MT4>::value )
646  ?( IsStrictlyLower<MT4>::value ? k+1UL : k )
647  :( 0UL ) );
648  const size_t iend( ( IsUpper<MT4>::value )
649  ?( IsStrictlyUpper<MT4>::value ? k-1UL : k )
650  :( M ) );
651  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
652 
653  for( size_t i=ibegin; i<iend; ++i ) {
654  C(i,j) += A(i,k) * B(k,j);
655  }
656  if( IsUpper<MT4>::value ) {
657  C(iend,j) = A(iend,k) * B(k,j);
658  }
659  }
660  }
661  }
663  //**********************************************************************************************
664 
665  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default (scalar) kernel for C = A * B when A is not diagonal and B is diagonal.
//  C - target matrix, A - left operand, B - right operand (diagonal)
// Only k == j contributes, so C(i,j) = A(i,j) * B(j,j). The row range per column is narrowed
// by the triangular structure of A; rows outside that range are reset.
679  template< typename MT3 // Type of the left-hand side target matrix
680  , typename MT4 // Type of the left-hand side matrix operand
681  , typename MT5 > // Type of the right-hand side matrix operand
682  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
683  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
684  {
686 
687  const size_t M( A.rows() );
688  const size_t N( B.columns() );
689 
690  for( size_t j=0UL; j<N; ++j )
691  {
692  const size_t ibegin( ( IsLower<MT4>::value )
693  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
694  :( 0UL ) );
695  const size_t iend( ( IsUpper<MT4>::value )
696  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
697  :( M ) );
698  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
699 
700  if( IsLower<MT4>::value ) {
701  for( size_t i=0UL; i<ibegin; ++i ) {
702  reset( C(i,j) );
703  }
704  }
705  for( size_t i=ibegin; i<iend; ++i ) {
706  C(i,j) = A(i,j) * B(j,j);
707  }
708  if( IsUpper<MT4>::value ) {
709  for( size_t i=iend; i<M; ++i ) {
710  reset( C(i,j) );
711  }
712  }
713  }
714  }
716  //**********************************************************************************************
717 
718  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default (scalar) kernel for C = A * B when A is diagonal and B is not diagonal.
//  C - target matrix, A - left operand (diagonal), B - right operand
// Only k == i contributes, so C(i,j) = A(i,i) * B(i,j). The row range per column is narrowed
// by the triangular structure of B (MT5); rows outside that range are reset.
// NOTE(review): the two reset guards test IsLower/IsUpper of MT4 although ibegin/iend derive
// from MT5. When MT5 is not lower (resp. not upper), ibegin is 0 (resp. iend is M) and the
// guarded loop is empty, so the guard on MT4 only matters if MT4 were not lower/upper -
// presumably impossible for a diagonal MT4; confirm.
732  template< typename MT3 // Type of the left-hand side target matrix
733  , typename MT4 // Type of the left-hand side matrix operand
734  , typename MT5 > // Type of the right-hand side matrix operand
735  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
736  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
737  {
739 
740  const size_t M( A.rows() );
741  const size_t N( B.columns() );
742 
743  for( size_t j=0UL; j<N; ++j )
744  {
745  const size_t ibegin( ( IsLower<MT5>::value )
746  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
747  :( 0UL ) );
748  const size_t iend( ( IsUpper<MT5>::value )
749  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
750  :( M ) );
751  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
752 
753  if( IsLower<MT4>::value ) {
754  for( size_t i=0UL; i<ibegin; ++i ) {
755  reset( C(i,j) );
756  }
757  }
758  for( size_t i=ibegin; i<iend; ++i ) {
759  C(i,j) = A(i,i) * B(i,j);
760  }
761  if( IsUpper<MT4>::value ) {
762  for( size_t i=iend; i<M; ++i ) {
763  reset( C(i,j) );
764  }
765  }
766  }
767  }
769  //**********************************************************************************************
770 
771  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default (scalar) kernel for C = A * B when both A and B are diagonal.
//  C - target matrix, A - left operand (diagonal), B - right operand (diagonal)
// Resets all of C, then writes only the diagonal: C(i,i) = A(i,i) * B(i,i).
785  template< typename MT3 // Type of the left-hand side target matrix
786  , typename MT4 // Type of the left-hand side matrix operand
787  , typename MT5 > // Type of the right-hand side matrix operand
788  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
789  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
790  {
792 
793  reset( C );
794 
795  for( size_t i=0UL; i<A.rows(); ++i ) {
796  C(i,i) = A(i,i) * B(i,i);
797  }
798  }
800  //**********************************************************************************************
801 
802  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix kernel, non-vectorized overload: selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold. Forwards to the matching
// selectDefaultAssignKernel overload.
816  template< typename MT3 // Type of the left-hand side target matrix
817  , typename MT4 // Type of the left-hand side matrix operand
818  , typename MT5 > // Type of the right-hand side matrix operand
819  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
820  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
821  {
822  selectDefaultAssignKernel( C, A, B );
823  }
825  //**********************************************************************************************
826 
827  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
// Small-matrix kernel, vectorized overload for a row-major target (DenseMatrix<MT3,false>).
// Does not multiply directly: it converts one operand to its OppositeType and re-dispatches
// the resulting product through assign(). Choice of the operand to convert:
//  - exactly one operand is resizable -> the non-resizable operand is converted;
//  - otherwise the operand with fewer elements (rows * columns) is converted, B on a tie.
842  template< typename MT3 // Type of the left-hand side target matrix
843  , typename MT4 // Type of the left-hand side matrix operand
844  , typename MT5 > // Type of the right-hand side matrix operand
845  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
846  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
847  {
852 
853  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
854  const typename MT5::OppositeType tmp( serial( B ) );
855  assign( ~C, A * tmp );
856  }
857  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
858  const typename MT4::OppositeType tmp( serial( A ) );
859  assign( ~C, tmp * B );
860  }
861  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
862  const typename MT5::OppositeType tmp( serial( B ) );
863  assign( ~C, A * tmp );
864  }
865  else {
866  const typename MT4::OppositeType tmp( serial( A ) );
867  assign( ~C, tmp * B );
868  }
869  }
871  //**********************************************************************************************
872 
873  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Small-matrix kernel, vectorized overload for a column-major target (DenseMatrix<MT3,true>).
//  C - target matrix, A - left operand (M x K), B - right operand (K x N)
// Rows of C are processed in blocks of 8, 4, 2 and finally 1 intrinsic vector(s) of IT::size
// elements each. Within a block, columns are processed one at a time (8-vector block) or in
// pairs with a single-column remainder. For every (row block, column) tile the k range
// [kbegin,kend) is narrowed by the triangular structure of A and B, partial sums are held in
// IntrinsicType accumulators, and the results are written with store().
// NOTE(review): the accumulators are declared without an initializer and then used in
// 'xmm = xmm + ...' - this assumes IntrinsicType default-constructs to zero; confirm.
// NOTE(review): the block loops may load/store past row M when M is not a multiple of
// IT::size - presumably the matrices are padded accordingly; confirm.
888  template< typename MT3 // Type of the left-hand side target matrix
889  , typename MT4 // Type of the left-hand side matrix operand
890  , typename MT5 > // Type of the right-hand side matrix operand
891  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
892  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
893  {
894  typedef IntrinsicTrait<ElementType> IT;
895 
896  const size_t M( A.rows() );
897  const size_t N( B.columns() );
898  const size_t K( A.columns() );
899 
900  size_t i( 0UL );
901 
// Row blocks of 8 vectors, one column of C at a time.
902  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
903  for( size_t j=0UL; j<N; ++j )
904  {
905  const size_t kbegin( ( IsLower<MT5>::value )
906  ?( ( IsUpper<MT4>::value )
907  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
908  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
909  :( IsUpper<MT4>::value ? i : 0UL ) );
910  const size_t kend( ( IsUpper<MT5>::value )
911  ?( ( IsLower<MT4>::value )
912  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
913  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
914  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
915 
916  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
917 
918  for( size_t k=kbegin; k<kend; ++k ) {
919  const IntrinsicType b1( set( B(k,j) ) );
920  xmm1 = xmm1 + A.load(i ,k) * b1;
921  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
922  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
923  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
924  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
925  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
926  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
927  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
928  }
929 
930  (~C).store( i , j, xmm1 );
931  (~C).store( i+IT::size , j, xmm2 );
932  (~C).store( i+IT::size*2UL, j, xmm3 );
933  (~C).store( i+IT::size*3UL, j, xmm4 );
934  (~C).store( i+IT::size*4UL, j, xmm5 );
935  (~C).store( i+IT::size*5UL, j, xmm6 );
936  (~C).store( i+IT::size*6UL, j, xmm7 );
937  (~C).store( i+IT::size*7UL, j, xmm8 );
938  }
939  }
940 
// Row blocks of 4 vectors, two columns of C at a time plus a single-column remainder.
941  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
942  {
943  size_t j( 0UL );
944 
945  for( ; (j+2UL) <= N; j+=2UL )
946  {
947  const size_t kbegin( ( IsLower<MT5>::value )
948  ?( ( IsUpper<MT4>::value )
949  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
950  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
951  :( IsUpper<MT4>::value ? i : 0UL ) );
952  const size_t kend( ( IsUpper<MT5>::value )
953  ?( ( IsLower<MT4>::value )
954  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
955  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
956  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
957 
958  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
959 
960  for( size_t k=kbegin; k<kend; ++k ) {
961  const IntrinsicType a1( A.load(i ,k) );
962  const IntrinsicType a2( A.load(i+IT::size ,k) );
963  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
964  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
965  const IntrinsicType b1( set( B(k,j ) ) );
966  const IntrinsicType b2( set( B(k,j+1UL) ) );
967  xmm1 = xmm1 + a1 * b1;
968  xmm2 = xmm2 + a2 * b1;
969  xmm3 = xmm3 + a3 * b1;
970  xmm4 = xmm4 + a4 * b1;
971  xmm5 = xmm5 + a1 * b2;
972  xmm6 = xmm6 + a2 * b2;
973  xmm7 = xmm7 + a3 * b2;
974  xmm8 = xmm8 + a4 * b2;
975  }
976 
977  (~C).store( i , j , xmm1 );
978  (~C).store( i+IT::size , j , xmm2 );
979  (~C).store( i+IT::size*2UL, j , xmm3 );
980  (~C).store( i+IT::size*3UL, j , xmm4 );
981  (~C).store( i , j+1UL, xmm5 );
982  (~C).store( i+IT::size , j+1UL, xmm6 );
983  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
984  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
985  }
986 
// Odd N: last column on its own.
987  if( j < N )
988  {
989  const size_t kbegin( ( IsLower<MT5>::value )
990  ?( ( IsUpper<MT4>::value )
991  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
992  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
993  :( IsUpper<MT4>::value ? i : 0UL ) );
994  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
995 
996  IntrinsicType xmm1, xmm2, xmm3, xmm4;
997 
998  for( size_t k=kbegin; k<kend; ++k ) {
999  const IntrinsicType b1( set( B(k,j) ) );
1000  xmm1 = xmm1 + A.load(i ,k) * b1;
1001  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1002  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1003  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1004  }
1005 
1006  (~C).store( i , j, xmm1 );
1007  (~C).store( i+IT::size , j, xmm2 );
1008  (~C).store( i+IT::size*2UL, j, xmm3 );
1009  (~C).store( i+IT::size*3UL, j, xmm4 );
1010  }
1011  }
1012 
// Row blocks of 2 vectors, two columns of C at a time plus a single-column remainder.
1013  for( ; (i+IT::size) < M; i+=IT::size*2UL )
1014  {
1015  size_t j( 0UL );
1016 
1017  for( ; (j+2UL) <= N; j+=2UL )
1018  {
1019  const size_t kbegin( ( IsLower<MT5>::value )
1020  ?( ( IsUpper<MT4>::value )
1021  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1022  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1023  :( IsUpper<MT4>::value ? i : 0UL ) );
1024  const size_t kend( ( IsUpper<MT5>::value )
1025  ?( ( IsLower<MT4>::value )
1026  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
1027  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
1028  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
1029 
1030  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1031 
1032  for( size_t k=kbegin; k<kend; ++k ) {
1033  const IntrinsicType a1( A.load(i ,k) );
1034  const IntrinsicType a2( A.load(i+IT::size,k) );
1035  const IntrinsicType b1( set( B(k,j ) ) );
1036  const IntrinsicType b2( set( B(k,j+1UL) ) );
1037  xmm1 = xmm1 + a1 * b1;
1038  xmm2 = xmm2 + a2 * b1;
1039  xmm3 = xmm3 + a1 * b2;
1040  xmm4 = xmm4 + a2 * b2;
1041  }
1042 
1043  (~C).store( i , j , xmm1 );
1044  (~C).store( i+IT::size, j , xmm2 );
1045  (~C).store( i , j+1UL, xmm3 );
1046  (~C).store( i+IT::size, j+1UL, xmm4 );
1047  }
1048 
// Odd N: last column on its own.
1049  if( j < N )
1050  {
1051  const size_t kbegin( ( IsLower<MT5>::value )
1052  ?( ( IsUpper<MT4>::value )
1053  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1054  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1055  :( IsUpper<MT4>::value ? i : 0UL ) );
1056  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
1057 
1058  IntrinsicType xmm1, xmm2;
1059 
1060  for( size_t k=kbegin; k<kend; ++k ) {
1061  const IntrinsicType b1( set( B(k,j) ) );
1062  xmm1 = xmm1 + A.load(i ,k) * b1;
1063  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1064  }
1065 
1066  (~C).store( i , j, xmm1 );
1067  (~C).store( i+IT::size, j, xmm2 );
1068  }
1069  }
1070 
// Final single-vector row block (at most one remains after the 2-vector loop).
1071  if( i < M )
1072  {
1073  size_t j( 0UL );
1074 
1075  for( ; (j+2UL) <= N; j+=2UL )
1076  {
1077  const size_t kbegin( ( IsLower<MT5>::value )
1078  ?( ( IsUpper<MT4>::value )
1079  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1080  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1081  :( IsUpper<MT4>::value ? i : 0UL ) );
1082  const size_t kend( ( IsUpper<MT5>::value )
1083  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
1084  :( K ) );
1085 
1086  IntrinsicType xmm1, xmm2;
1087 
1088  for( size_t k=kbegin; k<kend; ++k ) {
1089  const IntrinsicType a1( A.load(i,k) );
1090  xmm1 = xmm1 + a1 * set( B(k,j ) );
1091  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
1092  }
1093 
1094  (~C).store( i, j , xmm1 );
1095  (~C).store( i, j+1UL, xmm2 );
1096  }
1097 
// Odd N: last column on its own; k runs to K without an upper-structure bound.
1098  if( j < N )
1099  {
1100  const size_t kbegin( ( IsLower<MT5>::value )
1101  ?( ( IsUpper<MT4>::value )
1102  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1103  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1104  :( IsUpper<MT4>::value ? i : 0UL ) );
1105 
1106  IntrinsicType xmm1;
1107 
1108  for( size_t k=kbegin; k<K; ++k ) {
1109  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
1110  }
1111 
1112  (~C).store( i, j, xmm1 );
1113  }
1114  }
1115  }
1117  //**********************************************************************************************
1118 
1119  //**Default assignment to dense matrices (large matrices)***************************************
// Large-matrix assignment kernel for the non-vectorized case.
//
// Selected (via DisableIf) when UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
// It does no work of its own and forwards C, A and B unchanged to
// selectDefaultAssignKernel().
1133  template< typename MT3 // Type of the left-hand side target matrix
1134  , typename MT4 // Type of the left-hand side matrix operand
1135  , typename MT5 > // Type of the right-hand side matrix operand
1136  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1137  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1138  {
1139  selectDefaultAssignKernel( C, A, B );
1140  }
1142  //**********************************************************************************************
1143 
1144  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
// Vectorized large-matrix assignment kernel for row-major targets.
//
// Selected (via EnableIf) when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds and the
// target is passed as DenseMatrix<MT3,false>. There is no dedicated large-matrix
// implementation for this case: the target is unwrapped with operator~ and the call is
// forwarded to selectSmallAssignKernel().
1159  template< typename MT3 // Type of the left-hand side target matrix
1160  , typename MT4 // Type of the left-hand side matrix operand
1161  , typename MT5 > // Type of the right-hand side matrix operand
1162  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1163  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1164  {
1165  selectSmallAssignKernel( ~C, A, B );
1166  }
1168  //**********************************************************************************************
1169 
1170  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
// Blocked, vectorized assignment kernel for column-major target matrices (C = A*B).
//
// Selected (via EnableIf) when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds and the
// target is passed as DenseMatrix<MT3,true>.
//   C  target matrix; each processed tile is reset() and then overwritten with the product
//   A  left-hand side operand, read in vectors via A.load(i,k)
//   B  right-hand side operand, read element-wise via B(k,j) and broadcast with set()
//
// The iteration space is tiled into blocks of 128 rows (iblock), 64 columns (jblock) and
// 128 steps of the inner dimension (kblock). For every (ii,jj) tile the target entries are
// reset once; each kk block then loads the current partial sums from C, accumulates
// A.load(...) * set( B(k,j) ) over its k range and stores the sums back.
// Whenever IsUpper/IsLower holds for an operand type, kbegin/kend are narrowed so that
// only a part of the k range of the current block is traversed.
// NOTE(review): the row loops load and store whole IntrinsicType vectors even when the last
// chunk reaches beyond iend; presumably this relies on padded storage - confirm.
1185  template< typename MT3 // Type of the left-hand side target matrix
1186  , typename MT4 // Type of the left-hand side matrix operand
1187  , typename MT5 > // Type of the right-hand side matrix operand
1188  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1189  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1190  {
1191  typedef IntrinsicTrait<ElementType> IT;
1192 
      // M = rows of A, N = columns of B, K = columns of A (inner dimension)
1193  const size_t M( A.rows() );
1194  const size_t N( B.columns() );
1195  const size_t K( A.columns() );
1196 
      // Tile sizes of the blocked traversal
1197  const size_t iblock( 128UL );
1198  const size_t jblock( 64UL );
1199  const size_t kblock( 128UL );
1200 
1201  for( size_t ii=0UL; ii<M; ii+=iblock )
1202  {
1203  const size_t iend( min( ii+iblock, M ) );
1204 
1205  for( size_t jj=0UL; jj<N; jj+=jblock )
1206  {
1207  const size_t jend( min( jj+jblock, N ) );
1208 
      // Clear the current target tile before any partial sums are accumulated into it
1209  for( size_t j=jj; j<jend; ++j ) {
1210  for( size_t i=ii; i<iend; ++i ) {
1211  reset( (~C)(i,j) );
1212  }
1213  }
1214 
1215  for( size_t kk=0UL; kk<K; kk+=kblock )
1216  {
      // Upper bound of the k range of the current block
1217  const size_t ktmp( min( kk+kblock, K ) );
1218 
1219  size_t i( ii );
1220 
      // Row loop 1: four vectors of rows per step (starting at i, i1, i2, i3)
1221  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
1222  {
1223  const size_t i1( i+IT::size );
1224  const size_t i2( i+IT::size*2UL );
1225  const size_t i3( i+IT::size*3UL );
1226 
1227  size_t j( jj );
1228 
      // Two columns per step: eight accumulators (4 row vectors x 2 columns)
1229  for( ; (j+2UL) <= jend; j+=2UL )
1230  {
1231  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1232  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1233  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
1234  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
1235 
      // Resume from the partial sums written by earlier kk blocks
1236  IntrinsicType xmm1( (~C).load(i ,j ) );
1237  IntrinsicType xmm2( (~C).load(i1,j ) );
1238  IntrinsicType xmm3( (~C).load(i2,j ) );
1239  IntrinsicType xmm4( (~C).load(i3,j ) );
1240  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
1241  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
1242  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
1243  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
1244 
1245  for( size_t k=kbegin; k<kend; ++k ) {
1246  const IntrinsicType a1( A.load(i ,k) );
1247  const IntrinsicType a2( A.load(i1,k) );
1248  const IntrinsicType a3( A.load(i2,k) );
1249  const IntrinsicType a4( A.load(i3,k) );
1250  const IntrinsicType b1( set( B(k,j ) ) );
1251  const IntrinsicType b2( set( B(k,j+1UL) ) );
1252  xmm1 = xmm1 + a1 * b1;
1253  xmm2 = xmm2 + a2 * b1;
1254  xmm3 = xmm3 + a3 * b1;
1255  xmm4 = xmm4 + a4 * b1;
1256  xmm5 = xmm5 + a1 * b2;
1257  xmm6 = xmm6 + a2 * b2;
1258  xmm7 = xmm7 + a3 * b2;
1259  xmm8 = xmm8 + a4 * b2;
1260  }
1261 
1262  (~C).store( i , j , xmm1 );
1263  (~C).store( i1, j , xmm2 );
1264  (~C).store( i2, j , xmm3 );
1265  (~C).store( i3, j , xmm4 );
1266  (~C).store( i , j+1UL, xmm5 );
1267  (~C).store( i1, j+1UL, xmm6 );
1268  (~C).store( i2, j+1UL, xmm7 );
1269  (~C).store( i3, j+1UL, xmm8 );
1270  }
1271 
      // Remaining single column of the tile (odd number of columns)
1272  if( j < jend )
1273  {
1274  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1275  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1276  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
1277  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
1278 
1279  IntrinsicType xmm1( (~C).load(i ,j) );
1280  IntrinsicType xmm2( (~C).load(i1,j) );
1281  IntrinsicType xmm3( (~C).load(i2,j) );
1282  IntrinsicType xmm4( (~C).load(i3,j) );
1283 
1284  for( size_t k=kbegin; k<kend; ++k ) {
1285  const IntrinsicType b1( set( B(k,j) ) );
1286  xmm1 = xmm1 + A.load(i ,k) * b1;
1287  xmm2 = xmm2 + A.load(i1,k) * b1;
1288  xmm3 = xmm3 + A.load(i2,k) * b1;
1289  xmm4 = xmm4 + A.load(i3,k) * b1;
1290  }
1291 
1292  (~C).store( i , j, xmm1 );
1293  (~C).store( i1, j, xmm2 );
1294  (~C).store( i2, j, xmm3 );
1295  (~C).store( i3, j, xmm4 );
1296  }
1297  }
1298 
      // Row loop 2: two vectors of rows per step (starting at i and i1)
1299  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
1300  {
1301  const size_t i1( i+IT::size );
1302 
1303  size_t j( jj );
1304 
      // Four columns per step: eight accumulators (2 row vectors x 4 columns)
1305  for( ; (j+4UL) <= jend; j+=4UL )
1306  {
1307  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1308  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1309  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
1310  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
1311 
1312  IntrinsicType xmm1( (~C).load(i ,j ) );
1313  IntrinsicType xmm2( (~C).load(i1,j ) );
1314  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1315  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
1316  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
1317  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
1318  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
1319  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
1320 
1321  for( size_t k=kbegin; k<kend; ++k ) {
1322  const IntrinsicType a1( A.load(i ,k) );
1323  const IntrinsicType a2( A.load(i1,k) );
1324  const IntrinsicType b1( set( B(k,j ) ) );
1325  const IntrinsicType b2( set( B(k,j+1UL) ) );
1326  const IntrinsicType b3( set( B(k,j+2UL) ) );
1327  const IntrinsicType b4( set( B(k,j+3UL) ) );
1328  xmm1 = xmm1 + a1 * b1;
1329  xmm2 = xmm2 + a2 * b1;
1330  xmm3 = xmm3 + a1 * b2;
1331  xmm4 = xmm4 + a2 * b2;
1332  xmm5 = xmm5 + a1 * b3;
1333  xmm6 = xmm6 + a2 * b3;
1334  xmm7 = xmm7 + a1 * b4;
1335  xmm8 = xmm8 + a2 * b4;
1336  }
1337 
1338  (~C).store( i , j , xmm1 );
1339  (~C).store( i1, j , xmm2 );
1340  (~C).store( i , j+1UL, xmm3 );
1341  (~C).store( i1, j+1UL, xmm4 );
1342  (~C).store( i , j+2UL, xmm5 );
1343  (~C).store( i1, j+2UL, xmm6 );
1344  (~C).store( i , j+3UL, xmm7 );
1345  (~C).store( i1, j+3UL, xmm8 );
1346  }
1347 
      // Two columns per step for what the four-column loop left over
1348  for( ; (j+2UL) <= jend; j+=2UL )
1349  {
1350  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1351  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1352  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
1353  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
1354 
1355  IntrinsicType xmm1( (~C).load(i ,j ) );
1356  IntrinsicType xmm2( (~C).load(i1,j ) );
1357  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
1358  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
1359 
1360  for( size_t k=kbegin; k<kend; ++k ) {
1361  const IntrinsicType a1( A.load(i ,k) );
1362  const IntrinsicType a2( A.load(i1,k) );
1363  const IntrinsicType b1( set( B(k,j ) ) );
1364  const IntrinsicType b2( set( B(k,j+1UL) ) );
1365  xmm1 = xmm1 + a1 * b1;
1366  xmm2 = xmm2 + a2 * b1;
1367  xmm3 = xmm3 + a1 * b2;
1368  xmm4 = xmm4 + a2 * b2;
1369  }
1370 
1371  (~C).store( i , j , xmm1 );
1372  (~C).store( i1, j , xmm2 );
1373  (~C).store( i , j+1UL, xmm3 );
1374  (~C).store( i1, j+1UL, xmm4 );
1375  }
1376 
      // Remaining single column of the tile
1377  if( j < jend )
1378  {
1379  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1380  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1381  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
1382  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
1383 
1384  IntrinsicType xmm1( (~C).load(i ,j) );
1385  IntrinsicType xmm2( (~C).load(i1,j) );
1386 
1387  for( size_t k=kbegin; k<kend; ++k ) {
1388  const IntrinsicType b1( set( B(k,j) ) );
1389  xmm1 = xmm1 + A.load(i ,k) * b1;
1390  xmm2 = xmm2 + A.load(i1,k) * b1;
1391  }
1392 
1393  (~C).store( i , j, xmm1 );
1394  (~C).store( i1, j, xmm2 );
1395  }
1396  }
1397 
      // Last row chunk of the tile: a single vector of rows, one column at a time
1398  if( i < iend )
1399  {
1400  for( size_t j=jj; j<jend; ++j )
1401  {
1402  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
1403  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
1404  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
1405  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
1406 
1407  IntrinsicType xmm1( (~C).load(i,j) );
1408 
1409  for( size_t k=kbegin; k<kend; ++k ) {
1410  const IntrinsicType b1( set( B(k,j) ) );
1411  xmm1 = xmm1 + A.load(i,k) * b1;
1412  }
1413 
1414  (~C).store( i, j, xmm1 );
1415  }
1416  }
1417  }
1418  }
1419  }
1420  }
1422  //**********************************************************************************************
1423 
1424  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback of the BLAS-based assignment kernel family.
//
// Selected (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5> holds. No BLAS routine is
// called in this case; C, A and B are forwarded unchanged to selectLargeAssignKernel().
1438  template< typename MT3 // Type of the left-hand side target matrix
1439  , typename MT4 // Type of the left-hand side matrix operand
1440  , typename MT5 > // Type of the right-hand side matrix operand
1441  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1442  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1443  {
1444  selectLargeAssignKernel( C, A, B );
1445  }
1447  //**********************************************************************************************
1448 
1449  //**BLAS-based assignment to dense matrices (single precision)**********************************
1450 #if BLAZE_BLAS_MODE
1451 
1464  template< typename MT3 // Type of the left-hand side target matrix
1465  , typename MT4 // Type of the left-hand side matrix operand
1466  , typename MT5 > // Type of the right-hand side matrix operand
1467  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1468  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1469  {
1470  if( IsTriangular<MT4>::value ) {
1471  assign( C, B );
1472  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1473  }
1474  else if( IsTriangular<MT5>::value ) {
1475  assign( C, A );
1476  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1477  }
1478  else {
1479  sgemm( C, A, B, 1.0F, 0.0F );
1480  }
1481  }
1483 #endif
1484  //**********************************************************************************************
1485 
1486  //**BLAS-based assignment to dense matrices (double precision)**********************************
1487 #if BLAZE_BLAS_MODE
1488 
1501  template< typename MT3 // Type of the left-hand side target matrix
1502  , typename MT4 // Type of the left-hand side matrix operand
1503  , typename MT5 > // Type of the right-hand side matrix operand
1504  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1505  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1506  {
1507  if( IsTriangular<MT4>::value ) {
1508  assign( C, B );
1509  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1510  }
1511  else if( IsTriangular<MT5>::value ) {
1512  assign( C, A );
1513  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1514  }
1515  else {
1516  dgemm( C, A, B, 1.0, 0.0 );
1517  }
1518  }
1520 #endif
1521  //**********************************************************************************************
1522 
1523  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1524 #if BLAZE_BLAS_MODE
1525 
1538  template< typename MT3 // Type of the left-hand side target matrix
1539  , typename MT4 // Type of the left-hand side matrix operand
1540  , typename MT5 > // Type of the right-hand side matrix operand
1541  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1542  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1543  {
1544  if( IsTriangular<MT4>::value ) {
1545  assign( C, B );
1546  ctrmm( C, A, CblasLeft,
1547  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1548  complex<float>( 1.0F, 0.0F ) );
1549  }
1550  else if( IsTriangular<MT5>::value ) {
1551  assign( C, A );
1552  ctrmm( C, B, CblasRight,
1553  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1554  complex<float>( 1.0F, 0.0F ) );
1555  }
1556  else {
1557  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1558  }
1559  }
1561 #endif
1562  //**********************************************************************************************
1563 
1564  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1565 #if BLAZE_BLAS_MODE
1566 
1579  template< typename MT3 // Type of the left-hand side target matrix
1580  , typename MT4 // Type of the left-hand side matrix operand
1581  , typename MT5 > // Type of the right-hand side matrix operand
1582  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1583  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1584  {
1585  if( IsTriangular<MT4>::value ) {
1586  assign( C, B );
1587  ztrmm( C, A, CblasLeft,
1588  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1589  complex<double>( 1.0, 0.0 ) );
1590  }
1591  else if( IsTriangular<MT5>::value ) {
1592  assign( C, A );
1593  ztrmm( C, B, CblasRight,
1594  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1595  complex<double>( 1.0, 0.0 ) );
1596  }
1597  else {
1598  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1599  }
1600  }
1602 #endif
1603  //**********************************************************************************************
1604 
1605  //**Assignment to sparse matrices***************************************************************
1618  template< typename MT // Type of the target sparse matrix
1619  , bool SO > // Storage order of the target sparse matrix
1620  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1621  assign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1622  {
1624 
1625  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
1626 
1633 
1634  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1635  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1636 
1637  const TmpType tmp( serial( rhs ) );
1638  assign( ~lhs, tmp );
1639  }
1641  //**********************************************************************************************
1642 
1643  //**Restructuring assignment to row-major matrices**********************************************
1658  template< typename MT > // Type of the target matrix
1659  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1660  assign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
1661  {
1663 
1665 
1666  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1667  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1668 
1669  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1670  assign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
1671  else if( IsSymmetric<MT1>::value )
1672  assign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
1673  else
1674  assign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
1675  }
1677  //**********************************************************************************************
1678 
1679  //**Addition assignment to dense matrices*******************************************************
1692  template< typename MT // Type of the target dense matrix
1693  , bool SO > // Storage order of the target dense matrix
1694  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1695  addAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
1696  {
1698 
1699  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1700  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1701 
1702  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1703  return;
1704  }
1705 
1706  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1707  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1708 
1709  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1710  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1711  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1712  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1713  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1714  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1715 
1716  TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1717  }
1719  //**********************************************************************************************
1720 
1721  //**Addition assignment to dense matrices (kernel selection)************************************
1732  template< typename MT3 // Type of the left-hand side target matrix
1733  , typename MT4 // Type of the left-hand side matrix operand
1734  , typename MT5 > // Type of the right-hand side matrix operand
1735  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1736  {
1737  if( ( IsDiagonal<MT4>::value ) ||
1738  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
1739  selectSmallAddAssignKernel( C, A, B );
1740  else
1741  selectBlasAddAssignKernel( C, A, B );
1742  }
1744  //**********************************************************************************************
1745 
1746  //**Default addition assignment to dense matrices (general/general)*****************************
1760  template< typename MT3 // Type of the left-hand side target matrix
1761  , typename MT4 // Type of the left-hand side matrix operand
1762  , typename MT5 > // Type of the right-hand side matrix operand
1763  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
1764  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1765  {
1766  const size_t M( A.rows() );
1767  const size_t N( B.columns() );
1768  const size_t K( A.columns() );
1769 
1770  for( size_t j=0UL; j<N; ++j )
1771  {
1772  const size_t kbegin( ( IsLower<MT5>::value )
1773  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1774  :( 0UL ) );
1775  const size_t kend( ( IsUpper<MT5>::value )
1776  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1777  :( K ) );
1778  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
1779 
1780  for( size_t k=kbegin; k<kend; ++k )
1781  {
1782  const size_t ibegin( ( IsLower<MT4>::value )
1783  ?( IsStrictlyLower<MT4>::value ? k+1UL : k )
1784  :( 0UL ) );
1785  const size_t iend( ( IsUpper<MT4>::value )
1786  ?( IsStrictlyUpper<MT4>::value ? k : k+1UL )
1787  :( M ) );
1788  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1789 
1790  const size_t inum( iend - ibegin );
1791  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1792 
1793  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1794  C(i ,j) += A(i ,k) * B(k,j);
1795  C(i+1UL,j) += A(i+1UL,k) * B(k,j);
1796  }
1797  if( ipos < iend ) {
1798  C(ipos,j) += A(ipos,k) * B(k,j);
1799  }
1800  }
1801  }
1802  }
1804  //**********************************************************************************************
1805 
1806  //**Default addition assignment to dense matrices (general/diagonal)****************************
1820  template< typename MT3 // Type of the left-hand side target matrix
1821  , typename MT4 // Type of the left-hand side matrix operand
1822  , typename MT5 > // Type of the right-hand side matrix operand
1823  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
1824  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1825  {
1827 
1828  const size_t M( A.rows() );
1829  const size_t N( B.columns() );
1830 
1831  for( size_t j=0UL; j<N; ++j )
1832  {
1833  const size_t ibegin( ( IsLower<MT4>::value )
1834  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
1835  :( 0UL ) );
1836  const size_t iend( ( IsUpper<MT4>::value )
1837  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
1838  :( M ) );
1839  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1840 
1841  const size_t inum( iend - ibegin );
1842  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1843 
1844  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1845  C(i ,j) += A(i ,j) * B(j,j);
1846  C(i+1UL,j) += A(i+1UL,j) * B(j,j);
1847  }
1848  if( ipos < iend ) {
1849  C(ipos,j) += A(ipos,j) * B(j,j);
1850  }
1851  }
1852  }
1854  //**********************************************************************************************
1855 
1856  //**Default addition assignment to dense matrices (diagonal/general)****************************
1870  template< typename MT3 // Type of the left-hand side target matrix
1871  , typename MT4 // Type of the left-hand side matrix operand
1872  , typename MT5 > // Type of the right-hand side matrix operand
1873  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
1874  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1875  {
1877 
1878  const size_t M( A.rows() );
1879  const size_t N( B.columns() );
1880 
1881  for( size_t j=0UL; j<N; ++j )
1882  {
1883  const size_t ibegin( ( IsLower<MT5>::value )
1884  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1885  :( 0UL ) );
1886  const size_t iend( ( IsUpper<MT5>::value )
1887  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1888  :( M ) );
1889  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1890 
1891  const size_t inum( iend - ibegin );
1892  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
1893 
1894  for( size_t i=ibegin; i<ipos; i+=2UL ) {
1895  C(i ,j) += A(i ,i ) * B(i ,j);
1896  C(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
1897  }
1898  if( ipos < iend ) {
1899  C(ipos,j) += A(ipos,ipos) * B(ipos,j);
1900  }
1901  }
1902  }
1904  //**********************************************************************************************
1905 
1906  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
1920  template< typename MT3 // Type of the left-hand side target matrix
1921  , typename MT4 // Type of the left-hand side matrix operand
1922  , typename MT5 > // Type of the right-hand side matrix operand
1923  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
1924  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1925  {
1927 
1928  for( size_t i=0UL; i<A.rows(); ++i ) {
1929  C(i,i) += A(i,i) * B(i,i);
1930  }
1931  }
1933  //**********************************************************************************************
1934 
1935  //**Default addition assignment to dense matrices (small matrices)******************************
// Small-matrix addition assignment kernel for the non-vectorized case.
//
// Selected (via DisableIf) when UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
// It does no work of its own and forwards C, A and B unchanged to
// selectDefaultAddAssignKernel().
1949  template< typename MT3 // Type of the left-hand side target matrix
1950  , typename MT4 // Type of the left-hand side matrix operand
1951  , typename MT5 > // Type of the right-hand side matrix operand
1952  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1953  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1954  {
1955  selectDefaultAddAssignKernel( C, A, B );
1956  }
1958  //**********************************************************************************************
1959 
1960  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
1975  template< typename MT3 // Type of the left-hand side target matrix
1976  , typename MT4 // Type of the left-hand side matrix operand
1977  , typename MT5 > // Type of the right-hand side matrix operand
1978  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1979  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1980  {
1985 
1986  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1987  const typename MT5::OppositeType tmp( serial( B ) );
1988  addAssign( ~C, A * tmp );
1989  }
1990  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1991  const typename MT4::OppositeType tmp( serial( A ) );
1992  addAssign( ~C, tmp * B );
1993  }
1994  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1995  const typename MT5::OppositeType tmp( serial( B ) );
1996  addAssign( ~C, A * tmp );
1997  }
1998  else {
1999  const typename MT4::OppositeType tmp( serial( A ) );
2000  addAssign( ~C, tmp * B );
2001  }
2002  }
2004  //**********************************************************************************************
2005 
2006  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
2021  template< typename MT3 // Type of the left-hand side target matrix
2022  , typename MT4 // Type of the left-hand side matrix operand
2023  , typename MT5 > // Type of the right-hand side matrix operand
2024  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2025  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2026  {
2027  typedef IntrinsicTrait<ElementType> IT;
2028 
2029  const size_t M( A.rows() );
2030  const size_t N( B.columns() );
2031  const size_t K( A.columns() );
2032 
2033  size_t i( 0UL );
2034 
2035  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2036  for( size_t j=0UL; j<N; ++j )
2037  {
2038  const size_t kbegin( ( IsLower<MT5>::value )
2039  ?( ( IsUpper<MT4>::value )
2040  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2041  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2042  :( IsUpper<MT4>::value ? i : 0UL ) );
2043  const size_t kend( ( IsUpper<MT5>::value )
2044  ?( ( IsLower<MT4>::value )
2045  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2046  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
2047  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
2048 
2049  IntrinsicType xmm1( (~C).load(i ,j) );
2050  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
2051  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
2052  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
2053  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
2054  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
2055  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
2056  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
2057 
2058  for( size_t k=kbegin; k<kend; ++k ) {
2059  const IntrinsicType b1( set( B(k,j) ) );
2060  xmm1 = xmm1 + A.load(i ,k) * b1;
2061  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2062  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2063  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2064  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2065  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2066  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2067  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2068  }
2069 
2070  (~C).store( i , j, xmm1 );
2071  (~C).store( i+IT::size , j, xmm2 );
2072  (~C).store( i+IT::size*2UL, j, xmm3 );
2073  (~C).store( i+IT::size*3UL, j, xmm4 );
2074  (~C).store( i+IT::size*4UL, j, xmm5 );
2075  (~C).store( i+IT::size*5UL, j, xmm6 );
2076  (~C).store( i+IT::size*6UL, j, xmm7 );
2077  (~C).store( i+IT::size*7UL, j, xmm8 );
2078  }
2079  }
2080 
2081  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
2082  {
2083  size_t j( 0UL );
2084 
2085  for( ; (j+2UL) <= N; j+=2UL )
2086  {
2087  const size_t kbegin( ( IsLower<MT5>::value )
2088  ?( ( IsUpper<MT4>::value )
2089  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2090  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2091  :( IsUpper<MT4>::value ? i : 0UL ) );
2092  const size_t kend( ( IsUpper<MT5>::value )
2093  ?( ( IsLower<MT4>::value )
2094  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2095  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2096  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
2097 
2098  IntrinsicType xmm1( (~C).load(i ,j ) );
2099  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
2100  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
2101  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
2102  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
2103  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
2104  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
2105  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
2106 
2107  for( size_t k=kbegin; k<kend; ++k ) {
2108  const IntrinsicType a1( A.load(i ,k) );
2109  const IntrinsicType a2( A.load(i+IT::size ,k) );
2110  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
2111  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
2112  const IntrinsicType b1( set( B(k,j ) ) );
2113  const IntrinsicType b2( set( B(k,j+1UL) ) );
2114  xmm1 = xmm1 + a1 * b1;
2115  xmm2 = xmm2 + a2 * b1;
2116  xmm3 = xmm3 + a3 * b1;
2117  xmm4 = xmm4 + a4 * b1;
2118  xmm5 = xmm5 + a1 * b2;
2119  xmm6 = xmm6 + a2 * b2;
2120  xmm7 = xmm7 + a3 * b2;
2121  xmm8 = xmm8 + a4 * b2;
2122  }
2123 
2124  (~C).store( i , j , xmm1 );
2125  (~C).store( i+IT::size , j , xmm2 );
2126  (~C).store( i+IT::size*2UL, j , xmm3 );
2127  (~C).store( i+IT::size*3UL, j , xmm4 );
2128  (~C).store( i , j+1UL, xmm5 );
2129  (~C).store( i+IT::size , j+1UL, xmm6 );
2130  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
2131  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
2132  }
2133 
2134  if( j < N )
2135  {
2136  const size_t kbegin( ( IsLower<MT5>::value )
2137  ?( ( IsUpper<MT4>::value )
2138  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2139  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2140  :( IsUpper<MT4>::value ? i : 0UL ) );
2141  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
2142 
2143  IntrinsicType xmm1( (~C).load(i ,j) );
2144  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
2145  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
2146  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
2147 
2148  for( size_t k=kbegin; k<kend; ++k ) {
2149  const IntrinsicType b1( set( B(k,j) ) );
2150  xmm1 = xmm1 + A.load(i ,k) * b1;
2151  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2152  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2153  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2154  }
2155 
2156  (~C).store( i , j, xmm1 );
2157  (~C).store( i+IT::size , j, xmm2 );
2158  (~C).store( i+IT::size*2UL, j, xmm3 );
2159  (~C).store( i+IT::size*3UL, j, xmm4 );
2160  }
2161  }
2162 
2163  for( ; (i+IT::size) < M; i+=IT::size*2UL )
2164  {
2165  size_t j( 0UL );
2166 
2167  for( ; (j+2UL) <= N; j+=2UL )
2168  {
2169  const size_t kbegin( ( IsLower<MT5>::value )
2170  ?( ( IsUpper<MT4>::value )
2171  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2172  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2173  :( IsUpper<MT4>::value ? i : 0UL ) );
2174  const size_t kend( ( IsUpper<MT5>::value )
2175  ?( ( IsLower<MT4>::value )
2176  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
2177  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
2178  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
2179 
2180  IntrinsicType xmm1( (~C).load(i ,j ) );
2181  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
2182  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
2183  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
2184 
2185  for( size_t k=kbegin; k<kend; ++k ) {
2186  const IntrinsicType a1( A.load(i ,k) );
2187  const IntrinsicType a2( A.load(i+IT::size,k) );
2188  const IntrinsicType b1( set( B(k,j ) ) );
2189  const IntrinsicType b2( set( B(k,j+1UL) ) );
2190  xmm1 = xmm1 + a1 * b1;
2191  xmm2 = xmm2 + a2 * b1;
2192  xmm3 = xmm3 + a1 * b2;
2193  xmm4 = xmm4 + a2 * b2;
2194  }
2195 
2196  (~C).store( i , j , xmm1 );
2197  (~C).store( i+IT::size, j , xmm2 );
2198  (~C).store( i , j+1UL, xmm3 );
2199  (~C).store( i+IT::size, j+1UL, xmm4 );
2200  }
2201 
2202  if( j < N )
2203  {
2204  const size_t kbegin( ( IsLower<MT5>::value )
2205  ?( ( IsUpper<MT4>::value )
2206  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2207  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2208  :( IsUpper<MT4>::value ? i : 0UL ) );
2209  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
2210 
2211  IntrinsicType xmm1( (~C).load(i ,j) );
2212  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
2213 
2214  for( size_t k=kbegin; k<kend; ++k ) {
2215  const IntrinsicType b1( set( B(k,j) ) );
2216  xmm1 = xmm1 + A.load(i ,k) * b1;
2217  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2218  }
2219 
2220  (~C).store( i , j, xmm1 );
2221  (~C).store( i+IT::size, j, xmm2 );
2222  }
2223  }
2224 
2225  if( i < M )
2226  {
2227  size_t j( 0UL );
2228 
2229  for( ; (j+2UL) <= N; j+=2UL )
2230  {
2231  const size_t kbegin( ( IsLower<MT5>::value )
2232  ?( ( IsUpper<MT4>::value )
2233  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2234  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2235  :( IsUpper<MT4>::value ? i : 0UL ) );
2236  const size_t kend( ( IsUpper<MT5>::value )
2237  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
2238  :( K ) );
2239 
2240  IntrinsicType xmm1( (~C).load(i,j ) );
2241  IntrinsicType xmm2( (~C).load(i,j+1UL) );
2242 
2243  for( size_t k=kbegin; k<kend; ++k ) {
2244  const IntrinsicType a1( A.load(i,k) );
2245  xmm1 = xmm1 + a1 * set( B(k,j ) );
2246  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
2247  }
2248 
2249  (~C).store( i, j , xmm1 );
2250  (~C).store( i, j+1UL, xmm2 );
2251  }
2252 
2253  if( j < N )
2254  {
2255  const size_t kbegin( ( IsLower<MT5>::value )
2256  ?( ( IsUpper<MT4>::value )
2257  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2258  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2259  :( IsUpper<MT4>::value ? i : 0UL ) );
2260 
2261  IntrinsicType xmm1( (~C).load(i,j) );
2262 
2263  for( size_t k=kbegin; k<K; ++k ) {
2264  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
2265  }
2266 
2267  (~C).store( i, j, xmm1 );
2268  }
2269  }
2270  }
2272  //**********************************************************************************************
2273 
2274  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment kernel for operand combinations for which
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold (note the DisableIf).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// There is no dedicated large-matrix code path for this case: the call is
// forwarded unchanged to selectDefaultAddAssignKernel().
2288  template< typename MT3 // Type of the left-hand side target matrix
2289  , typename MT4 // Type of the left-hand side matrix operand
2290  , typename MT5 > // Type of the right-hand side matrix operand
2291  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2292  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2293  {
2294  selectDefaultAddAssignKernel( C, A, B );
2295  }
2297  //**********************************************************************************************
2298 
2299  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Large-matrix addition assignment kernel for targets of type
// DenseMatrix<MT3,false> (the row-major case according to the section header)
// when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// No blocked implementation exists for this storage order; the work is
// delegated to selectSmallAddAssignKernel() on the unwrapped target (~C).
2314  template< typename MT3 // Type of the left-hand side target matrix
2315  , typename MT4 // Type of the left-hand side matrix operand
2316  , typename MT5 > // Type of the right-hand side matrix operand
2317  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2318  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2319  {
2320  selectSmallAddAssignKernel( ~C, A, B );
2321  }
2323  //**********************************************************************************************
2324 
2325  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
// Blocked, vectorized large-matrix addition assignment kernel for targets of
// type DenseMatrix<MT3,true> (the column-major case according to the section
// header), enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C : target matrix; every processed element is loaded, updated and stored back
// A : left-hand side matrix operand (read with A.load(i,k))
// B : right-hand side matrix operand (read element-wise with B(k,j))
//
// The iteration space is cut into tiles of iblock x jblock x kblock
// (128 x 64 x 128). Inside a tile, rows are processed in groups of four
// vectors (IT::size*4), then two vectors (IT::size*2), then a single vector;
// columns are processed two at a time (four at a time in the two-vector row
// group) followed by a single remainder column. For every group the kernel
// accumulates C(i,j) += sum_k A(i,k) * B(k,j) over the k-range of the tile.
//
// The k-range [kbegin,kend) is narrowed using the compile-time structure
// traits: IsUpper<MT4> raises kbegin to i, IsLower<MT5> raises kbegin to j,
// IsLower<MT4> caps kend at the end of the current row group and IsUpper<MT5>
// caps kend at the end of the current column group. An empty range simply
// skips the inner loop (k<kend is false).
//
// NOTE(review): the row loops only test that the first element of the last
// vector of a group lies before iend, so a vector may extend past iend/M;
// presumably the operands are padded to a multiple of IT::size - confirm.
// NOTE(review): set( B(k,j) ) presumably broadcasts the scalar to all lanes of
// an IntrinsicType - confirm against the intrinsics header.
2340  template< typename MT3 // Type of the left-hand side target matrix
2341  , typename MT4 // Type of the left-hand side matrix operand
2342  , typename MT5 > // Type of the right-hand side matrix operand
2343  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2344  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2345  {
2346  typedef IntrinsicTrait<ElementType> IT;
2347 
2348  const size_t M( A.rows() );
2349  const size_t N( B.columns() );
2350  const size_t K( A.columns() );
2351 
// Tile extents in the row (i), column (j) and inner (k) dimension.
2352  const size_t iblock( 128UL );
2353  const size_t jblock( 64UL );
2354  const size_t kblock( 128UL );
2355 
2356  for( size_t ii=0UL; ii<M; ii+=iblock )
2357  {
2358  const size_t iend( min( ii+iblock, M ) );
2359 
2360  for( size_t jj=0UL; jj<N; jj+=jblock )
2361  {
2362  const size_t jend( min( jj+jblock, N ) );
2363 
2364  for( size_t kk=0UL; kk<K; kk+=kblock )
2365  {
2366  const size_t ktmp( min( kk+kblock, K ) );
2367 
2368  size_t i( ii );
2369 
// Row groups of four vectors (i, i1, i2, i3).
2370  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
2371  {
2372  const size_t i1( i+IT::size );
2373  const size_t i2( i+IT::size*2UL );
2374  const size_t i3( i+IT::size*3UL );
2375 
2376  size_t j( jj );
2377 
// 4 vectors x 2 columns: eight accumulators.
2378  for( ; (j+2UL) <= jend; j+=2UL )
2379  {
2380  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2381  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2382  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
2383  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
2384 
2385  IntrinsicType xmm1( (~C).load(i ,j ) );
2386  IntrinsicType xmm2( (~C).load(i1,j ) );
2387  IntrinsicType xmm3( (~C).load(i2,j ) );
2388  IntrinsicType xmm4( (~C).load(i3,j ) );
2389  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
2390  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
2391  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
2392  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
2393 
2394  for( size_t k=kbegin; k<kend; ++k ) {
2395  const IntrinsicType a1( A.load(i ,k) );
2396  const IntrinsicType a2( A.load(i1,k) );
2397  const IntrinsicType a3( A.load(i2,k) );
2398  const IntrinsicType a4( A.load(i3,k) );
2399  const IntrinsicType b1( set( B(k,j ) ) );
2400  const IntrinsicType b2( set( B(k,j+1UL) ) );
2401  xmm1 = xmm1 + a1 * b1;
2402  xmm2 = xmm2 + a2 * b1;
2403  xmm3 = xmm3 + a3 * b1;
2404  xmm4 = xmm4 + a4 * b1;
2405  xmm5 = xmm5 + a1 * b2;
2406  xmm6 = xmm6 + a2 * b2;
2407  xmm7 = xmm7 + a3 * b2;
2408  xmm8 = xmm8 + a4 * b2;
2409  }
2410 
2411  (~C).store( i , j , xmm1 );
2412  (~C).store( i1, j , xmm2 );
2413  (~C).store( i2, j , xmm3 );
2414  (~C).store( i3, j , xmm4 );
2415  (~C).store( i , j+1UL, xmm5 );
2416  (~C).store( i1, j+1UL, xmm6 );
2417  (~C).store( i2, j+1UL, xmm7 );
2418  (~C).store( i3, j+1UL, xmm8 );
2419  }
2420 
// Remainder column of the tile for the four-vector row group.
2421  if( j < jend )
2422  {
2423  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2424  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2425  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
2426  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
2427 
2428  IntrinsicType xmm1( (~C).load(i ,j) );
2429  IntrinsicType xmm2( (~C).load(i1,j) );
2430  IntrinsicType xmm3( (~C).load(i2,j) );
2431  IntrinsicType xmm4( (~C).load(i3,j) );
2432 
2433  for( size_t k=kbegin; k<kend; ++k ) {
2434  const IntrinsicType b1( set( B(k,j) ) );
2435  xmm1 = xmm1 + A.load(i ,k) * b1;
2436  xmm2 = xmm2 + A.load(i1,k) * b1;
2437  xmm3 = xmm3 + A.load(i2,k) * b1;
2438  xmm4 = xmm4 + A.load(i3,k) * b1;
2439  }
2440 
2441  (~C).store( i , j, xmm1 );
2442  (~C).store( i1, j, xmm2 );
2443  (~C).store( i2, j, xmm3 );
2444  (~C).store( i3, j, xmm4 );
2445  }
2446  }
2447 
// Row groups of two vectors (i, i1).
2448  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
2449  {
2450  const size_t i1( i+IT::size );
2451 
2452  size_t j( jj );
2453 
// 2 vectors x 4 columns: eight accumulators.
2454  for( ; (j+4UL) <= jend; j+=4UL )
2455  {
2456  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2457  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2458  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
2459  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
2460 
2461  IntrinsicType xmm1( (~C).load(i ,j ) );
2462  IntrinsicType xmm2( (~C).load(i1,j ) );
2463  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
2464  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
2465  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
2466  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
2467  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
2468  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
2469 
2470  for( size_t k=kbegin; k<kend; ++k ) {
2471  const IntrinsicType a1( A.load(i ,k) );
2472  const IntrinsicType a2( A.load(i1,k) );
2473  const IntrinsicType b1( set( B(k,j ) ) );
2474  const IntrinsicType b2( set( B(k,j+1UL) ) );
2475  const IntrinsicType b3( set( B(k,j+2UL) ) );
2476  const IntrinsicType b4( set( B(k,j+3UL) ) );
2477  xmm1 = xmm1 + a1 * b1;
2478  xmm2 = xmm2 + a2 * b1;
2479  xmm3 = xmm3 + a1 * b2;
2480  xmm4 = xmm4 + a2 * b2;
2481  xmm5 = xmm5 + a1 * b3;
2482  xmm6 = xmm6 + a2 * b3;
2483  xmm7 = xmm7 + a1 * b4;
2484  xmm8 = xmm8 + a2 * b4;
2485  }
2486 
2487  (~C).store( i , j , xmm1 );
2488  (~C).store( i1, j , xmm2 );
2489  (~C).store( i , j+1UL, xmm3 );
2490  (~C).store( i1, j+1UL, xmm4 );
2491  (~C).store( i , j+2UL, xmm5 );
2492  (~C).store( i1, j+2UL, xmm6 );
2493  (~C).store( i , j+3UL, xmm7 );
2494  (~C).store( i1, j+3UL, xmm8 );
2495  }
2496 
// 2 vectors x 2 columns.
2497  for( ; (j+2UL) <= jend; j+=2UL )
2498  {
2499  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2500  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2501  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
2502  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
2503 
2504  IntrinsicType xmm1( (~C).load(i ,j ) );
2505  IntrinsicType xmm2( (~C).load(i1,j ) );
2506  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
2507  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
2508 
2509  for( size_t k=kbegin; k<kend; ++k ) {
2510  const IntrinsicType a1( A.load(i ,k) );
2511  const IntrinsicType a2( A.load(i1,k) );
2512  const IntrinsicType b1( set( B(k,j ) ) );
2513  const IntrinsicType b2( set( B(k,j+1UL) ) );
2514  xmm1 = xmm1 + a1 * b1;
2515  xmm2 = xmm2 + a2 * b1;
2516  xmm3 = xmm3 + a1 * b2;
2517  xmm4 = xmm4 + a2 * b2;
2518  }
2519 
2520  (~C).store( i , j , xmm1 );
2521  (~C).store( i1, j , xmm2 );
2522  (~C).store( i , j+1UL, xmm3 );
2523  (~C).store( i1, j+1UL, xmm4 );
2524  }
2525 
// Remainder column of the tile for the two-vector row group.
2526  if( j < jend )
2527  {
2528  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2529  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2530  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
2531  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
2532 
2533  IntrinsicType xmm1( (~C).load(i ,j) );
2534  IntrinsicType xmm2( (~C).load(i1,j) );
2535 
2536  for( size_t k=kbegin; k<kend; ++k ) {
2537  const IntrinsicType b1( set( B(k,j) ) );
2538  xmm1 = xmm1 + A.load(i ,k) * b1;
2539  xmm2 = xmm2 + A.load(i1,k) * b1;
2540  }
2541 
2542  (~C).store( i , j, xmm1 );
2543  (~C).store( i1, j, xmm2 );
2544  }
2545  }
2546 
// At most one vector of rows is left in this tile; handle it column by column.
2547  if( i < iend )
2548  {
2549  for( size_t j=jj; j<jend; ++j )
2550  {
2551  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
2552  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
2553  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
2554  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
2555 
2556  IntrinsicType xmm1( (~C).load(i,j) );
2557 
2558  for( size_t k=kbegin; k<kend; ++k ) {
2559  const IntrinsicType b1( set( B(k,j) ) );
2560  xmm1 = xmm1 + A.load(i,k) * b1;
2561  }
2562 
2563  (~C).store( i, j, xmm1 );
2564  }
2565  }
2566  }
2567  }
2568  }
2569  }
2571  //**********************************************************************************************
2572 
2573  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Fallback of the BLAS-based addition assignment, selected when
// UseDefaultKernel<MT3,MT4,MT5> holds.
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// No BLAS routine is called on this path; the call is forwarded to
// selectLargeAddAssignKernel().
2587  template< typename MT3 // Type of the left-hand side target matrix
2588  , typename MT4 // Type of the left-hand side matrix operand
2589  , typename MT5 > // Type of the right-hand side matrix operand
2590  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2591  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2592  {
2593  selectLargeAddAssignKernel( C, A, B );
2594  }
2596  //**********************************************************************************************
2597 
2598  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2599 #if BLAZE_BLAS_MODE
2600 
// BLAS-based addition assignment, selected when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE
// is enabled).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Three cases, decided by compile-time traits:
//  - A triangular: B is copied into a temporary, strmm() is applied with A on
//    the left (CblasLeft) and the temporary is added to C.
//  - B triangular: A is copied into a temporary, strmm() is applied with B on
//    the right (CblasRight) and the temporary is added to C.
//  - otherwise: a single sgemm() call with both scalar factors set to 1.0F.
// NOTE(review): presumably strmm() overwrites its first argument with the
// scaled triangular product and sgemm( C, A, B, alpha, beta ) computes
// C = alpha*A*B + beta*C - confirm against the BLAS wrapper header.
2613  template< typename MT3 // Type of the left-hand side target matrix
2614  , typename MT4 // Type of the left-hand side matrix operand
2615  , typename MT5 > // Type of the right-hand side matrix operand
2616  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2617  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2618  {
2619  if( IsTriangular<MT4>::value ) {
2620  typename MT3::ResultType tmp( B );
2621  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2622  addAssign( C, tmp );
2623  }
2624  else if( IsTriangular<MT5>::value ) {
2625  typename MT3::ResultType tmp( A );
2626  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2627  addAssign( C, tmp );
2628  }
2629  else {
2630  sgemm( C, A, B, 1.0F, 1.0F );
2631  }
2632  }
2634 #endif
2635  //**********************************************************************************************
2636 
2637  //**BLAS-based addition assignment to dense matrices (double precision)*************************
2638 #if BLAZE_BLAS_MODE
2639 
// BLAS-based addition assignment, selected when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds (only compiled if BLAZE_BLAS_MODE
// is enabled).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Three cases, decided by compile-time traits:
//  - A triangular: B is copied into a temporary, dtrmm() is applied with A on
//    the left (CblasLeft) and the temporary is added to C.
//  - B triangular: A is copied into a temporary, dtrmm() is applied with B on
//    the right (CblasRight) and the temporary is added to C.
//  - otherwise: a single dgemm() call with both scalar factors set to 1.0.
// NOTE(review): presumably dtrmm() overwrites its first argument with the
// scaled triangular product and dgemm( C, A, B, alpha, beta ) computes
// C = alpha*A*B + beta*C - confirm against the BLAS wrapper header.
2652  template< typename MT3 // Type of the left-hand side target matrix
2653  , typename MT4 // Type of the left-hand side matrix operand
2654  , typename MT5 > // Type of the right-hand side matrix operand
2655  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2656  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2657  {
2658  if( IsTriangular<MT4>::value ) {
2659  typename MT3::ResultType tmp( B );
2660  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2661  addAssign( C, tmp );
2662  }
2663  else if( IsTriangular<MT5>::value ) {
2664  typename MT3::ResultType tmp( A );
2665  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2666  addAssign( C, tmp );
2667  }
2668  else {
2669  dgemm( C, A, B, 1.0, 1.0 );
2670  }
2671  }
2673 #endif
2674  //**********************************************************************************************
2675 
2676  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
2677 #if BLAZE_BLAS_MODE
2678 
// BLAS-based addition assignment, selected when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if
// BLAZE_BLAS_MODE is enabled).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Three cases, decided by compile-time traits:
//  - A triangular: B is copied into a temporary, ctrmm() is applied with A on
//    the left (CblasLeft) and the temporary is added to C.
//  - B triangular: A is copied into a temporary, ctrmm() is applied with B on
//    the right (CblasRight) and the temporary is added to C.
//  - otherwise: a single cgemm() call with both scalar factors set to (1,0).
// NOTE(review): presumably ctrmm() overwrites its first argument with the
// scaled triangular product and cgemm( C, A, B, alpha, beta ) computes
// C = alpha*A*B + beta*C - confirm against the BLAS wrapper header.
2691  template< typename MT3 // Type of the left-hand side target matrix
2692  , typename MT4 // Type of the left-hand side matrix operand
2693  , typename MT5 > // Type of the right-hand side matrix operand
2694  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2695  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2696  {
2697  if( IsTriangular<MT4>::value ) {
2698  typename MT3::ResultType tmp( B );
2699  ctrmm( tmp, A, CblasLeft,
2700  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2701  complex<float>( 1.0F, 0.0F ) );
2702  addAssign( C, tmp );
2703  }
2704  else if( IsTriangular<MT5>::value ) {
2705  typename MT3::ResultType tmp( A );
2706  ctrmm( tmp, B, CblasRight,
2707  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2708  complex<float>( 1.0F, 0.0F ) );
2709  addAssign( C, tmp );
2710  }
2711  else {
2712  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2713  }
2714  }
2716 #endif
2717  //**********************************************************************************************
2718 
2719  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
2720 #if BLAZE_BLAS_MODE
2721 
// BLAS-based addition assignment, selected when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (only compiled if
// BLAZE_BLAS_MODE is enabled).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Three cases, decided by compile-time traits:
//  - A triangular: B is copied into a temporary, ztrmm() is applied with A on
//    the left (CblasLeft) and the temporary is added to C.
//  - B triangular: A is copied into a temporary, ztrmm() is applied with B on
//    the right (CblasRight) and the temporary is added to C.
//  - otherwise: a single zgemm() call with both scalar factors set to (1,0).
// NOTE(review): presumably ztrmm() overwrites its first argument with the
// scaled triangular product and zgemm( C, A, B, alpha, beta ) computes
// C = alpha*A*B + beta*C - confirm against the BLAS wrapper header.
2734  template< typename MT3 // Type of the left-hand side target matrix
2735  , typename MT4 // Type of the left-hand side matrix operand
2736  , typename MT5 > // Type of the right-hand side matrix operand
2737  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2738  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2739  {
2740  if( IsTriangular<MT4>::value ) {
2741  typename MT3::ResultType tmp( B );
2742  ztrmm( tmp, A, CblasLeft,
2743  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2744  complex<double>( 1.0, 0.0 ) );
2745  addAssign( C, tmp );
2746  }
2747  else if( IsTriangular<MT5>::value ) {
2748  typename MT3::ResultType tmp( A );
2749  ztrmm( tmp, B, CblasRight,
2750  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2751  complex<double>( 1.0, 0.0 ) );
2752  addAssign( C, tmp );
2753  }
2754  else {
2755  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2756  }
2757  }
2759 #endif
2760  //**********************************************************************************************
2761 
2762  //**Restructuring addition assignment to row-major matrices*************************************
// Restructuring addition assignment of the product to a Matrix<MT,false>
// target (row-major according to the section header), enabled when
// CanExploitSymmetry<MT,MT1,MT2> holds.
//
// lhs : target matrix
// rhs : the multiplication expression whose operands are rhs.lhs_ and rhs.rhs_
//
// The product is rewritten with trans() applied to whichever operand is
// symmetric and the rewritten expression is handed to addAssign() again:
//  - both operands symmetric : trans(lhs_) * trans(rhs_)
//  - only MT1 symmetric      : trans(lhs_) * rhs_
//  - otherwise               : lhs_ * trans(rhs_)
// NOTE(review): the last branch presumably relies on CanExploitSymmetry
// guaranteeing that MT2 is symmetric whenever MT1 is not - confirm.
2777  template< typename MT > // Type of the target matrix
2778  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2779  addAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
2780  {
2782 
2784 
// The target must already have the dimensions of the product.
2785  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2786  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2787 
2788  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2789  addAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
2790  else if( IsSymmetric<MT1>::value )
2791  addAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
2792  else
2793  addAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
2794  }
2796  //**********************************************************************************************
2797 
2798  //**Addition assignment to sparse matrices******************************************************
2799  // No special implementation for the addition assignment to sparse matrices.
2800  //**********************************************************************************************
2801 
2802  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of the product to a dense matrix of either storage
// order, used when CanExploitSymmetry<MT,MT1,MT2> does not hold (DisableIf).
//
// lhs : target dense matrix that is updated in place
// rhs : the multiplication expression whose operands are rhs.lhs_ and rhs.rhs_
//
// Returns immediately when the target has no rows or no columns, or when the
// inner dimension (rhs.lhs_.columns()) is zero. Otherwise both operands are
// evaluated through serial() into the local objects A and B, and the actual
// work is dispatched to selectSubAssignKernel().
2815  template< typename MT // Type of the target dense matrix
2816  , bool SO > // Storage order of the target dense matrix
2817  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2818  subAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
2819  {
2821 
2822  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2823  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2824 
// Nothing to subtract for an empty target or an empty inner dimension.
2825  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2826  return;
2827  }
2828 
2829  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2830  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2831 
// The evaluated operands must keep the dimensions of the original operands
// and must be conformant with the target.
2832  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2833  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2834  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2835  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2836  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2837  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2838 
2839  TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2840  }
2842  //**********************************************************************************************
2843 
2844  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment C -= A * B.
//
// C : target matrix
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// The small-matrix kernel is used when A is a diagonal matrix type or when the
// number of target elements (rows * columns) is below
// TDMATTDMATMULT_THRESHOLD; every other case goes to the BLAS-based kernel.
2855  template< typename MT3 // Type of the left-hand side target matrix
2856  , typename MT4 // Type of the left-hand side matrix operand
2857  , typename MT5 > // Type of the right-hand side matrix operand
2858  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2859  {
2860  if( ( IsDiagonal<MT4>::value ) ||
2861  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
2862  selectSmallSubAssignKernel( C, A, B );
2863  else
2864  selectBlasSubAssignKernel( C, A, B );
2865  }
2867  //**********************************************************************************************
2868 
2869  //**Default subtraction assignment to dense matrices (general/general)**************************
// Scalar default kernel for C -= A * B when neither A nor B is a diagonal
// matrix type.
//
// C : target matrix, updated element-wise
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Loop order is j (columns of C), k (inner dimension), i (rows of C). The
// k-range is restricted by the triangular structure of B and the i-range by the
// triangular structure of A, so that index ranges excluded by the
// (strictly) lower/upper traits are not visited.
2883  template< typename MT3 // Type of the left-hand side target matrix
2884  , typename MT4 // Type of the left-hand side matrix operand
2885  , typename MT5 > // Type of the right-hand side matrix operand
2886  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
2887  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2888  {
2889  const size_t M( A.rows() );
2890  const size_t N( B.columns() );
2891  const size_t K( A.columns() );
2892 
2893  for( size_t j=0UL; j<N; ++j )
2894  {
// Rows k of column j of B that take part: from the diagonal downwards for a
// lower B, up to the diagonal for an upper B (diagonal excluded if strict).
2895  const size_t kbegin( ( IsLower<MT5>::value )
2896  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2897  :( 0UL ) );
2898  const size_t kend( ( IsUpper<MT5>::value )
2899  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2900  :( K ) );
2901  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
2902 
2903  for( size_t k=kbegin; k<kend; ++k )
2904  {
// Rows i of column k of A that take part, by the same rule applied to A.
2905  const size_t ibegin( ( IsLower<MT4>::value )
2906  ?( IsStrictlyLower<MT4>::value ? k+1UL : k )
2907  :( 0UL ) );
2908  const size_t iend( ( IsUpper<MT4>::value )
2909  ?( IsStrictlyUpper<MT4>::value ? k : k+1UL )
2910  :( M ) );
2911  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2912 
// The i-loop is unrolled by two: ipos is ibegin plus the row count rounded
// down to an even number (size_t(-2) clears the lowest bit).
2913  const size_t inum( iend - ibegin );
2914  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2915 
2916  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2917  C(i ,j) -= A(i ,k) * B(k,j);
2918  C(i+1UL,j) -= A(i+1UL,k) * B(k,j);
2919  }
// Odd row count: handle the single leftover row.
2920  if( ipos < iend ) {
2921  C(ipos,j) -= A(ipos,k) * B(k,j);
2922  }
2923  }
2924  }
2925  }
2927  //**********************************************************************************************
2928 
2929  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
// Scalar default kernel for C -= A * B when B is a diagonal matrix type and A
// is not.
//
// C : target matrix, updated element-wise
// A : left-hand side matrix operand
// B : right-hand side (diagonal) matrix operand
//
// Only B(j,j) is read from column j of B, so column j of C is reduced by
// column j of A scaled with B(j,j). The i-range is restricted by the
// triangular structure of A.
2943  template< typename MT3 // Type of the left-hand side target matrix
2944  , typename MT4 // Type of the left-hand side matrix operand
2945  , typename MT5 > // Type of the right-hand side matrix operand
2946  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
2947  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2948  {
2950 
2951  const size_t M( A.rows() );
2952  const size_t N( B.columns() );
2953 
2954  for( size_t j=0UL; j<N; ++j )
2955  {
2956  const size_t ibegin( ( IsLower<MT4>::value )
2957  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
2958  :( 0UL ) );
2959  const size_t iend( ( IsUpper<MT4>::value )
2960  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
2961  :( M ) );
2962  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2963 
// The i-loop is unrolled by two: ipos is ibegin plus the row count rounded
// down to an even number (size_t(-2) clears the lowest bit).
2964  const size_t inum( iend - ibegin );
2965  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2966 
2967  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2968  C(i ,j) -= A(i ,j) * B(j,j);
2969  C(i+1UL,j) -= A(i+1UL,j) * B(j,j);
2970  }
// Odd row count: handle the single leftover row.
2971  if( ipos < iend ) {
2972  C(ipos,j) -= A(ipos,j) * B(j,j);
2973  }
2974  }
2975  }
2977  //**********************************************************************************************
2978 
2979  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
// Scalar default kernel for C -= A * B when A is a diagonal matrix type and B
// is not.
//
// C : target matrix, updated element-wise
// A : left-hand side (diagonal) matrix operand
// B : right-hand side matrix operand
//
// Only A(i,i) is read from row i of A, so C(i,j) is reduced by
// A(i,i) * B(i,j). The i-range within column j is restricted by the triangular
// structure of B; the unrestricted upper bound is M = A.rows().
2993  template< typename MT3 // Type of the left-hand side target matrix
2994  , typename MT4 // Type of the left-hand side matrix operand
2995  , typename MT5 > // Type of the right-hand side matrix operand
2996  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
2997  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2998  {
3000 
3001  const size_t M( A.rows() );
3002  const size_t N( B.columns() );
3003 
3004  for( size_t j=0UL; j<N; ++j )
3005  {
3006  const size_t ibegin( ( IsLower<MT5>::value )
3007  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3008  :( 0UL ) );
3009  const size_t iend( ( IsUpper<MT5>::value )
3010  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3011  :( M ) );
3012  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3013 
// The i-loop is unrolled by two: ipos is ibegin plus the row count rounded
// down to an even number (size_t(-2) clears the lowest bit).
3014  const size_t inum( iend - ibegin );
3015  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
3016 
3017  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3018  C(i ,j) -= A(i ,i ) * B(i ,j);
3019  C(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
3020  }
// Odd row count: handle the single leftover row.
3021  if( ipos < iend ) {
3022  C(ipos,j) -= A(ipos,ipos) * B(ipos,j);
3023  }
3024  }
3025  }
3027  //**********************************************************************************************
3028 
3029  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
// Scalar default kernel for C -= A * B when both A and B are diagonal matrix
// types.
//
// C : target matrix; only its diagonal elements are modified
// A : left-hand side (diagonal) matrix operand
// B : right-hand side (diagonal) matrix operand
//
// Each diagonal element C(i,i) is reduced by A(i,i) * B(i,i) for
// i in [0, A.rows()).
3043  template< typename MT3 // Type of the left-hand side target matrix
3044  , typename MT4 // Type of the left-hand side matrix operand
3045  , typename MT5 > // Type of the right-hand side matrix operand
3046  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
3047  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3048  {
3050 
3051  for( size_t i=0UL; i<A.rows(); ++i ) {
3052  C(i,i) -= A(i,i) * B(i,i);
3053  }
3054  }
3056  //**********************************************************************************************
3057 
3058  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction assignment kernel for operand combinations for
// which UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold (note the
// DisableIf).
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// The call is forwarded unchanged to selectDefaultSubAssignKernel().
3072  template< typename MT3 // Type of the left-hand side target matrix
3073  , typename MT4 // Type of the left-hand side matrix operand
3074  , typename MT5 > // Type of the right-hand side matrix operand
3075  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3076  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3077  {
3078  selectDefaultSubAssignKernel( C, A, B );
3079  }
3081  //**********************************************************************************************
3082 
3083  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
// Small-matrix subtraction assignment kernel for targets of type
// DenseMatrix<MT3,false> (row-major according to the section header) when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// C : target matrix that is updated in place
// A : left-hand side matrix operand
// B : right-hand side matrix operand
//
// Instead of computing the product directly, exactly one operand is copied
// into a temporary of its OppositeType and the resulting product expression is
// passed to subAssign() again. Which operand is converted:
//  - A resizable, B not : B is converted
//  - B resizable, A not : A is converted
//  - otherwise          : B if it has no more elements than A, else A
// NOTE(review): OppositeType is presumably the same matrix with the opposite
// storage order - confirm against the matrix type definitions.
3098  template< typename MT3 // Type of the left-hand side target matrix
3099  , typename MT4 // Type of the left-hand side matrix operand
3100  , typename MT5 > // Type of the right-hand side matrix operand
3101  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3102  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3103  {
3108 
3109  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3110  const typename MT5::OppositeType tmp( serial( B ) );
3111  subAssign( ~C, A * tmp );
3112  }
3113  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3114  const typename MT4::OppositeType tmp( serial( A ) );
3115  subAssign( ~C, tmp * B );
3116  }
// Same resizability on both sides: convert the operand with fewer elements.
3117  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3118  const typename MT5::OppositeType tmp( serial( B ) );
3119  subAssign( ~C, A * tmp );
3120  }
3121  else {
3122  const typename MT4::OppositeType tmp( serial( A ) );
3123  subAssign( ~C, tmp * B );
3124  }
3125  }
3127  //**********************************************************************************************
3128 
3129  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
3144  template< typename MT3 // Type of the left-hand side target matrix
3145  , typename MT4 // Type of the left-hand side matrix operand
3146  , typename MT5 > // Type of the right-hand side matrix operand
3147  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3148  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3149  {
3150  typedef IntrinsicTrait<ElementType> IT;
3151 
3152  const size_t M( A.rows() );
3153  const size_t N( B.columns() );
3154  const size_t K( A.columns() );
3155 
3156  size_t i( 0UL );
3157 
3158  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3159  for( size_t j=0UL; j<N; ++j )
3160  {
3161  const size_t kbegin( ( IsLower<MT5>::value )
3162  ?( ( IsUpper<MT4>::value )
3163  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3164  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3165  :( IsUpper<MT4>::value ? i : 0UL ) );
3166  const size_t kend( ( IsUpper<MT5>::value )
3167  ?( ( IsLower<MT4>::value )
3168  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3169  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
3170  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
3171 
3172  IntrinsicType xmm1( (~C).load(i ,j) );
3173  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
3174  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
3175  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
3176  IntrinsicType xmm5( (~C).load(i+IT::size*4UL,j) );
3177  IntrinsicType xmm6( (~C).load(i+IT::size*5UL,j) );
3178  IntrinsicType xmm7( (~C).load(i+IT::size*6UL,j) );
3179  IntrinsicType xmm8( (~C).load(i+IT::size*7UL,j) );
3180 
3181  for( size_t k=kbegin; k<kend; ++k ) {
3182  const IntrinsicType b1( set( B(k,j) ) );
3183  xmm1 = xmm1 - A.load(i ,k) * b1;
3184  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
3185  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
3186  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
3187  xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
3188  xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
3189  xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
3190  xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
3191  }
3192 
3193  (~C).store( i , j, xmm1 );
3194  (~C).store( i+IT::size , j, xmm2 );
3195  (~C).store( i+IT::size*2UL, j, xmm3 );
3196  (~C).store( i+IT::size*3UL, j, xmm4 );
3197  (~C).store( i+IT::size*4UL, j, xmm5 );
3198  (~C).store( i+IT::size*5UL, j, xmm6 );
3199  (~C).store( i+IT::size*6UL, j, xmm7 );
3200  (~C).store( i+IT::size*7UL, j, xmm8 );
3201  }
3202  }
3203 
3204  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
3205  {
3206  size_t j( 0UL );
3207 
3208  for( ; (j+2UL) <= N; j+=2UL )
3209  {
3210  const size_t kbegin( ( IsLower<MT5>::value )
3211  ?( ( IsUpper<MT4>::value )
3212  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3213  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3214  :( IsUpper<MT4>::value ? i : 0UL ) );
3215  const size_t kend( ( IsUpper<MT5>::value )
3216  ?( ( IsLower<MT4>::value )
3217  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3218  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3219  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
3220 
3221  IntrinsicType xmm1( (~C).load(i ,j ) );
3222  IntrinsicType xmm2( (~C).load(i+IT::size ,j ) );
3223  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j ) );
3224  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j ) );
3225  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
3226  IntrinsicType xmm6( (~C).load(i+IT::size ,j+1UL) );
3227  IntrinsicType xmm7( (~C).load(i+IT::size*2UL,j+1UL) );
3228  IntrinsicType xmm8( (~C).load(i+IT::size*3UL,j+1UL) );
3229 
3230  for( size_t k=kbegin; k<kend; ++k ) {
3231  const IntrinsicType a1( A.load(i ,k) );
3232  const IntrinsicType a2( A.load(i+IT::size ,k) );
3233  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
3234  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
3235  const IntrinsicType b1( set( B(k,j ) ) );
3236  const IntrinsicType b2( set( B(k,j+1UL) ) );
3237  xmm1 = xmm1 - a1 * b1;
3238  xmm2 = xmm2 - a2 * b1;
3239  xmm3 = xmm3 - a3 * b1;
3240  xmm4 = xmm4 - a4 * b1;
3241  xmm5 = xmm5 - a1 * b2;
3242  xmm6 = xmm6 - a2 * b2;
3243  xmm7 = xmm7 - a3 * b2;
3244  xmm8 = xmm8 - a4 * b2;
3245  }
3246 
3247  (~C).store( i , j , xmm1 );
3248  (~C).store( i+IT::size , j , xmm2 );
3249  (~C).store( i+IT::size*2UL, j , xmm3 );
3250  (~C).store( i+IT::size*3UL, j , xmm4 );
3251  (~C).store( i , j+1UL, xmm5 );
3252  (~C).store( i+IT::size , j+1UL, xmm6 );
3253  (~C).store( i+IT::size*2UL, j+1UL, xmm7 );
3254  (~C).store( i+IT::size*3UL, j+1UL, xmm8 );
3255  }
3256 
3257  if( j < N )
3258  {
3259  const size_t kbegin( ( IsLower<MT5>::value )
3260  ?( ( IsUpper<MT4>::value )
3261  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3262  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3263  :( IsUpper<MT4>::value ? i : 0UL ) );
3264  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
3265 
3266  IntrinsicType xmm1( (~C).load(i ,j) );
3267  IntrinsicType xmm2( (~C).load(i+IT::size ,j) );
3268  IntrinsicType xmm3( (~C).load(i+IT::size*2UL,j) );
3269  IntrinsicType xmm4( (~C).load(i+IT::size*3UL,j) );
3270 
3271  for( size_t k=kbegin; k<kend; ++k ) {
3272  const IntrinsicType b1( set( B(k,j) ) );
3273  xmm1 = xmm1 - A.load(i ,k) * b1;
3274  xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
3275  xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
3276  xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
3277  }
3278 
3279  (~C).store( i , j, xmm1 );
3280  (~C).store( i+IT::size , j, xmm2 );
3281  (~C).store( i+IT::size*2UL, j, xmm3 );
3282  (~C).store( i+IT::size*3UL, j, xmm4 );
3283  }
3284  }
3285 
3286  for( ; (i+IT::size) < M; i+=IT::size*2UL )
3287  {
3288  size_t j( 0UL );
3289 
3290  for( ; (j+2UL) <= N; j+=2UL )
3291  {
3292  const size_t kbegin( ( IsLower<MT5>::value )
3293  ?( ( IsUpper<MT4>::value )
3294  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3295  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3296  :( IsUpper<MT4>::value ? i : 0UL ) );
3297  const size_t kend( ( IsUpper<MT5>::value )
3298  ?( ( IsLower<MT4>::value )
3299  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
3300  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
3301  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
3302 
3303  IntrinsicType xmm1( (~C).load(i ,j ) );
3304  IntrinsicType xmm2( (~C).load(i+IT::size,j ) );
3305  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
3306  IntrinsicType xmm4( (~C).load(i+IT::size,j+1UL) );
3307 
3308  for( size_t k=kbegin; k<kend; ++k ) {
3309  const IntrinsicType a1( A.load(i ,k) );
3310  const IntrinsicType a2( A.load(i+IT::size,k) );
3311  const IntrinsicType b1( set( B(k,j ) ) );
3312  const IntrinsicType b2( set( B(k,j+1UL) ) );
3313  xmm1 = xmm1 - a1 * b1;
3314  xmm2 = xmm2 - a2 * b1;
3315  xmm3 = xmm3 - a1 * b2;
3316  xmm4 = xmm4 - a2 * b2;
3317  }
3318 
3319  (~C).store( i , j , xmm1 );
3320  (~C).store( i+IT::size, j , xmm2 );
3321  (~C).store( i , j+1UL, xmm3 );
3322  (~C).store( i+IT::size, j+1UL, xmm4 );
3323  }
3324 
3325  if( j < N )
3326  {
3327  const size_t kbegin( ( IsLower<MT5>::value )
3328  ?( ( IsUpper<MT4>::value )
3329  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3330  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3331  :( IsUpper<MT4>::value ? i : 0UL ) );
3332  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
3333 
3334  IntrinsicType xmm1( (~C).load(i ,j) );
3335  IntrinsicType xmm2( (~C).load(i+IT::size,j) );
3336 
3337  for( size_t k=kbegin; k<kend; ++k ) {
3338  const IntrinsicType b1( set( B(k,j) ) );
3339  xmm1 = xmm1 - A.load(i ,k) * b1;
3340  xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
3341  }
3342 
3343  (~C).store( i , j, xmm1 );
3344  (~C).store( i+IT::size, j, xmm2 );
3345  }
3346  }
3347 
3348  if( i < M )
3349  {
3350  size_t j( 0UL );
3351 
3352  for( ; (j+2UL) <= N; j+=2UL )
3353  {
3354  const size_t kbegin( ( IsLower<MT5>::value )
3355  ?( ( IsUpper<MT4>::value )
3356  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3357  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3358  :( IsUpper<MT4>::value ? i : 0UL ) );
3359  const size_t kend( ( IsUpper<MT5>::value )
3360  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
3361  :( K ) );
3362 
3363  IntrinsicType xmm1( (~C).load(i,j ) );
3364  IntrinsicType xmm2( (~C).load(i,j+1UL) );
3365 
3366  for( size_t k=kbegin; k<kend; ++k ) {
3367  const IntrinsicType a1( A.load(i,k) );
3368  xmm1 = xmm1 - a1 * set( B(k,j ) );
3369  xmm2 = xmm2 - a1 * set( B(k,j+1UL) );
3370  }
3371 
3372  (~C).store( i, j , xmm1 );
3373  (~C).store( i, j+1UL, xmm2 );
3374  }
3375 
3376  if( j < N )
3377  {
3378  const size_t kbegin( ( IsLower<MT5>::value )
3379  ?( ( IsUpper<MT4>::value )
3380  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
3381  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
3382  :( IsUpper<MT4>::value ? i : 0UL ) );
3383 
3384  IntrinsicType xmm1( (~C).load(i,j) );
3385 
3386  for( size_t k=kbegin; k<K; ++k ) {
3387  xmm1 = xmm1 - A.load(i,k) * set( B(k,j) );
3388  }
3389 
3390  (~C).store( i, j, xmm1 );
3391  }
3392  }
3393  }
3395  //**********************************************************************************************
3396 
3397  //**Default subtraction assignment to dense matrices (large matrices)***************************
3411  template< typename MT3 // Type of the left-hand side target matrix
3412  , typename MT4 // Type of the left-hand side matrix operand
3413  , typename MT5 > // Type of the right-hand side matrix operand
3414  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3415  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3416  {
3417  selectDefaultSubAssignKernel( C, A, B );
3418  }
3420  //**********************************************************************************************
3421 
3422  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
3437  template< typename MT3 // Type of the left-hand side target matrix
3438  , typename MT4 // Type of the left-hand side matrix operand
3439  , typename MT5 > // Type of the right-hand side matrix operand
3440  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3441  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3442  {
3443  selectSmallSubAssignKernel( ~C, A, B );
3444  }
3446  //**********************************************************************************************
3447 
3448  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
      /*!\brief Blocked, vectorized subtraction assignment kernel for column-major targets.
      //
      // \param C The target left-hand side dense matrix (column-major).
      // \param A The left-hand side multiplication operand.
      // \param B The right-hand side multiplication operand.
      //
      // Selected (via EnableIf) when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds. The kernel
      // walks the iteration space in blocks of 128 rows x 64 columns x 128 inner indices. Within
      // a block, tiles of C are loaded into SIMD registers, every product A(.,k)*B(k,j) of the
      // current k range is subtracted, and the tile is stored back to C. The k range of each
      // tile is narrowed according to the IsUpper/IsLower traits of the two operands.
      //
      // NOTE(review): all accesses to C and A use full-vector load()/store(), also for the last
      // row tile of a block; this presumably relies on padded, aligned storage - confirm.
      */
3463  template< typename MT3 // Type of the left-hand side target matrix
3464  , typename MT4 // Type of the left-hand side matrix operand
3465  , typename MT5 > // Type of the right-hand side matrix operand
3466  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3467  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3468  {
3469  typedef IntrinsicTrait<ElementType> IT;
3470 
      // Problem dimensions: C is M x N, the inner (summation) dimension is K.
3471  const size_t M( A.rows() );
3472  const size_t N( B.columns() );
3473  const size_t K( A.columns() );
3474 
      // Block extents for the row (i), column (j) and inner (k) dimension.
3475  const size_t iblock( 128UL );
3476  const size_t jblock( 64UL );
3477  const size_t kblock( 128UL );
3478 
3479  for( size_t ii=0UL; ii<M; ii+=iblock )
3480  {
3481  const size_t iend( min( ii+iblock, M ) );
3482 
3483  for( size_t jj=0UL; jj<N; jj+=jblock )
3484  {
3485  const size_t jend( min( jj+jblock, N ) );
3486 
3487  for( size_t kk=0UL; kk<K; kk+=kblock )
3488  {
      // Upper bound of the current k block; the per-tile kend below never exceeds it.
3489  const size_t ktmp( min( kk+kblock, K ) );
3490 
3491  size_t i( ii );
3492 
      // Row tiles of four SIMD vectors (4*IT::size rows).
3493  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
3494  {
3495  const size_t i1( i+IT::size );
3496  const size_t i2( i+IT::size*2UL );
3497  const size_t i3( i+IT::size*3UL );
3498 
3499  size_t j( jj );
3500 
      // Four vectors x two columns: eight accumulators.
3501  for( ; (j+2UL) <= jend; j+=2UL )
3502  {
      // Restrict k to the current block; the lower bound is raised to i for an upper A and
      // to j for a lower B, the upper bound is capped at the end of the row tile for a
      // lower A and at j+2 for an upper B.
3503  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3504  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3505  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
3506  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
3507 
      // Start from the current contents of C.
3508  IntrinsicType xmm1( (~C).load(i ,j ) );
3509  IntrinsicType xmm2( (~C).load(i1,j ) );
3510  IntrinsicType xmm3( (~C).load(i2,j ) );
3511  IntrinsicType xmm4( (~C).load(i3,j ) );
3512  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
3513  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
3514  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
3515  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
3516 
      // Subtract the column vectors of A scaled by the broadcast elements of B.
3517  for( size_t k=kbegin; k<kend; ++k ) {
3518  const IntrinsicType a1( A.load(i ,k) );
3519  const IntrinsicType a2( A.load(i1,k) );
3520  const IntrinsicType a3( A.load(i2,k) );
3521  const IntrinsicType a4( A.load(i3,k) );
3522  const IntrinsicType b1( set( B(k,j ) ) );
3523  const IntrinsicType b2( set( B(k,j+1UL) ) );
3524  xmm1 = xmm1 - a1 * b1;
3525  xmm2 = xmm2 - a2 * b1;
3526  xmm3 = xmm3 - a3 * b1;
3527  xmm4 = xmm4 - a4 * b1;
3528  xmm5 = xmm5 - a1 * b2;
3529  xmm6 = xmm6 - a2 * b2;
3530  xmm7 = xmm7 - a3 * b2;
3531  xmm8 = xmm8 - a4 * b2;
3532  }
3533 
      // Write the updated tile back to C.
3534  (~C).store( i , j , xmm1 );
3535  (~C).store( i1, j , xmm2 );
3536  (~C).store( i2, j , xmm3 );
3537  (~C).store( i3, j , xmm4 );
3538  (~C).store( i , j+1UL, xmm5 );
3539  (~C).store( i1, j+1UL, xmm6 );
3540  (~C).store( i2, j+1UL, xmm7 );
3541  (~C).store( i3, j+1UL, xmm8 );
3542  }
3543 
      // Odd number of columns in the block: one remaining column for this row tile.
3544  if( j < jend )
3545  {
3546  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3547  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3548  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
3549  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
3550 
3551  IntrinsicType xmm1( (~C).load(i ,j) );
3552  IntrinsicType xmm2( (~C).load(i1,j) );
3553  IntrinsicType xmm3( (~C).load(i2,j) );
3554  IntrinsicType xmm4( (~C).load(i3,j) );
3555 
3556  for( size_t k=kbegin; k<kend; ++k ) {
3557  const IntrinsicType b1( set( B(k,j) ) );
3558  xmm1 = xmm1 - A.load(i ,k) * b1;
3559  xmm2 = xmm2 - A.load(i1,k) * b1;
3560  xmm3 = xmm3 - A.load(i2,k) * b1;
3561  xmm4 = xmm4 - A.load(i3,k) * b1;
3562  }
3563 
3564  (~C).store( i , j, xmm1 );
3565  (~C).store( i1, j, xmm2 );
3566  (~C).store( i2, j, xmm3 );
3567  (~C).store( i3, j, xmm4 );
3568  }
3569  }
3570 
      // Row tiles of two SIMD vectors (2*IT::size rows).
3571  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
3572  {
3573  const size_t i1( i+IT::size );
3574 
3575  size_t j( jj );
3576 
      // Two vectors x four columns: eight accumulators.
3577  for( ; (j+4UL) <= jend; j+=4UL )
3578  {
3579  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3580  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3581  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
3582  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
3583 
3584  IntrinsicType xmm1( (~C).load(i ,j ) );
3585  IntrinsicType xmm2( (~C).load(i1,j ) );
3586  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
3587  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
3588  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
3589  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
3590  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
3591  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
3592 
3593  for( size_t k=kbegin; k<kend; ++k ) {
3594  const IntrinsicType a1( A.load(i ,k) );
3595  const IntrinsicType a2( A.load(i1,k) );
3596  const IntrinsicType b1( set( B(k,j ) ) );
3597  const IntrinsicType b2( set( B(k,j+1UL) ) );
3598  const IntrinsicType b3( set( B(k,j+2UL) ) );
3599  const IntrinsicType b4( set( B(k,j+3UL) ) );
3600  xmm1 = xmm1 - a1 * b1;
3601  xmm2 = xmm2 - a2 * b1;
3602  xmm3 = xmm3 - a1 * b2;
3603  xmm4 = xmm4 - a2 * b2;
3604  xmm5 = xmm5 - a1 * b3;
3605  xmm6 = xmm6 - a2 * b3;
3606  xmm7 = xmm7 - a1 * b4;
3607  xmm8 = xmm8 - a2 * b4;
3608  }
3609 
3610  (~C).store( i , j , xmm1 );
3611  (~C).store( i1, j , xmm2 );
3612  (~C).store( i , j+1UL, xmm3 );
3613  (~C).store( i1, j+1UL, xmm4 );
3614  (~C).store( i , j+2UL, xmm5 );
3615  (~C).store( i1, j+2UL, xmm6 );
3616  (~C).store( i , j+3UL, xmm7 );
3617  (~C).store( i1, j+3UL, xmm8 );
3618  }
3619 
      // Two vectors x two columns for what the four-column loop left over.
3620  for( ; (j+2UL) <= jend; j+=2UL )
3621  {
3622  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3623  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3624  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
3625  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
3626 
3627  IntrinsicType xmm1( (~C).load(i ,j ) );
3628  IntrinsicType xmm2( (~C).load(i1,j ) );
3629  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
3630  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
3631 
3632  for( size_t k=kbegin; k<kend; ++k ) {
3633  const IntrinsicType a1( A.load(i ,k) );
3634  const IntrinsicType a2( A.load(i1,k) );
3635  const IntrinsicType b1( set( B(k,j ) ) );
3636  const IntrinsicType b2( set( B(k,j+1UL) ) );
3637  xmm1 = xmm1 - a1 * b1;
3638  xmm2 = xmm2 - a2 * b1;
3639  xmm3 = xmm3 - a1 * b2;
3640  xmm4 = xmm4 - a2 * b2;
3641  }
3642 
3643  (~C).store( i , j , xmm1 );
3644  (~C).store( i1, j , xmm2 );
3645  (~C).store( i , j+1UL, xmm3 );
3646  (~C).store( i1, j+1UL, xmm4 );
3647  }
3648 
      // Last single column for this two-vector row tile.
3649  if( j < jend )
3650  {
3651  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3652  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3653  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
3654  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
3655 
3656  IntrinsicType xmm1( (~C).load(i ,j) );
3657  IntrinsicType xmm2( (~C).load(i1,j) );
3658 
3659  for( size_t k=kbegin; k<kend; ++k ) {
3660  const IntrinsicType b1( set( B(k,j) ) );
3661  xmm1 = xmm1 - A.load(i ,k) * b1;
3662  xmm2 = xmm2 - A.load(i1,k) * b1;
3663  }
3664 
3665  (~C).store( i , j, xmm1 );
3666  (~C).store( i1, j, xmm2 );
3667  }
3668  }
3669 
      // At most IT::size rows of the block remain here (the loop above ends once
      // i+IT::size >= iend); they are processed one column at a time with a single vector.
3670  if( i < iend )
3671  {
3672  for( size_t j=jj; j<jend; ++j )
3673  {
3674  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
3675  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
3676  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
3677  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
3678 
3679  IntrinsicType xmm1( (~C).load(i,j) );
3680 
3681  for( size_t k=kbegin; k<kend; ++k ) {
3682  const IntrinsicType b1( set( B(k,j) ) );
3683  xmm1 = xmm1 - A.load(i,k) * b1;
3684  }
3685 
3686  (~C).store( i, j, xmm1 );
3687  }
3688  }
3689  }
3690  }
3691  }
3692  }
3694  //**********************************************************************************************
3695 
3696  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
3710  template< typename MT3 // Type of the left-hand side target matrix
3711  , typename MT4 // Type of the left-hand side matrix operand
3712  , typename MT5 > // Type of the right-hand side matrix operand
3713  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
3714  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3715  {
3716  selectLargeSubAssignKernel( C, A, B );
3717  }
3719  //**********************************************************************************************
3720 
3721  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3722 #if BLAZE_BLAS_MODE
3723 
3736  template< typename MT3 // Type of the left-hand side target matrix
3737  , typename MT4 // Type of the left-hand side matrix operand
3738  , typename MT5 > // Type of the right-hand side matrix operand
3739  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
3740  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3741  {
3742  if( IsTriangular<MT4>::value ) {
3743  typename MT3::ResultType tmp( B );
3744  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3745  subAssign( C, tmp );
3746  }
3747  else if( IsTriangular<MT5>::value ) {
3748  typename MT3::ResultType tmp( A );
3749  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3750  subAssign( C, tmp );
3751  }
3752  else {
3753  sgemm( C, A, B, -1.0F, 1.0F );
3754  }
3755  }
3757 #endif
3758  //**********************************************************************************************
3759 
3760  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3761 #if BLAZE_BLAS_MODE
3762 
3775  template< typename MT3 // Type of the left-hand side target matrix
3776  , typename MT4 // Type of the left-hand side matrix operand
3777  , typename MT5 > // Type of the right-hand side matrix operand
3778  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
3779  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3780  {
3781  if( IsTriangular<MT4>::value ) {
3782  typename MT3::ResultType tmp( B );
3783  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3784  subAssign( C, tmp );
3785  }
3786  else if( IsTriangular<MT5>::value ) {
3787  typename MT3::ResultType tmp( A );
3788  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3789  subAssign( C, tmp );
3790  }
3791  else {
3792  dgemm( C, A, B, -1.0, 1.0 );
3793  }
3794  }
3796 #endif
3797  //**********************************************************************************************
3798 
3799  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3800 #if BLAZE_BLAS_MODE
3801 
3814  template< typename MT3 // Type of the left-hand side target matrix
3815  , typename MT4 // Type of the left-hand side matrix operand
3816  , typename MT5 > // Type of the right-hand side matrix operand
3817  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3818  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3819  {
3820  if( IsTriangular<MT4>::value ) {
3821  typename MT3::ResultType tmp( B );
3822  ctrmm( tmp, A, CblasLeft,
3823  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3824  complex<float>( 1.0F, 0.0F ) );
3825  subAssign( C, tmp );
3826  }
3827  else if( IsTriangular<MT5>::value ) {
3828  typename MT3::ResultType tmp( A );
3829  ctrmm( tmp, B, CblasRight,
3830  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3831  complex<float>( 1.0F, 0.0F ) );
3832  subAssign( C, tmp );
3833  }
3834  else {
3835  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3836  }
3837  }
3839 #endif
3840  //**********************************************************************************************
3841 
3842  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3843 #if BLAZE_BLAS_MODE
3844 
3857  template< typename MT3 // Type of the left-hand side target matrix
3858  , typename MT4 // Type of the left-hand side matrix operand
3859  , typename MT5 > // Type of the right-hand side matrix operand
3860  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3861  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3862  {
3863  if( IsTriangular<MT4>::value ) {
3864  typename MT3::ResultType tmp( B );
3865  ztrmm( tmp, A, CblasLeft,
3866  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3867  complex<float>( 1.0, 0.0 ) );
3868  subAssign( C, tmp );
3869  }
3870  else if( IsTriangular<MT5>::value ) {
3871  typename MT3::ResultType tmp( A );
3872  ztrmm( tmp, B, CblasRight,
3873  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3874  complex<float>( 1.0, 0.0 ) );
3875  subAssign( C, tmp );
3876  }
3877  else {
3878  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
3879  }
3880  }
3882 #endif
3883  //**********************************************************************************************
3884 
3885  //**Restructuring subtraction assignment to row-major matrices**********************************
3901  template< typename MT > // Type of the target matrix
3902  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3903  subAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
3904  {
3906 
3908 
3909  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3910  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3911 
3912  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3913  subAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
3914  else if( IsSymmetric<MT1>::value )
3915  subAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
3916  else
3917  subAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
3918  }
3920  //**********************************************************************************************
3921 
3922  //**Subtraction assignment to sparse matrices***************************************************
3923  // No special implementation for the subtraction assignment to sparse matrices.
3924  //**********************************************************************************************
3925 
3926  //**Multiplication assignment to dense matrices*************************************************
3927  // No special implementation for the multiplication assignment to dense matrices.
3928  //**********************************************************************************************
3929 
3930  //**Multiplication assignment to sparse matrices************************************************
3931  // No special implementation for the multiplication assignment to sparse matrices.
3932  //**********************************************************************************************
3933 
3934  //**SMP assignment to dense matrices************************************************************
3950  template< typename MT // Type of the target dense matrix
3951  , bool SO > // Storage order of the target dense matrix
3952  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
3953  smpAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
3954  {
3956 
3957  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3958  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3959 
3960  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3961  return;
3962  }
3963  else if( rhs.lhs_.columns() == 0UL ) {
3964  reset( ~lhs );
3965  return;
3966  }
3967 
3968  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3969  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3970 
3971  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3972  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3973  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3974  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3975  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3976  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3977 
3978  smpAssign( ~lhs, A * B );
3979  }
3981  //**********************************************************************************************
3982 
3983  //**SMP assignment to sparse matrices***********************************************************
3999  template< typename MT // Type of the target sparse matrix
4000  , bool SO > // Storage order of the target sparse matrix
4001  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4002  smpAssign( SparseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
4003  {
4005 
4006  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
4007 
4014 
4015  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4016  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4017 
4018  const TmpType tmp( rhs );
4019  smpAssign( ~lhs, tmp );
4020  }
4022  //**********************************************************************************************
4023 
4024  //**Restructuring SMP assignment to row-major matrices******************************************
4039  template< typename MT > // Type of the target matrix
4040  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4041  smpAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
4042  {
4044 
4046 
4047  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4048  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4049 
4050  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4051  smpAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4052  else if( IsSymmetric<MT1>::value )
4053  smpAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4054  else
4055  smpAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4056  }
4058  //**********************************************************************************************
4059 
4060  //**SMP addition assignment to dense matrices***************************************************
4076  template< typename MT // Type of the target dense matrix
4077  , bool SO > // Storage order of the target dense matrix
4078  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4079  smpAddAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
4080  {
4082 
4083  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4084  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4085 
4086  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4087  return;
4088  }
4089 
4090  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4091  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4092 
4093  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4094  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4095  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4096  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4097  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4098  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4099 
4100  smpAddAssign( ~lhs, A * B );
4101  }
4103  //**********************************************************************************************
4104 
4105  //**Restructuring SMP addition assignment to row-major matrices*********************************
4121  template< typename MT > // Type of the target matrix
4122  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4123  smpAddAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
4124  {
4126 
4128 
4129  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4130  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4131 
4132  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4133  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4134  else if( IsSymmetric<MT1>::value )
4135  smpAddAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4136  else
4137  smpAddAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4138  }
4140  //**********************************************************************************************
4141 
4142  //**SMP addition assignment to sparse matrices**************************************************
4143  // No special implementation for the SMP addition assignment to sparse matrices.
4144  //**********************************************************************************************
4145 
4146  //**SMP subtraction assignment to dense matrices************************************************
4162  template< typename MT // Type of the target dense matrix
4163  , bool SO > // Storage order of the target dense matrix
4164  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4165  smpSubAssign( DenseMatrix<MT,SO>& lhs, const TDMatTDMatMultExpr& rhs )
4166  {
4168 
4169  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4170  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4171 
4172  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4173  return;
4174  }
4175 
4176  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4177  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4178 
4179  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4180  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4181  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4182  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4183  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4184  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4185 
4186  smpSubAssign( ~lhs, A * B );
4187  }
4189  //**********************************************************************************************
4190 
4191  //**Restructuring SMP subtraction assignment to row-major matrices******************************
4207  template< typename MT > // Type of the target matrix
4208  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4209  smpSubAssign( Matrix<MT,false>& lhs, const TDMatTDMatMultExpr& rhs )
4210  {
4212 
4214 
4215  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4216  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4217 
4218  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4219  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * trans( rhs.rhs_ ) );
4220  else if( IsSymmetric<MT1>::value )
4221  smpSubAssign( ~lhs, trans( rhs.lhs_ ) * rhs.rhs_ );
4222  else
4223  smpSubAssign( ~lhs, rhs.lhs_ * trans( rhs.rhs_ ) );
4224  }
4226  //**********************************************************************************************
4227 
4228  //**SMP subtraction assignment to sparse matrices***********************************************
4229  // No special implementation for the SMP subtraction assignment to sparse matrices.
4230  //**********************************************************************************************
4231 
4232  //**SMP multiplication assignment to dense matrices*********************************************
4233  // No special implementation for the SMP multiplication assignment to dense matrices.
4234  //**********************************************************************************************
4235 
4236  //**SMP multiplication assignment to sparse matrices********************************************
4237  // No special implementation for the SMP multiplication assignment to sparse matrices.
4238  //**********************************************************************************************
4239 
4240  //**Compile time checks*************************************************************************
4248  //**********************************************************************************************
4249 };
4250 //*************************************************************************************************
4251 
4252 
4253 
4254 
4255 //=================================================================================================
4256 //
4257 // DMATSCALARMULTEXPR SPECIALIZATION
4258 //
4259 //=================================================================================================
4260 
4261 //*************************************************************************************************
4269 template< typename MT1 // Type of the left-hand side dense matrix
4270  , typename MT2 // Type of the right-hand side dense matrix
4271  , typename ST > // Type of the right-hand side scalar value
4272 class DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >
4273  : public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
4274  , private MatScalarMultExpr
4275  , private Computation
4276 {
4277  private:
4278  //**Type definitions****************************************************************************
// Private shorthand types used throughout this specialization.
4279  typedef TDMatTDMatMultExpr<MT1,MT2> MMM; // The wrapped matrix/matrix multiplication expression
4280  typedef typename MMM::ResultType RES; // Result type of the multiplication expression
4281  typedef typename MT1::ResultType RT1; // Result type of the left-hand side matrix operand
4282  typedef typename MT2::ResultType RT2; // Result type of the right-hand side matrix operand
4283  typedef typename RT1::ElementType ET1; // Element type of the left-hand side result type
4284  typedef typename RT2::ElementType ET2; // Element type of the right-hand side result type
4285  typedef typename MT1::CompositeType CT1; // Composite type of the left-hand side matrix operand
4286  typedef typename MT2::CompositeType CT2; // Composite type of the right-hand side matrix operand
4287  //**********************************************************************************************
4288 
4289  //**********************************************************************************************
// Compile time flag: non-zero when the left-hand side operand type MT1 is a computation
// (IsComputation) or requires an intermediate evaluation (RequiresEvaluation).
4291  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4292  //**********************************************************************************************
4293 
4294  //**********************************************************************************************
// Compile time flag: non-zero when the right-hand side operand type MT2 is a computation
// (IsComputation) or requires an intermediate evaluation (RequiresEvaluation).
4296  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4297  //**********************************************************************************************
4298 
4299  //**********************************************************************************************
4301 
// Helper trait for the selection of the restructuring functions.
// T1 is the target matrix type, T2 and T3 are the two matrix operand types. 'value' is
// non-zero when the target is a row-major matrix and at least one of the two operands is
// symmetric.
4306  template< typename T1, typename T2, typename T3 >
4307  struct CanExploitSymmetry {
4308  enum { value = IsRowMajorMatrix<T1>::value &&
4309  ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
4310  };
4311  //**********************************************************************************************
4312 
4313  //**********************************************************************************************
4315 
// Helper trait: 'value' is non-zero when at least one of the two matrix operands has to be
// evaluated (evaluateLeft or evaluateRight) and the symmetry of the operands cannot be
// exploited for the target type T1 (see CanExploitSymmetry).
4318  template< typename T1, typename T2, typename T3 >
4319  struct IsEvaluationRequired {
4320  enum { value = ( evaluateLeft || evaluateRight ) &&
4321  !CanExploitSymmetry<T1,T2,T3>::value };
4322  };
4323  //**********************************************************************************************
4324 
4325  //**********************************************************************************************
4327 
// Helper trait for kernel selection. T1 is the target matrix type, T2 and T3 are the matrix
// operand types, T4 is the scalar type. 'value' is non-zero only if all of the following hold:
//  - BLAZE_BLAS_MODE is enabled,
//  - T1 offers mutable data access and T2/T3 offer constant data access,
//  - neither T2 nor T3 is a diagonal matrix,
//  - all three matrix types are vectorizable,
//  - the element types of all three matrices satisfy IsFloat,
//  - the scalar type T4 is not complex.
// Presumably used to pick the single precision BLAS kernel; the consumers of this trait are
// outside this excerpt.
4330  template< typename T1, typename T2, typename T3, typename T4 >
4331  struct UseSinglePrecisionKernel {
4332  enum { value = BLAZE_BLAS_MODE &&
4333  HasMutableDataAccess<T1>::value &&
4334  HasConstDataAccess<T2>::value &&
4335  HasConstDataAccess<T3>::value &&
4336  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4337  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4338  IsFloat<typename T1::ElementType>::value &&
4339  IsFloat<typename T2::ElementType>::value &&
4340  IsFloat<typename T3::ElementType>::value &&
4341  !IsComplex<T4>::value };
4342  };
4343  //**********************************************************************************************
4344 
4345  //**********************************************************************************************
4347 
// Helper trait for kernel selection. T1 is the target matrix type, T2 and T3 are the matrix
// operand types, T4 is the scalar type. 'value' is non-zero only if all of the following hold:
//  - BLAZE_BLAS_MODE is enabled,
//  - T1 offers mutable data access and T2/T3 offer constant data access,
//  - neither T2 nor T3 is a diagonal matrix,
//  - all three matrix types are vectorizable,
//  - the element types of all three matrices satisfy IsDouble,
//  - the scalar type T4 is not complex.
// Presumably used to pick the double precision BLAS kernel; the consumers of this trait are
// outside this excerpt.
4350  template< typename T1, typename T2, typename T3, typename T4 >
4351  struct UseDoublePrecisionKernel {
4352  enum { value = BLAZE_BLAS_MODE &&
4353  HasMutableDataAccess<T1>::value &&
4354  HasConstDataAccess<T2>::value &&
4355  HasConstDataAccess<T3>::value &&
4356  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4357  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4358  IsDouble<typename T1::ElementType>::value &&
4359  IsDouble<typename T2::ElementType>::value &&
4360  IsDouble<typename T3::ElementType>::value &&
4361  !IsComplex<T4>::value };
4362  };
4363  //**********************************************************************************************
4364 
4365  //**********************************************************************************************
4367 
// Helper trait for kernel selection. T1 is the target matrix type, T2 and T3 are the matrix
// operand types. 'value' is non-zero only if BLAZE_BLAS_MODE is enabled, T1 offers mutable and
// T2/T3 constant data access, neither operand is diagonal, all three types are vectorizable
// and the element type of all three matrices is exactly complex<float>.
// In contrast to the real-valued kernel traits no scalar type is taken into account.
4370  template< typename T1, typename T2, typename T3 >
4371  struct UseSinglePrecisionComplexKernel {
4372  typedef complex<float> Type; // Required element type of all three matrices
4373  enum { value = BLAZE_BLAS_MODE &&
4374  HasMutableDataAccess<T1>::value &&
4375  HasConstDataAccess<T2>::value &&
4376  HasConstDataAccess<T3>::value &&
4377  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4378  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4379  IsSame<typename T1::ElementType,Type>::value &&
4380  IsSame<typename T2::ElementType,Type>::value &&
4381  IsSame<typename T3::ElementType,Type>::value };
4382  };
4383  //**********************************************************************************************
4384 
4385  //**********************************************************************************************
4387 
// Helper trait for kernel selection. T1 is the target matrix type, T2 and T3 are the matrix
// operand types. 'value' is non-zero only if BLAZE_BLAS_MODE is enabled, T1 offers mutable and
// T2/T3 constant data access, neither operand is diagonal, all three types are vectorizable
// and the element type of all three matrices is exactly complex<double>.
// In contrast to the real-valued kernel traits no scalar type is taken into account.
4390  template< typename T1, typename T2, typename T3 >
4391  struct UseDoublePrecisionComplexKernel {
4392  typedef complex<double> Type; // Required element type of all three matrices
4393  enum { value = BLAZE_BLAS_MODE &&
4394  HasMutableDataAccess<T1>::value &&
4395  HasConstDataAccess<T2>::value &&
4396  HasConstDataAccess<T3>::value &&
4397  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4398  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4399  IsSame<typename T1::ElementType,Type>::value &&
4400  IsSame<typename T2::ElementType,Type>::value &&
4401  IsSame<typename T3::ElementType,Type>::value };
4402  };
4403  //**********************************************************************************************
4404 
4405  //**********************************************************************************************
4407 
// Helper trait for kernel selection. 'value' is non-zero when BLAZE_BLAS_MODE is disabled or
// when none of the four precision-specific kernel traits (single, double, single complex,
// double complex) applies to the given combination of target type T1, operand types T2/T3
// and scalar type T4.
4409  template< typename T1, typename T2, typename T3, typename T4 >
4410  struct UseDefaultKernel {
4411  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
4412  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
4413  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
4414  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
4415  };
4416  //**********************************************************************************************
4417 
4418  //**********************************************************************************************
4420 
// Helper trait selecting between the vectorized and the non-vectorized default kernels.
// T1 is the target matrix type, T2 and T3 are the matrix operand types, T4 is the scalar type.
// 'value' is non-zero only if the left operand T2 is not diagonal, all three matrix types are
// vectorizable, the element types of all three matrices and the scalar type T4 are identical,
// and IntrinsicTrait reports addition, subtraction and multiplication support for that
// element type.
4422  template< typename T1, typename T2, typename T3, typename T4 >
4423  struct UseVectorizedDefaultKernel {
4424  enum { value = !IsDiagonal<T2>::value &&
4425  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4426  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
4427  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
4428  IsSame<typename T1::ElementType,T4>::value &&
4429  IntrinsicTrait<typename T1::ElementType>::addition &&
4430  IntrinsicTrait<typename T1::ElementType>::subtraction &&
4431  IntrinsicTrait<typename T1::ElementType>::multiplication };
4432  };
4433  //**********************************************************************************************
4434 
4435  public:
4436  //**Type definitions****************************************************************************
// Public type definitions of the scaled multiplication expression.
4437  typedef DMatScalarMultExpr<MMM,ST,true> This; // Type of this expression specialization
4438  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type: MultTrait of the product result type and the scalar type
4439  typedef typename ResultType::OppositeType OppositeType; // OppositeType member of the result type
4440  typedef typename ResultType::TransposeType TransposeType; // TransposeType member of the result type
4441  typedef typename ResultType::ElementType ElementType; // Element type of the result type
4442  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic (SIMD) type associated with the element type
4443  typedef const ElementType ReturnType; // Return type of the element access operator (by value)
4444  typedef const ResultType CompositeType; // Composite type: the expression is represented by its (const) result type
4445 
// Type of the left-hand side operand: the wrapped multiplication expression, held by value.
4447  typedef const TDMatTDMatMultExpr<MT1,MT2> LeftOperand;
4448 
// Type of the right-hand side operand: the scalar.
4450  typedef ST RightOperand;
4451 
// Type used for the left matrix operand inside the assignment functions: the evaluated result
// type RT1 if the operand must be evaluated (evaluateLeft), its composite type CT1 otherwise.
4453  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
4454 
// Type used for the right matrix operand inside the assignment functions: the evaluated result
// type RT2 if the operand must be evaluated (evaluateRight), its composite type CT2 otherwise.
4456  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
4457  //**********************************************************************************************
4458 
4459  //**Compilation flags***************************************************************************
// Compilation flag for vectorization. Non-zero only if the left matrix operand is not
// diagonal, both matrix operands are vectorizable, both element types and the scalar type are
// identical, and IntrinsicTrait reports addition and multiplication support for the element
// type.
4461  enum { vectorizable = !IsDiagonal<MT1>::value &&
4462  MT1::vectorizable && MT2::vectorizable &&
4463  IsSame<ET1,ET2>::value &&
4464  IsSame<ET1,ST>::value &&
4465  IntrinsicTrait<ET1>::addition &&
4466  IntrinsicTrait<ET1>::multiplication };
4467 
// Compilation flag for SMP assignments. Non-zero only if neither matrix operand requires an
// intermediate evaluation and both operand types are themselves SMP-assignable.
4469  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4470  !evaluateRight && MT2::smpAssignable };
4471  //**********************************************************************************************
4472 
4473  //**Constructor*********************************************************************************
// Constructor for the scaled multiplication expression.
//
// matrix: the matrix/matrix multiplication expression (left-hand side operand); it is copied
//         into the matrix_ member.
// scalar: the scalar factor (right-hand side operand); it is copied into the scalar_ member.
4479  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4480  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4481  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4482  {}
4483  //**********************************************************************************************
4484 
4485  //**Access operator*****************************************************************************
// 2D access to the elements of the scaled product.
//
// i: row index; checked against matrix_.rows() via BLAZE_INTERNAL_ASSERT.
// j: column index; checked against matrix_.columns() via BLAZE_INTERNAL_ASSERT.
// Returns element (i,j) of the wrapped multiplication expression multiplied by the scalar.
4492  inline ReturnType operator()( size_t i, size_t j ) const {
4493  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4494  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4495  return matrix_(i,j) * scalar_;
4496  }
4497  //**********************************************************************************************
4498 
4499  //**Rows function*******************************************************************************
// Returns the number of rows of the expression, i.e. the number of rows of the wrapped
// multiplication expression.
4504  inline size_t rows() const {
4505  return matrix_.rows();
4506  }
4507  //**********************************************************************************************
4508 
4509  //**Columns function****************************************************************************
// Returns the number of columns of the expression, i.e. the number of columns of the wrapped
// multiplication expression.
4514  inline size_t columns() const {
4515  return matrix_.columns();
4516  }
4517  //**********************************************************************************************
4518 
4519  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the wrapped matrix/matrix multiplication
// expression. LeftOperand is a non-reference type, so the expression object is returned by
// value.
4524  inline LeftOperand leftOperand() const {
4525  return matrix_;
4526  }
4527  //**********************************************************************************************
4528 
4529  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the scalar factor.
4534  inline RightOperand rightOperand() const {
4535  return scalar_;
4536  }
4537  //**********************************************************************************************
4538 
4539  //**********************************************************************************************
// Returns whether the expression can alias with the given address.
//
// alias: the address to be checked.
// The query is forwarded unchanged to the wrapped multiplication expression; the scalar is
// not considered.
4545  template< typename T >
4546  inline bool canAlias( const T* alias ) const {
4547  return matrix_.canAlias( alias );
4548  }
4549  //**********************************************************************************************
4550 
4551  //**********************************************************************************************
// Returns whether the expression is aliased with the given address.
//
// alias: the address to be checked.
// The query is forwarded unchanged to the wrapped multiplication expression; the scalar is
// not considered.
4557  template< typename T >
4558  inline bool isAliased( const T* alias ) const {
4559  return matrix_.isAliased( alias );
4560  }
4561  //**********************************************************************************************
4562 
4563  //**********************************************************************************************
// Returns the alignment state reported by the wrapped multiplication expression.
4568  inline bool isAligned() const {
4569  return matrix_.isAligned();
4570  }
4571  //**********************************************************************************************
4572 
4573  //**********************************************************************************************
// Returns whether the expression can be used in SMP assignments.
//
// The result is true only if both conditions hold:
//  - BLAZE_BLAS_IS_PARALLEL is not set, or the number of result elements (rows * columns) is
//    below TDMATTDMATMULT_THRESHOLD,
//  - the number of columns of the right-hand side matrix operand of the wrapped product
//    exceeds SMP_TDMATTDMATMULT_THRESHOLD.
4578  inline bool canSMPAssign() const {
4579  typename MMM::RightOperand B( matrix_.rightOperand() );
4580  return ( !BLAZE_BLAS_IS_PARALLEL ||
4581  ( rows() * columns() < TDMATTDMATMULT_THRESHOLD ) ) &&
4582  ( B.columns() > SMP_TDMATTDMATMULT_THRESHOLD );
4583  }
4584  //**********************************************************************************************
4585 
4586  private:
4587  //**Member variables****************************************************************************
4588  LeftOperand matrix_; // Left-hand side operand: the wrapped matrix/matrix multiplication expression
4589  RightOperand scalar_; // Right-hand side operand: the scalar factor
4590  //**********************************************************************************************
4591 
4592  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled multiplication expression to a dense matrix.
//
// lhs: the target dense matrix (either storage order).
// rhs: the scaled multiplication expression to be assigned.
//
// The overload is disabled (DisableIf) when the symmetry of the operands can be exploited for
// the target type. It handles the degenerate sizes, evaluates both matrix operands serially
// into LT/RT and hands them, together with the scalar, to selectAssignKernel().
4604  template< typename MT // Type of the target dense matrix
4605  , bool SO > // Storage order of the target dense matrix
4606  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4607  assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4608  {
4610 
4611  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4612  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4613 
4614  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4615  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4616 
// Empty target: nothing to assign.
4617  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4618  return;
4619  }
// Inner dimension of zero: no product terms exist, so the target is reset.
4620  else if( left.columns() == 0UL ) {
4621  reset( ~lhs );
4622  return;
4623  }
4624 
4625  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4626  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4627 
4628  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4629  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4630  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4631  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4632  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4633  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4634 
4635  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4636  }
4637  //**********************************************************************************************
4638 
4639  //**Assignment to dense matrices (kernel selection)*********************************************
// Selection of the kernel for the assignment C = A * B * scalar.
//
// C: the target matrix, A/B: the two (already evaluated) matrix operands, scalar: the factor.
//
// The small-matrix kernel is used when the left operand type is diagonal or when the number
// of elements of C is below TDMATTDMATMULT_THRESHOLD; otherwise the call is forwarded to
// selectBlasAssignKernel() (defined outside this excerpt).
4650  template< typename MT3 // Type of the left-hand side target matrix
4651  , typename MT4 // Type of the left-hand side matrix operand
4652  , typename MT5 // Type of the right-hand side matrix operand
4653  , typename ST2 > // Type of the scalar value
4654  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4655  {
4656  if( ( IsDiagonal<MT4>::value ) ||
4657  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
4658  selectSmallAssignKernel( C, A, B, scalar );
4659  else
4660  selectBlasAssignKernel( C, A, B, scalar );
4661  }
4662  //**********************************************************************************************
4663 
4664  //**Default assignment to dense matrices (general/general)**************************************
// Default (non-vectorized) kernel for C = A * B * scalar, selected when neither operand type
// is diagonal.
//
// C: the target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// The kernel works column by column of C. For every column j it
//  1. determines the range [kbegin,kend) of inner indices from the triangular structure of B,
//  2. initializes the column with the contribution of k == kbegin (resetting rows that
//     receive no contribution),
//  3. accumulates the contributions of the remaining k,
//  4. scales the computed rows of the column by the scalar.
// All triangular-structure tests are compile time traits of MT4 and MT5.
4678  template< typename MT3 // Type of the left-hand side target matrix
4679  , typename MT4 // Type of the left-hand side matrix operand
4680  , typename MT5 // Type of the right-hand side matrix operand
4681  , typename ST2 > // Type of the scalar value
4682  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
4683  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4684  {
4685  const size_t M( A.rows() );
4686  const size_t N( B.columns() );
4687  const size_t K( A.columns() );
4688 
4689  for( size_t j=0UL; j<N; ++j )
4690  {
// Inner index range for column j: starts at j (j+1 if strictly lower) for a lower B, ends
// at j+1 (j if strictly upper) for an upper B, and spans [0,K) otherwise.
4691  const size_t kbegin( ( IsLower<MT5>::value )
4692  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4693  :( 0UL ) );
4694  const size_t kend( ( IsUpper<MT5>::value )
4695  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4696  :( K ) );
4697  BLAZE_INTERNAL_ASSERT( kbegin <= kend, "Invalid loop indices detected" );
4698 
// Strictly triangular B with an empty inner range: the whole column of C is reset.
4699  if( IsStrictlyTriangular<MT5>::value && kbegin == kend ) {
4700  for( size_t i=0UL; i<M; ++i ) {
4701  reset( (~C)(i,j) );
4702  }
4703  continue;
4704  }
4705 
// Step 2: initialization of column j with the contribution of k == kbegin.
4706  {
4707  const size_t ibegin( ( IsLower<MT4>::value )
4708  ?( IsStrictlyLower<MT4>::value ? kbegin+1UL : kbegin )
4709  :( 0UL ) );
4710  const size_t iend( ( IsUpper<MT4>::value )
4711  ?( IsStrictlyUpper<MT4>::value ? kbegin : kbegin+1UL )
4712  :( M ) );
4713  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4714 
// Rows above ibegin are reset when both operands are lower; for a strictly lower A (and a
// non-lower B) only the first row is reset.
4715  if( IsLower<MT4>::value && IsLower<MT5>::value ) {
4716  for( size_t i=0UL; i<ibegin; ++i ) {
4717  reset( C(i,j) );
4718  }
4719  }
4720  else if( IsStrictlyLower<MT4>::value ) {
4721  reset( C(0UL,j) );
4722  }
4723  for( size_t i=ibegin; i<iend; ++i ) {
4724  C(i,j) = A(i,kbegin) * B(kbegin,j);
4725  }
// Rows from iend on are reset when both operands are upper; for a strictly upper A (and a
// non-upper B) only the last row is reset.
4726  if( IsUpper<MT4>::value && IsUpper<MT5>::value ) {
4727  for( size_t i=iend; i<M; ++i ) {
4728  reset( C(i,j) );
4729  }
4730  }
4731  else if( IsStrictlyUpper<MT4>::value ) {
4732  reset( C(M-1UL,j) );
4733  }
4734  }
4735 
// Step 3: accumulation of the contributions of the remaining inner indices.
4736  for( size_t k=kbegin+1UL; k<kend; ++k )
4737  {
4738  const size_t ibegin( ( IsLower<MT4>::value )
4739  ?( IsStrictlyLower<MT4>::value ? k+1UL : k )
4740  :( 0UL ) );
4741  const size_t iend( ( IsUpper<MT4>::value )
4742  ?( IsStrictlyUpper<MT4>::value ? k-1UL : k )
4743  :( M ) );
4744  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4745 
4746  for( size_t i=ibegin; i<iend; ++i ) {
4747  C(i,j) += A(i,k) * B(k,j);
4748  }
// For an upper A, row iend is assigned instead of accumulated: the accumulation loop of this
// k stops before iend, so this statement writes the row directly.
4749  if( IsUpper<MT4>::value ) {
4750  C(iend,j) = A(iend,k) * B(k,j);
4751  }
4752  }
4753 
// Step 4: scaling of column j; the row range is narrowed only if both operands are lower
// (start at j, or j+1 if one of them is strictly lower) or both are upper (end at j+1, or j
// if one of them is strictly upper).
4754  {
4755  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4756  ?( IsStrictlyLower<MT4>::value || IsStrictlyLower<MT5>::value ? j+1UL : j )
4757  :( 0UL ) );
4758  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4759  ?( IsStrictlyUpper<MT4>::value || IsStrictlyUpper<MT5>::value ? j : j+1UL )
4760  :( M ) );
4761  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4762 
4763  for( size_t i=ibegin; i<iend; ++i ) {
4764  C(i,j) *= scalar;
4765  }
4766  }
4767  }
4768  }
4769  //**********************************************************************************************
4770 
4771  //**Default assignment to dense matrices (general/diagonal)*************************************
// Default kernel for C = A * B * scalar, selected when the right operand type is diagonal and
// the left operand type is not.
//
// C: the target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// Only the diagonal element B(j,j) is used for column j, so every element is computed
// directly as A(i,j) * B(j,j) * scalar. The row range is restricted by the triangular
// structure of A and the rows outside that range are reset.
4785  template< typename MT3 // Type of the left-hand side target matrix
4786  , typename MT4 // Type of the left-hand side matrix operand
4787  , typename MT5 // Type of the right-hand side matrix operand
4788  , typename ST2 > // Type of the scalar value
4789  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
4790  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4791  {
4793 
4794  const size_t M( A.rows() );
4795  const size_t N( B.columns() );
4796 
4797  for( size_t j=0UL; j<N; ++j )
4798  {
// Row range of column j derived from the lower/upper structure of A.
4799  const size_t ibegin( ( IsLower<MT4>::value )
4800  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
4801  :( 0UL ) );
4802  const size_t iend( ( IsUpper<MT4>::value )
4803  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
4804  :( M ) );
4805  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4806 
// Rows before ibegin are reset for a lower A.
4807  if( IsLower<MT4>::value ) {
4808  for( size_t i=0UL; i<ibegin; ++i ) {
4809  reset( C(i,j) );
4810  }
4811  }
4812  for( size_t i=ibegin; i<iend; ++i ) {
4813  C(i,j) = A(i,j) * B(j,j) * scalar;
4814  }
// Rows from iend on are reset for an upper A.
4815  if( IsUpper<MT4>::value ) {
4816  for( size_t i=iend; i<M; ++i ) {
4817  reset( C(i,j) );
4818  }
4819  }
4820  }
4821  }
4822  //**********************************************************************************************
4823 
4824  //**Default assignment to dense matrices (diagonal/general)*************************************
// Default kernel for C = A * B * scalar, selected when the left operand type is diagonal and
// the right operand type is not.
//
// C: the target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// Only the diagonal element A(i,i) is used for row i, so every element is computed directly
// as A(i,i) * B(i,j) * scalar. The row range of each column is restricted by the triangular
// structure of B.
4838  template< typename MT3 // Type of the left-hand side target matrix
4839  , typename MT4 // Type of the left-hand side matrix operand
4840  , typename MT5 // Type of the right-hand side matrix operand
4841  , typename ST2 > // Type of the scalar value
4842  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
4843  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4844  {
4846 
4847  const size_t M( A.rows() );
4848  const size_t N( B.columns() );
4849 
4850  for( size_t j=0UL; j<N; ++j )
4851  {
// Row range of column j derived from the lower/upper structure of B.
4852  const size_t ibegin( ( IsLower<MT5>::value )
4853  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4854  :( 0UL ) );
4855  const size_t iend( ( IsUpper<MT5>::value )
4856  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4857  :( M ) );
4858  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4859 
// NOTE(review): the two reset guards test MT4 although the ranges are derived from MT5.
// Presumably both guards hold for a diagonal MT4; the guarded loops are empty anyway whenever
// ibegin is 0 or iend is M, i.e. whenever B lacks the corresponding structure.
4860  if( IsLower<MT4>::value ) {
4861  for( size_t i=0UL; i<ibegin; ++i ) {
4862  reset( C(i,j) );
4863  }
4864  }
4865  for( size_t i=ibegin; i<iend; ++i ) {
4866  C(i,j) = A(i,i) * B(i,j) * scalar;
4867  }
4868  if( IsUpper<MT4>::value ) {
4869  for( size_t i=iend; i<M; ++i ) {
4870  reset( C(i,j) );
4871  }
4872  }
4873  }
4874  }
4875  //**********************************************************************************************
4876 
4877  //**Default assignment to dense matrices (diagonal/diagonal)************************************
// Default kernel for C = A * B * scalar, selected when both operand types are diagonal.
//
// C: the target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// The target is reset first; afterwards only its diagonal elements are written, each as the
// product of the two corresponding diagonal elements and the scalar.
4891  template< typename MT3 // Type of the left-hand side target matrix
4892  , typename MT4 // Type of the left-hand side matrix operand
4893  , typename MT5 // Type of the right-hand side matrix operand
4894  , typename ST2 > // Type of the scalar value
4895  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
4896  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4897  {
4899 
4900  reset( C );
4901 
4902  for( size_t i=0UL; i<A.rows(); ++i ) {
4903  C(i,i) = A(i,i) * B(i,i) * scalar;
4904  }
4905  }
4906  //**********************************************************************************************
4907 
4908  //**Default assignment to dense matrices (small matrices)***************************************
// Small-matrix kernel for C = A * B * scalar when the vectorized default kernel cannot be
// used (DisableIf on UseVectorizedDefaultKernel): the call is forwarded unchanged to the
// matching selectDefaultAssignKernel() overload.
4922  template< typename MT3 // Type of the left-hand side target matrix
4923  , typename MT4 // Type of the left-hand side matrix operand
4924  , typename MT5 // Type of the right-hand side matrix operand
4925  , typename ST2 > // Type of the scalar value
4926  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4927  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4928  {
4929  selectDefaultAssignKernel( C, A, B, scalar );
4930  }
4931  //**********************************************************************************************
4932 
4933  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
// Vectorized small-matrix kernel for C = A * B * scalar with a row-major target matrix.
//
// C: the row-major target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// The kernel does not compute the product itself. It converts exactly one of the two
// operands into its OppositeType (a serially evaluated temporary) and assigns the resulting
// scaled product expression to C. The operand to convert is chosen as follows:
//  - if exactly one operand type is resizable, the non-resizable operand is converted,
//  - otherwise the operand with fewer elements is converted (B on a tie).
4948  template< typename MT3 // Type of the left-hand side target matrix
4949  , typename MT4 // Type of the left-hand side matrix operand
4950  , typename MT5 // Type of the right-hand side matrix operand
4951  , typename ST2 > // Type of the scalar value
4952  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4953  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4954  {
4959 
// A resizable, B not: convert B.
4960  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
4961  const typename MT5::OppositeType tmp( serial( B ) );
4962  assign( ~C, A * tmp * scalar );
4963  }
// B resizable, A not: convert A.
4964  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
4965  const typename MT4::OppositeType tmp( serial( A ) );
4966  assign( ~C, tmp * B * scalar );
4967  }
// Same resizability: convert the operand with fewer elements (B if equal).
4968  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
4969  const typename MT5::OppositeType tmp( serial( B ) );
4970  assign( ~C, A * tmp * scalar );
4971  }
4972  else {
4973  const typename MT4::OppositeType tmp( serial( A ) );
4974  assign( ~C, tmp * B * scalar );
4975  }
4976  }
4977  //**********************************************************************************************
4978 
4979  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix kernel for C = A * B * scalar with a column-major target matrix.
//
// C: the column-major target matrix, A/B: the matrix operands, scalar: the scaling factor.
//
// The rows of C are processed in blocks of IT::size*8, IT::size*4, IT::size*2 and finally
// IT::size rows, where IT::size is the 'size' constant of IntrinsicTrait<ElementType>.
// Within a block the columns are processed one at a time (8-wide block) or in pairs with a
// single-column remainder (all other blocks). For every block the inner index range
// [kbegin,kend) is narrowed at compile time according to the lower/upper structure of A
// (MT4) and B (MT5). Each result vector is multiplied by the broadcast scalar ('factor')
// before it is stored.
//
// NOTE(review): the accumulators xmm1..xmm8 are default constructed and then used as
// summands; this presumably relies on IntrinsicType default construction yielding zero --
// confirm.
// NOTE(review): the loop conditions only require the first row of the last vector of a block
// to be below M, so the last load/store of a block may extend past row M; presumably this
// relies on padded column storage -- confirm.
4994  template< typename MT3 // Type of the left-hand side target matrix
4995  , typename MT4 // Type of the left-hand side matrix operand
4996  , typename MT5 // Type of the right-hand side matrix operand
4997  , typename ST2 > // Type of the scalar value
4998  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4999  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5000  {
5001  typedef IntrinsicTrait<ElementType> IT;
5002 
5003  const size_t M( A.rows() );
5004  const size_t N( B.columns() );
5005  const size_t K( A.columns() );
5006 
// Scalar broadcast into an intrinsic vector.
5007  const IntrinsicType factor( set( scalar ) );
5008 
5009  size_t i( 0UL );
5010 
// Blocks of IT::size*8 rows, one column at a time.
5011  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
5012  for( size_t j=0UL; j<N; ++j )
5013  {
// Inner range: the lower bound combines the lower structure of B (j or j+1) with the upper
// structure of A (i); the upper bound combines the upper structure of B (j or j+1) with the
// lower structure of A (end of the row block, capped at K).
5014  const size_t kbegin( ( IsLower<MT5>::value )
5015  ?( ( IsUpper<MT4>::value )
5016  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5017  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5018  :( IsUpper<MT4>::value ? i : 0UL ) );
5019  const size_t kend( ( IsUpper<MT5>::value )
5020  ?( ( IsLower<MT4>::value )
5021  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
5022  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
5023  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
5024 
5025  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5026 
// Each B(k,j) is broadcast once and combined with eight consecutive vectors of column k of A.
5027  for( size_t k=kbegin; k<kend; ++k ) {
5028  const IntrinsicType b1( set( B(k,j) ) );
5029  xmm1 = xmm1 + A.load(i ,k) * b1;
5030  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
5031  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
5032  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
5033  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
5034  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
5035  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
5036  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
5037  }
5038 
5039  (~C).store( i , j, xmm1 * factor );
5040  (~C).store( i+IT::size , j, xmm2 * factor );
5041  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
5042  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
5043  (~C).store( i+IT::size*4UL, j, xmm5 * factor );
5044  (~C).store( i+IT::size*5UL, j, xmm6 * factor );
5045  (~C).store( i+IT::size*6UL, j, xmm7 * factor );
5046  (~C).store( i+IT::size*7UL, j, xmm8 * factor );
5047  }
5048  }
5049 
// Blocks of IT::size*4 rows, two columns at a time.
5050  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
5051  {
5052  size_t j( 0UL );
5053 
5054  for( ; (j+2UL) <= N; j+=2UL )
5055  {
// The upper bound for an upper B covers both columns of the pair (j+2, or j+1 if strict).
5056  const size_t kbegin( ( IsLower<MT5>::value )
5057  ?( ( IsUpper<MT4>::value )
5058  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5059  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5060  :( IsUpper<MT4>::value ? i : 0UL ) );
5061  const size_t kend( ( IsUpper<MT5>::value )
5062  ?( ( IsLower<MT4>::value )
5063  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5064  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5065  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
5066 
5067  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5068 
// xmm1..xmm4 accumulate column j, xmm5..xmm8 accumulate column j+1.
5069  for( size_t k=kbegin; k<kend; ++k ) {
5070  const IntrinsicType a1( A.load(i ,k) );
5071  const IntrinsicType a2( A.load(i+IT::size ,k) );
5072  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
5073  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
5074  const IntrinsicType b1( set( B(k,j ) ) );
5075  const IntrinsicType b2( set( B(k,j+1UL) ) );
5076  xmm1 = xmm1 + a1 * b1;
5077  xmm2 = xmm2 + a2 * b1;
5078  xmm3 = xmm3 + a3 * b1;
5079  xmm4 = xmm4 + a4 * b1;
5080  xmm5 = xmm5 + a1 * b2;
5081  xmm6 = xmm6 + a2 * b2;
5082  xmm7 = xmm7 + a3 * b2;
5083  xmm8 = xmm8 + a4 * b2;
5084  }
5085 
5086  (~C).store( i , j , xmm1 * factor );
5087  (~C).store( i+IT::size , j , xmm2 * factor );
5088  (~C).store( i+IT::size*2UL, j , xmm3 * factor );
5089  (~C).store( i+IT::size*3UL, j , xmm4 * factor );
5090  (~C).store( i , j+1UL, xmm5 * factor );
5091  (~C).store( i+IT::size , j+1UL, xmm6 * factor );
5092  (~C).store( i+IT::size*2UL, j+1UL, xmm7 * factor );
5093  (~C).store( i+IT::size*3UL, j+1UL, xmm8 * factor );
5094  }
5095 
// Remaining single column of the IT::size*4 row block. The upper bound is not narrowed by the
// upper structure of B here.
5096  if( j < N )
5097  {
5098  const size_t kbegin( ( IsLower<MT5>::value )
5099  ?( ( IsUpper<MT4>::value )
5100  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5101  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5102  :( IsUpper<MT4>::value ? i : 0UL ) );
5103  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
5104 
5105  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5106 
5107  for( size_t k=kbegin; k<kend; ++k ) {
5108  const IntrinsicType b1( set( B(k,j) ) );
5109  xmm1 = xmm1 + A.load(i ,k) * b1;
5110  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
5111  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
5112  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
5113  }
5114 
5115  (~C).store( i , j, xmm1 * factor );
5116  (~C).store( i+IT::size , j, xmm2 * factor );
5117  (~C).store( i+IT::size*2UL, j, xmm3 * factor );
5118  (~C).store( i+IT::size*3UL, j, xmm4 * factor );
5119  }
5120  }
5121 
// Blocks of IT::size*2 rows, two columns at a time.
5122  for( ; (i+IT::size) < M; i+=IT::size*2UL )
5123  {
5124  size_t j( 0UL );
5125 
5126  for( ; (j+2UL) <= N; j+=2UL )
5127  {
5128  const size_t kbegin( ( IsLower<MT5>::value )
5129  ?( ( IsUpper<MT4>::value )
5130  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5131  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5132  :( IsUpper<MT4>::value ? i : 0UL ) );
5133  const size_t kend( ( IsUpper<MT5>::value )
5134  ?( ( IsLower<MT4>::value )
5135  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
5136  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
5137  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
5138 
5139  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5140 
// xmm1/xmm2 accumulate column j, xmm3/xmm4 accumulate column j+1.
5141  for( size_t k=kbegin; k<kend; ++k ) {
5142  const IntrinsicType a1( A.load(i ,k) );
5143  const IntrinsicType a2( A.load(i+IT::size,k) );
5144  const IntrinsicType b1( set( B(k,j ) ) );
5145  const IntrinsicType b2( set( B(k,j+1UL) ) );
5146  xmm1 = xmm1 + a1 * b1;
5147  xmm2 = xmm2 + a2 * b1;
5148  xmm3 = xmm3 + a1 * b2;
5149  xmm4 = xmm4 + a2 * b2;
5150  }
5151 
5152  (~C).store( i , j , xmm1 * factor );
5153  (~C).store( i+IT::size, j , xmm2 * factor );
5154  (~C).store( i , j+1UL, xmm3 * factor );
5155  (~C).store( i+IT::size, j+1UL, xmm4 * factor );
5156  }
5157 
// Remaining single column of the IT::size*2 row block.
5158  if( j < N )
5159  {
5160  const size_t kbegin( ( IsLower<MT5>::value )
5161  ?( ( IsUpper<MT4>::value )
5162  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5163  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5164  :( IsUpper<MT4>::value ? i : 0UL ) );
5165  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
5166 
5167  IntrinsicType xmm1, xmm2;
5168 
5169  for( size_t k=kbegin; k<kend; ++k ) {
5170  const IntrinsicType b1( set( B(k,j) ) );
5171  xmm1 = xmm1 + A.load(i ,k) * b1;
5172  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
5173  }
5174 
5175  (~C).store( i , j, xmm1 * factor );
5176  (~C).store( i+IT::size, j, xmm2 * factor );
5177  }
5178  }
5179 
// Final block of a single vector of rows (at most one such block remains after the loops
// above), two columns at a time. The upper bound is not narrowed by the lower structure of A
// here.
5180  if( i < M )
5181  {
5182  size_t j( 0UL );
5183 
5184  for( ; (j+2UL) <= N; j+=2UL )
5185  {
5186  const size_t kbegin( ( IsLower<MT5>::value )
5187  ?( ( IsUpper<MT4>::value )
5188  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5189  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5190  :( IsUpper<MT4>::value ? i : 0UL ) );
5191  const size_t kend( ( IsUpper<MT5>::value )
5192  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
5193  :( K ) );
5194 
5195  IntrinsicType xmm1, xmm2;
5196 
5197  for( size_t k=kbegin; k<kend; ++k ) {
5198  const IntrinsicType a1( A.load(i,k) );
5199  xmm1 = xmm1 + a1 * set( B(k,j ) );
5200  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
5201  }
5202 
5203  (~C).store( i, j , xmm1 * factor );
5204  (~C).store( i, j+1UL, xmm2 * factor );
5205  }
5206 
// Remaining single column of the final row block; the inner loop always runs up to K.
5207  if( j < N )
5208  {
5209  const size_t kbegin( ( IsLower<MT5>::value )
5210  ?( ( IsUpper<MT4>::value )
5211  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
5212  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
5213  :( IsUpper<MT4>::value ? i : 0UL ) );
5214 
5215  IntrinsicType xmm1;
5216 
5217  for( size_t k=kbegin; k<K; ++k ) {
5218  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
5219  }
5220 
5221  (~C).store( i, j, xmm1 * factor );
5222  }
5223  }
5224  }
5225  //**********************************************************************************************
5226 
5227  //**Default assignment to dense matrices (large matrices)***************************************
/*!\brief Large-matrix assignment kernel for the non-vectorized case.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// This overload takes part in overload resolution only if UseVectorizedDefaultKernel does not
// hold for the given types (DisableIf). It does no work of its own: all four arguments are
// passed on unchanged to selectDefaultAssignKernel().
*/
5241  template< typename MT3 // Type of the left-hand side target matrix
5242  , typename MT4 // Type of the left-hand side matrix operand
5243  , typename MT5 // Type of the right-hand side matrix operand
5244  , typename ST2 > // Type of the scalar value
5245  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5246  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5247  {
5248  selectDefaultAssignKernel( C, A, B, scalar );
5249  }
5250  //**********************************************************************************************
5251 
5252  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
/*!\brief Vectorized large-matrix assignment kernel for row-major targets.
//
// \param C The target left-hand side dense matrix (storage order flag \a false).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Enabled only if UseVectorizedDefaultKernel holds for the given types. No dedicated blocked
// implementation exists for this target layout in this overload: the call is handed to
// selectSmallAssignKernel() with the unwrapped target (~C).
*/
5267  template< typename MT3 // Type of the left-hand side target matrix
5268  , typename MT4 // Type of the left-hand side matrix operand
5269  , typename MT5 // Type of the right-hand side matrix operand
5270  , typename ST2 > // Type of the scalar value
5271  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5272  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5273  {
5274  selectSmallAssignKernel( ~C, A, B, scalar );
5275  }
5276  //**********************************************************************************************
5277 
5278  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
5293  template< typename MT3 // Type of the left-hand side target matrix
5294  , typename MT4 // Type of the left-hand side matrix operand
5295  , typename MT5 // Type of the right-hand side matrix operand
5296  , typename ST2 > // Type of the scalar value
5297  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5298  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5299  {
5300  typedef IntrinsicTrait<ElementType> IT;
5301 
5302  const size_t M( A.rows() );
5303  const size_t N( B.columns() );
5304  const size_t K( A.columns() );
5305 
5306  const size_t iblock( 128UL );
5307  const size_t jblock( 64UL );
5308  const size_t kblock( 128UL );
5309 
5310  const IntrinsicType factor( set( scalar ) );
5311 
5312  for( size_t ii=0UL; ii<M; ii+=iblock )
5313  {
5314  const size_t iend( min( ii+iblock, M ) );
5315 
5316  for( size_t jj=0UL; jj<N; jj+=jblock )
5317  {
5318  const size_t jend( min( jj+jblock, N ) );
5319 
5320  for( size_t j=jj; j<jend; ++j ) {
5321  for( size_t i=ii; i<iend; ++i ) {
5322  reset( (~C)(i,j) );
5323  }
5324  }
5325 
5326  for( size_t kk=0UL; kk<K; kk+=kblock )
5327  {
5328  const size_t ktmp( min( kk+kblock, K ) );
5329 
5330  size_t i( ii );
5331 
5332  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
5333  {
5334  const size_t i1( i+IT::size );
5335  const size_t i2( i+IT::size*2UL );
5336  const size_t i3( i+IT::size*3UL );
5337 
5338  size_t j( jj );
5339 
5340  for( ; (j+2UL) <= jend; j+=2UL )
5341  {
5342  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5343  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5344  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
5345  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
5346 
5347  IntrinsicType xmm1( (~C).load(i ,j ) );
5348  IntrinsicType xmm2( (~C).load(i1,j ) );
5349  IntrinsicType xmm3( (~C).load(i2,j ) );
5350  IntrinsicType xmm4( (~C).load(i3,j ) );
5351  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
5352  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
5353  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
5354  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
5355 
5356  for( size_t k=kbegin; k<kend; ++k ) {
5357  const IntrinsicType a1( A.load(i ,k) );
5358  const IntrinsicType a2( A.load(i1,k) );
5359  const IntrinsicType a3( A.load(i2,k) );
5360  const IntrinsicType a4( A.load(i3,k) );
5361  const IntrinsicType b1( set( B(k,j ) ) );
5362  const IntrinsicType b2( set( B(k,j+1UL) ) );
5363  xmm1 = xmm1 + a1 * b1;
5364  xmm2 = xmm2 + a2 * b1;
5365  xmm3 = xmm3 + a3 * b1;
5366  xmm4 = xmm4 + a4 * b1;
5367  xmm5 = xmm5 + a1 * b2;
5368  xmm6 = xmm6 + a2 * b2;
5369  xmm7 = xmm7 + a3 * b2;
5370  xmm8 = xmm8 + a4 * b2;
5371  }
5372 
5373  (~C).store( i , j , xmm1 * factor );
5374  (~C).store( i1, j , xmm2 * factor );
5375  (~C).store( i2, j , xmm3 * factor );
5376  (~C).store( i3, j , xmm4 * factor );
5377  (~C).store( i , j+1UL, xmm5 * factor );
5378  (~C).store( i1, j+1UL, xmm6 * factor );
5379  (~C).store( i2, j+1UL, xmm7 * factor );
5380  (~C).store( i3, j+1UL, xmm8 * factor );
5381  }
5382 
5383  if( j < jend )
5384  {
5385  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5386  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5387  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
5388  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
5389 
5390  IntrinsicType xmm1( (~C).load(i ,j) );
5391  IntrinsicType xmm2( (~C).load(i1,j) );
5392  IntrinsicType xmm3( (~C).load(i2,j) );
5393  IntrinsicType xmm4( (~C).load(i3,j) );
5394 
5395  for( size_t k=kbegin; k<kend; ++k ) {
5396  const IntrinsicType b1( set( B(k,j) ) );
5397  xmm1 = xmm1 + A.load(i ,k) * b1;
5398  xmm2 = xmm2 + A.load(i1,k) * b1;
5399  xmm3 = xmm3 + A.load(i2,k) * b1;
5400  xmm4 = xmm4 + A.load(i3,k) * b1;
5401  }
5402 
5403  (~C).store( i , j, xmm1 * factor );
5404  (~C).store( i1, j, xmm2 * factor );
5405  (~C).store( i2, j, xmm3 * factor );
5406  (~C).store( i3, j, xmm4 * factor );
5407  }
5408  }
5409 
5410  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
5411  {
5412  const size_t i1( i+IT::size );
5413 
5414  size_t j( jj );
5415 
5416  for( ; (j+4UL) <= jend; j+=4UL )
5417  {
5418  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5419  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5420  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
5421  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
5422 
5423  IntrinsicType xmm1( (~C).load(i ,j ) );
5424  IntrinsicType xmm2( (~C).load(i1,j ) );
5425  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
5426  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
5427  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
5428  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
5429  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
5430  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
5431 
5432  for( size_t k=kbegin; k<kend; ++k ) {
5433  const IntrinsicType a1( A.load(i ,k) );
5434  const IntrinsicType a2( A.load(i1,k) );
5435  const IntrinsicType b1( set( B(k,j ) ) );
5436  const IntrinsicType b2( set( B(k,j+1UL) ) );
5437  const IntrinsicType b3( set( B(k,j+2UL) ) );
5438  const IntrinsicType b4( set( B(k,j+3UL) ) );
5439  xmm1 = xmm1 + a1 * b1;
5440  xmm2 = xmm2 + a2 * b1;
5441  xmm3 = xmm3 + a1 * b2;
5442  xmm4 = xmm4 + a2 * b2;
5443  xmm5 = xmm5 + a1 * b3;
5444  xmm6 = xmm6 + a2 * b3;
5445  xmm7 = xmm7 + a1 * b4;
5446  xmm8 = xmm8 + a2 * b4;
5447  }
5448 
5449  (~C).store( i , j , xmm1 * factor );
5450  (~C).store( i1, j , xmm2 * factor );
5451  (~C).store( i , j+1UL, xmm3 * factor );
5452  (~C).store( i1, j+1UL, xmm4 * factor );
5453  (~C).store( i , j+2UL, xmm5 * factor );
5454  (~C).store( i1, j+2UL, xmm6 * factor );
5455  (~C).store( i , j+3UL, xmm7 * factor );
5456  (~C).store( i1, j+3UL, xmm8 * factor );
5457  }
5458 
5459  for( ; (j+2UL) <= jend; j+=2UL )
5460  {
5461  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5462  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5463  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
5464  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
5465 
5466  IntrinsicType xmm1( (~C).load(i ,j ) );
5467  IntrinsicType xmm2( (~C).load(i1,j ) );
5468  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
5469  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
5470 
5471  for( size_t k=kbegin; k<kend; ++k ) {
5472  const IntrinsicType a1( A.load(i ,k) );
5473  const IntrinsicType a2( A.load(i1,k) );
5474  const IntrinsicType b1( set( B(k,j ) ) );
5475  const IntrinsicType b2( set( B(k,j+1UL) ) );
5476  xmm1 = xmm1 + a1 * b1;
5477  xmm2 = xmm2 + a2 * b1;
5478  xmm3 = xmm3 + a1 * b2;
5479  xmm4 = xmm4 + a2 * b2;
5480  }
5481 
5482  (~C).store( i , j , xmm1 * factor );
5483  (~C).store( i1, j , xmm2 * factor );
5484  (~C).store( i , j+1UL, xmm3 * factor );
5485  (~C).store( i1, j+1UL, xmm4 * factor );
5486  }
5487 
5488  if( j < jend )
5489  {
5490  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5491  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5492  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
5493  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
5494 
5495  IntrinsicType xmm1( (~C).load(i ,j) );
5496  IntrinsicType xmm2( (~C).load(i1,j) );
5497 
5498  for( size_t k=kbegin; k<kend; ++k ) {
5499  const IntrinsicType b1( set( B(k,j) ) );
5500  xmm1 = xmm1 + A.load(i ,k) * b1;
5501  xmm2 = xmm2 + A.load(i1,k) * b1;
5502  }
5503 
5504  (~C).store( i , j, xmm1 * factor );
5505  (~C).store( i1, j, xmm2 * factor );
5506  }
5507  }
5508 
5509  if( i < iend )
5510  {
5511  for( size_t j=jj; j<jend; ++j )
5512  {
5513  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
5514  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
5515  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
5516  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
5517 
5518  IntrinsicType xmm1( (~C).load(i,j) );
5519 
5520  for( size_t k=kbegin; k<kend; ++k ) {
5521  const IntrinsicType b1( set( B(k,j) ) );
5522  xmm1 = xmm1 + A.load(i,k) * b1;
5523  }
5524 
5525  (~C).store( i, j, xmm1 * factor );
5526  }
5527  }
5528  }
5529  }
5530  }
5531  }
5532  //**********************************************************************************************
5533 
5534  //**BLAS-based assignment to dense matrices (default)*******************************************
/*!\brief Fallback of the BLAS-based assignment kernel.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Enabled only if UseDefaultKernel holds for the given types. No BLAS routine is called here:
// the arguments are passed on unchanged to selectLargeAssignKernel().
*/
5548  template< typename MT3 // Type of the left-hand side target matrix
5549  , typename MT4 // Type of the left-hand side matrix operand
5550  , typename MT5 // Type of the right-hand side matrix operand
5551  , typename ST2 > // Type of the scalar value
5552  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5553  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5554  {
5555  selectLargeAssignKernel( C, A, B, scalar );
5556  }
5557  //**********************************************************************************************
5558 
5559  //**BLAS-based assignment to dense matrices (single precision)**********************************
5560 #if BLAZE_BLAS_MODE
5561 
5574  template< typename MT3 // Type of the left-hand side target matrix
5575  , typename MT4 // Type of the left-hand side matrix operand
5576  , typename MT5 // Type of the right-hand side matrix operand
5577  , typename ST2 > // Type of the scalar value
5578  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5579  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5580  {
5581  if( IsTriangular<MT4>::value ) {
5582  assign( C, B );
5583  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5584  }
5585  else if( IsTriangular<MT5>::value ) {
5586  assign( C, A );
5587  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5588  }
5589  else {
5590  sgemm( C, A, B, scalar, 0.0F );
5591  }
5592  }
5593 #endif
5594  //**********************************************************************************************
5595 
5596  //**BLAS-based assignment to dense matrices (double precision)**********************************
5597 #if BLAZE_BLAS_MODE
5598 
5611  template< typename MT3 // Type of the left-hand side target matrix
5612  , typename MT4 // Type of the left-hand side matrix operand
5613  , typename MT5 // Type of the right-hand side matrix operand
5614  , typename ST2 > // Type of the scalar value
5615  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5616  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5617  {
5618  if( IsTriangular<MT4>::value ) {
5619  assign( C, B );
5620  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5621  }
5622  else if( IsTriangular<MT5>::value ) {
5623  assign( C, A );
5624  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5625  }
5626  else {
5627  dgemm( C, A, B, scalar, 0.0 );
5628  }
5629  }
5630 #endif
5631  //**********************************************************************************************
5632 
5633  //**BLAS-based assignment to dense matrices (single precision complex)**************************
5634 #if BLAZE_BLAS_MODE
5635 
5648  template< typename MT3 // Type of the left-hand side target matrix
5649  , typename MT4 // Type of the left-hand side matrix operand
5650  , typename MT5 // Type of the right-hand side matrix operand
5651  , typename ST2 > // Type of the scalar value
5652  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5653  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5654  {
5655  if( IsTriangular<MT4>::value ) {
5656  assign( C, B );
5657  ctrmm( C, A, CblasLeft,
5658  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5659  complex<float>( scalar, 0.0F ) );
5660  }
5661  else if( IsTriangular<MT5>::value ) {
5662  assign( C, A );
5663  ctrmm( C, B, CblasRight,
5664  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5665  complex<float>( scalar, 0.0F ) );
5666  }
5667  else {
5668  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
5669  }
5670  }
5671 #endif
5672  //**********************************************************************************************
5673 
5674  //**BLAS-based assignment to dense matrices (double precision complex)**************************
5675 #if BLAZE_BLAS_MODE
5676 
5689  template< typename MT3 // Type of the left-hand side target matrix
5690  , typename MT4 // Type of the left-hand side matrix operand
5691  , typename MT5 // Type of the right-hand side matrix operand
5692  , typename ST2 > // Type of the scalar value
5693  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5694  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5695  {
5696  if( IsTriangular<MT4>::value ) {
5697  assign( C, B );
5698  ztrmm( C, A, CblasLeft,
5699  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5700  complex<double>( scalar, 0.0 ) );
5701  }
5702  else if( IsTriangular<MT5>::value ) {
5703  assign( C, A );
5704  ztrmm( C, B, CblasRight,
5705  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5706  complex<double>( scalar, 0.0 ) );
5707  }
5708  else {
5709  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
5710  }
5711  }
5712 #endif
5713  //**********************************************************************************************
5714 
5715  //**Assignment to sparse matrices***************************************************************
/*!\brief Assignment of the scaled multiplication expression to a sparse matrix.
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
//
// Takes part in overload resolution only if CanExploitSymmetry does not hold for the target and
// operand types (DisableIf). The expression is first evaluated serially into a dense temporary,
// which is then assigned to the sparse target.
//
// NOTE(review): the listing numbering skips 5732 and 5736-5741 inside this body; statements
// (e.g. trace or constraint macros) may have been dropped by the extraction. Confirm against
// the upstream header before relying on this text.
*/
5727  template< typename MT // Type of the target sparse matrix
5728  , bool SO > // Storage order of the target sparse matrix
5729  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5730  assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5731  {
5733 
// Type of the temporary: chosen between ResultType and OppositeType by the storage order SO
5734  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
5735 
5742 
5743  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5744  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5745 
// Evaluate the whole expression serially, then hand the finished temporary to the target
5746  const TmpType tmp( serial( rhs ) );
5747  assign( ~lhs, tmp );
5748  }
5749  //**********************************************************************************************
5750 
5751  //**Restructuring assignment to row-major matrices**********************************************
/*!\brief Restructuring assignment of the scaled multiplication expression to a row-major matrix.
//
// \param lhs The target left-hand side matrix (storage order flag \a false).
// \param rhs The right-hand side scaled multiplication expression to be assigned.
//
// Takes part in overload resolution only if CanExploitSymmetry holds for the target and operand
// types. Instead of evaluating the product as written, an equivalent product is assigned in
// which the operand(s) flagged as symmetric are wrapped in trans(); the scalar is applied last
// in every variant.
//
// NOTE(review): the listing numbering skips 5769 and 5771 inside this body; statements (e.g.
// trace or constraint macros) may have been dropped by the extraction. Confirm against the
// upstream header.
*/
5765  template< typename MT > // Type of the target matrix
5766  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5767  assign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
5768  {
5770 
5772 
5773  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5774  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5775 
5776  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5777  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5778 
// Both operands symmetric: transpose both
5779  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
5780  assign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
// Only the left operand is known to be symmetric: transpose the left operand
5781  else if( IsSymmetric<MT1>::value )
5782  assign( ~lhs, trans( left ) * right * rhs.scalar_ );
// Remaining case: transpose the right operand (presumably MT2 is symmetric here, given the
// CanExploitSymmetry condition -- TODO confirm)
5783  else
5784  assign( ~lhs, left * trans( right ) * rhs.scalar_ );
5785  }
5786  //**********************************************************************************************
5787 
5788  //**Addition assignment to dense matrices*******************************************************
/*!\brief Addition assignment of the scaled multiplication expression to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be added.
//
// Takes part in overload resolution only if CanExploitSymmetry does not hold for the target and
// operand types (DisableIf). Both operands of the wrapped multiplication are evaluated serially
// and the work is then delegated to selectAddAssignKernel() together with the stored scalar.
//
// NOTE(review): the listing numbering skips 5805 inside this body; a statement (e.g. a trace
// macro) may have been dropped by the extraction. Confirm against the upstream header.
*/
5800  template< typename MT // Type of the target dense matrix
5801  , bool SO > // Storage order of the target dense matrix
5802  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
5803  addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5804  {
5806 
5807  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5808  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5809 
5810  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5811  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5812 
// Nothing to add if the target is empty or the inner dimension is zero; leave before the
// operands are evaluated
5813  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5814  return;
5815  }
5816 
5817  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5818  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5819 
// The evaluated operands must have kept their dimensions and must match the target
5820  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5821  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5822  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5823  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5824  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5825  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5826 
5827  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5828  }
5829  //**********************************************************************************************
5830 
5831  //**Addition assignment to dense matrices (kernel selection)************************************
5842  template< typename MT3 // Type of the left-hand side target matrix
5843  , typename MT4 // Type of the left-hand side matrix operand
5844  , typename MT5 // Type of the right-hand side matrix operand
5845  , typename ST2 > // Type of the scalar value
5846  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5847  {
5848  if( ( IsDiagonal<MT4>::value ) ||
5849  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
5850  selectSmallAddAssignKernel( C, A, B, scalar );
5851  else
5852  selectBlasAddAssignKernel( C, A, B, scalar );
5853  }
5854  //**********************************************************************************************
5855 
5856  //**Default addition assignment to dense matrices (general/general)*****************************
/*!\brief Default addition assignment kernel for two non-diagonal operands.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Enabled only if neither MT4 nor MT5 is flagged as diagonal. The scaled product A * B * scalar
// is evaluated serially into a temporary of type ResultType, and the temporary is then added to
// the target via addAssign().
*/
5870  template< typename MT3 // Type of the left-hand side target matrix
5871  , typename MT4 // Type of the left-hand side matrix operand
5872  , typename MT5 // Type of the right-hand side matrix operand
5873  , typename ST2 > // Type of the scalar value
5874  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
5875  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5876  {
5877  const ResultType tmp( serial( A * B * scalar ) );
5878  addAssign( C, tmp );
5879  }
5880  //**********************************************************************************************
5881 
5882  //**Default addition assignment to dense matrices (general/diagonal)****************************
5896  template< typename MT3 // Type of the left-hand side target matrix
5897  , typename MT4 // Type of the left-hand side matrix operand
5898  , typename MT5 // Type of the right-hand side matrix operand
5899  , typename ST2 > // Type of the scalar value
5900  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
5901  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5902  {
5904 
5905  const size_t M( A.rows() );
5906  const size_t N( B.columns() );
5907 
5908  for( size_t j=0UL; j<N; ++j )
5909  {
5910  const size_t ibegin( ( IsLower<MT4>::value )
5911  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
5912  :( 0UL ) );
5913  const size_t iend( ( IsUpper<MT4>::value )
5914  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
5915  :( M ) );
5916  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5917 
5918  const size_t inum( iend - ibegin );
5919  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5920 
5921  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5922  C(i ,j) += A(i ,j) * B(j,j) * scalar;
5923  C(i+1UL,j) += A(i+1UL,j) * B(j,j) * scalar;
5924  }
5925  if( ipos < iend ) {
5926  C(ipos,j) += A(ipos,j) * B(j,j) * scalar;
5927  }
5928  }
5929  }
5930  //**********************************************************************************************
5931 
5932  //**Default addition assignment to dense matrices (diagonal/general)****************************
5946  template< typename MT3 // Type of the left-hand side target matrix
5947  , typename MT4 // Type of the left-hand side matrix operand
5948  , typename MT5 // Type of the right-hand side matrix operand
5949  , typename ST2 > // Type of the scalar value
5950  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
5951  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5952  {
5954 
5955  const size_t M( A.rows() );
5956  const size_t N( B.columns() );
5957 
5958  for( size_t j=0UL; j<N; ++j )
5959  {
5960  const size_t ibegin( ( IsLower<MT5>::value )
5961  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5962  :( 0UL ) );
5963  const size_t iend( ( IsUpper<MT5>::value )
5964  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5965  :( M ) );
5966  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5967 
5968  const size_t inum( iend - ibegin );
5969  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5970 
5971  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5972  C(i ,j) += A(i ,i ) * B(i ,j) * scalar;
5973  C(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
5974  }
5975  if( ipos < iend ) {
5976  C(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
5977  }
5978  }
5979  }
5980  //**********************************************************************************************
5981 
5982  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
5996  template< typename MT3 // Type of the left-hand side target matrix
5997  , typename MT4 // Type of the left-hand side matrix operand
5998  , typename MT5 // Type of the right-hand side matrix operand
5999  , typename ST2 > // Type of the scalar value
6000  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
6001  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6002  {
6004 
6005  for( size_t i=0UL; i<A.rows(); ++i ) {
6006  C(i,i) += A(i,i) * B(i,i) * scalar;
6007  }
6008  }
6009  //**********************************************************************************************
6010 
6011  //**Default addition assignment to dense matrices (small matrices)******************************
/*!\brief Small-matrix addition assignment kernel for the non-vectorized case.
//
// \param C The target left-hand side matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// This overload takes part in overload resolution only if UseVectorizedDefaultKernel does not
// hold for the given types (DisableIf). It does no work of its own: all four arguments are
// passed on unchanged to selectDefaultAddAssignKernel().
*/
6025  template< typename MT3 // Type of the left-hand side target matrix
6026  , typename MT4 // Type of the left-hand side matrix operand
6027  , typename MT5 // Type of the right-hand side matrix operand
6028  , typename ST2 > // Type of the scalar value
6029  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6030  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6031  {
6032  selectDefaultAddAssignKernel( C, A, B, scalar );
6033  }
6034  //**********************************************************************************************
6035 
6036  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
6051  template< typename MT3 // Type of the left-hand side target matrix
6052  , typename MT4 // Type of the left-hand side matrix operand
6053  , typename MT5 // Type of the right-hand side matrix operand
6054  , typename ST2 > // Type of the scalar value
6055  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6056  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6057  {
6062 
6063  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
6064  const typename MT5::OppositeType tmp( serial( B ) );
6065  addAssign( ~C, A * tmp * scalar );
6066  }
6067  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
6068  const typename MT4::OppositeType tmp( serial( A ) );
6069  addAssign( ~C, tmp * B * scalar );
6070  }
6071  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
6072  const typename MT5::OppositeType tmp( serial( B ) );
6073  addAssign( ~C, A * tmp * scalar );
6074  }
6075  else {
6076  const typename MT4::OppositeType tmp( serial( A ) );
6077  addAssign( ~C, tmp * B * scalar );
6078  }
6079  }
6080  //**********************************************************************************************
6081 
6082  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
6097  template< typename MT3 // Type of the left-hand side target matrix
6098  , typename MT4 // Type of the left-hand side matrix operand
6099  , typename MT5 // Type of the right-hand side matrix operand
6100  , typename ST2 > // Type of the scalar value
6101  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6102  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6103  {
6104  typedef IntrinsicTrait<ElementType> IT;
6105 
6106  const size_t M( A.rows() );
6107  const size_t N( B.columns() );
6108  const size_t K( A.columns() );
6109 
6110  const IntrinsicType factor( set( scalar ) );
6111 
6112  size_t i( 0UL );
6113 
6114  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
6115  for( size_t j=0UL; j<N; ++j )
6116  {
6117  const size_t kbegin( ( IsLower<MT5>::value )
6118  ?( ( IsUpper<MT4>::value )
6119  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6120  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6121  :( IsUpper<MT4>::value ? i : 0UL ) );
6122  const size_t kend( ( IsUpper<MT5>::value )
6123  ?( ( IsLower<MT4>::value )
6124  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
6125  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
6126  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
6127 
6128  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6129 
6130  for( size_t k=kbegin; k<kend; ++k ) {
6131  const IntrinsicType b1( set( B(k,j) ) );
6132  xmm1 = xmm1 + A.load(i ,k) * b1;
6133  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
6134  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
6135  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
6136  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
6137  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
6138  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
6139  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
6140  }
6141 
6142  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
6143  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
6144  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
6145  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
6146  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
6147  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
6148  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
6149  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
6150  }
6151  }
6152 
6153  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
6154  {
6155  size_t j( 0UL );
6156 
6157  for( ; (j+2UL) <= N; j+=2UL )
6158  {
6159  const size_t kbegin( ( IsLower<MT5>::value )
6160  ?( ( IsUpper<MT4>::value )
6161  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6162  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6163  :( IsUpper<MT4>::value ? i : 0UL ) );
6164  const size_t kend( ( IsUpper<MT5>::value )
6165  ?( ( IsLower<MT4>::value )
6166  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6167  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6168  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
6169 
6170  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6171 
6172  for( size_t k=kbegin; k<kend; ++k ) {
6173  const IntrinsicType a1( A.load(i ,k) );
6174  const IntrinsicType a2( A.load(i+IT::size ,k) );
6175  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
6176  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
6177  const IntrinsicType b1( set( B(k,j ) ) );
6178  const IntrinsicType b2( set( B(k,j+1UL) ) );
6179  xmm1 = xmm1 + a1 * b1;
6180  xmm2 = xmm2 + a2 * b1;
6181  xmm3 = xmm3 + a3 * b1;
6182  xmm4 = xmm4 + a4 * b1;
6183  xmm5 = xmm5 + a1 * b2;
6184  xmm6 = xmm6 + a2 * b2;
6185  xmm7 = xmm7 + a3 * b2;
6186  xmm8 = xmm8 + a4 * b2;
6187  }
6188 
6189  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6190  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
6191  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
6192  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
6193  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
6194  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
6195  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
6196  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
6197  }
6198 
6199  if( j < N )
6200  {
6201  const size_t kbegin( ( IsLower<MT5>::value )
6202  ?( ( IsUpper<MT4>::value )
6203  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6204  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6205  :( IsUpper<MT4>::value ? i : 0UL ) );
6206  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
6207 
6208  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6209 
6210  for( size_t k=kbegin; k<kend; ++k ) {
6211  const IntrinsicType b1( set( B(k,j) ) );
6212  xmm1 = xmm1 + A.load(i ,k) * b1;
6213  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
6214  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
6215  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
6216  }
6217 
6218  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
6219  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
6220  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
6221  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
6222  }
6223  }
6224 
6225  for( ; (i+IT::size) < M; i+=IT::size*2UL )
6226  {
6227  size_t j( 0UL );
6228 
6229  for( ; (j+2UL) <= N; j+=2UL )
6230  {
6231  const size_t kbegin( ( IsLower<MT5>::value )
6232  ?( ( IsUpper<MT4>::value )
6233  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6234  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6235  :( IsUpper<MT4>::value ? i : 0UL ) );
6236  const size_t kend( ( IsUpper<MT5>::value )
6237  ?( ( IsLower<MT4>::value )
6238  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
6239  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
6240  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
6241 
6242  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6243 
6244  for( size_t k=kbegin; k<kend; ++k ) {
6245  const IntrinsicType a1( A.load(i ,k) );
6246  const IntrinsicType a2( A.load(i+IT::size,k) );
6247  const IntrinsicType b1( set( B(k,j ) ) );
6248  const IntrinsicType b2( set( B(k,j+1UL) ) );
6249  xmm1 = xmm1 + a1 * b1;
6250  xmm2 = xmm2 + a2 * b1;
6251  xmm3 = xmm3 + a1 * b2;
6252  xmm4 = xmm4 + a2 * b2;
6253  }
6254 
6255  (~C).store( i , j , (~C).load(i ,j ) + xmm1 * factor );
6256  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
6257  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
6258  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
6259  }
6260 
6261  if( j < N )
6262  {
6263  const size_t kbegin( ( IsLower<MT5>::value )
6264  ?( ( IsUpper<MT4>::value )
6265  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6266  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6267  :( IsUpper<MT4>::value ? i : 0UL ) );
6268  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
6269 
6270  IntrinsicType xmm1, xmm2;
6271 
6272  for( size_t k=kbegin; k<kend; ++k ) {
6273  const IntrinsicType b1( set( B(k,j) ) );
6274  xmm1 = xmm1 + A.load(i ,k) * b1;
6275  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
6276  }
6277 
6278  (~C).store( i , j, (~C).load(i ,j) + xmm1 * factor );
6279  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
6280  }
6281  }
6282 
6283  if( i < M )
6284  {
6285  size_t j( 0UL );
6286 
6287  for( ; (j+2UL) <= N; j+=2UL )
6288  {
6289  const size_t kbegin( ( IsLower<MT5>::value )
6290  ?( ( IsUpper<MT4>::value )
6291  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6292  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6293  :( IsUpper<MT4>::value ? i : 0UL ) );
6294  const size_t kend( ( IsUpper<MT5>::value )
6295  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
6296  :( K ) );
6297 
6298  IntrinsicType xmm1, xmm2;
6299 
6300  for( size_t k=kbegin; k<kend; ++k ) {
6301  const IntrinsicType a1( A.load(i,k) );
6302  xmm1 = xmm1 + a1 * set( B(k,j ) );
6303  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
6304  }
6305 
6306  (~C).store( i, j , (~C).load(i,j ) + xmm1 * factor );
6307  (~C).store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
6308  }
6309 
6310  if( j < N )
6311  {
6312  const size_t kbegin( ( IsLower<MT5>::value )
6313  ?( ( IsUpper<MT4>::value )
6314  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
6315  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
6316  :( IsUpper<MT4>::value ? i : 0UL ) );
6317 
6318  IntrinsicType xmm1;
6319 
6320  for( size_t k=kbegin; k<K; ++k ) {
6321  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
6322  }
6323 
6324  (~C).store( i, j, (~C).load(i,j) + xmm1 * factor );
6325  }
6326  }
6327  }
6328  //**********************************************************************************************
6329 
6330  //**Default addition assignment to dense matrices (large matrices)******************************
// Large-matrix addition assignment kernel for the non-vectorized case.
//
// This overload is selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold
// (it is guarded by DisableIf). It has no implementation of its own and simply forwards all
// four arguments (target C, operands A and B, and the scalar) to selectDefaultAddAssignKernel().
6344  template< typename MT3 // Type of the left-hand side target matrix
6345  , typename MT4 // Type of the left-hand side matrix operand
6346  , typename MT5 // Type of the right-hand side matrix operand
6347  , typename ST2 > // Type of the scalar value
6348  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6349  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6350  {
6351  selectDefaultAddAssignKernel( C, A, B, scalar );
6352  }
6353  //**********************************************************************************************
6354 
6355  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Large-matrix addition assignment kernel for targets of type DenseMatrix<MT3,false>
// (the row-major case according to the section banner).
//
// This overload is enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. There is no
// dedicated blocked implementation for this target type: the call is forwarded unchanged to
// selectSmallAddAssignKernel(), passing the unwrapped target (~C).
6370  template< typename MT3 // Type of the left-hand side target matrix
6371  , typename MT4 // Type of the left-hand side matrix operand
6372  , typename MT5 // Type of the right-hand side matrix operand
6373  , typename ST2 > // Type of the scalar value
6374  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6375  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6376  {
6377  selectSmallAddAssignKernel( ~C, A, B, scalar );
6378  }
6379  //**********************************************************************************************
6380 
6381  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
6396  template< typename MT3 // Type of the left-hand side target matrix
6397  , typename MT4 // Type of the left-hand side matrix operand
6398  , typename MT5 // Type of the right-hand side matrix operand
6399  , typename ST2 > // Type of the scalar value
6400  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6401  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6402  {
6403  typedef IntrinsicTrait<ElementType> IT;
6404 
6405  const size_t M( A.rows() );
6406  const size_t N( B.columns() );
6407  const size_t K( A.columns() );
6408 
6409  const size_t iblock( 128UL );
6410  const size_t jblock( 64UL );
6411  const size_t kblock( 128UL );
6412 
6413  const IntrinsicType factor( set( scalar ) );
6414 
6415  for( size_t ii=0UL; ii<M; ii+=iblock )
6416  {
6417  const size_t iend( min( ii+iblock, M ) );
6418 
6419  for( size_t jj=0UL; jj<N; jj+=jblock )
6420  {
6421  const size_t jend( min( jj+jblock, N ) );
6422 
6423  for( size_t kk=0UL; kk<K; kk+=kblock )
6424  {
6425  const size_t ktmp( min( kk+kblock, K ) );
6426 
6427  size_t i( ii );
6428 
6429  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
6430  {
6431  const size_t i1( i+IT::size );
6432  const size_t i2( i+IT::size*2UL );
6433  const size_t i3( i+IT::size*3UL );
6434 
6435  size_t j( jj );
6436 
6437  for( ; (j+2UL) <= jend; j+=2UL )
6438  {
6439  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6440  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6441  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
6442  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
6443 
6444  IntrinsicType xmm1( (~C).load(i ,j ) );
6445  IntrinsicType xmm2( (~C).load(i1,j ) );
6446  IntrinsicType xmm3( (~C).load(i2,j ) );
6447  IntrinsicType xmm4( (~C).load(i3,j ) );
6448  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
6449  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
6450  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
6451  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
6452 
6453  for( size_t k=kbegin; k<kend; ++k ) {
6454  const IntrinsicType a1( A.load(i ,k) );
6455  const IntrinsicType a2( A.load(i1,k) );
6456  const IntrinsicType a3( A.load(i2,k) );
6457  const IntrinsicType a4( A.load(i3,k) );
6458  const IntrinsicType b1( set( B(k,j ) ) );
6459  const IntrinsicType b2( set( B(k,j+1UL) ) );
6460  xmm1 = xmm1 + a1 * b1;
6461  xmm2 = xmm2 + a2 * b1;
6462  xmm3 = xmm3 + a3 * b1;
6463  xmm4 = xmm4 + a4 * b1;
6464  xmm5 = xmm5 + a1 * b2;
6465  xmm6 = xmm6 + a2 * b2;
6466  xmm7 = xmm7 + a3 * b2;
6467  xmm8 = xmm8 + a4 * b2;
6468  }
6469 
6470  (~C).store( i , j , xmm1 * factor );
6471  (~C).store( i1, j , xmm2 * factor );
6472  (~C).store( i2, j , xmm3 * factor );
6473  (~C).store( i3, j , xmm4 * factor );
6474  (~C).store( i , j+1UL, xmm5 * factor );
6475  (~C).store( i1, j+1UL, xmm6 * factor );
6476  (~C).store( i2, j+1UL, xmm7 * factor );
6477  (~C).store( i3, j+1UL, xmm8 * factor );
6478  }
6479 
6480  if( j < jend )
6481  {
6482  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6483  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6484  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
6485  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
6486 
6487  IntrinsicType xmm1( (~C).load(i ,j) );
6488  IntrinsicType xmm2( (~C).load(i1,j) );
6489  IntrinsicType xmm3( (~C).load(i2,j) );
6490  IntrinsicType xmm4( (~C).load(i3,j) );
6491 
6492  for( size_t k=kbegin; k<kend; ++k ) {
6493  const IntrinsicType b1( set( B(k,j) ) );
6494  xmm1 = xmm1 + A.load(i ,k) * b1;
6495  xmm2 = xmm2 + A.load(i1,k) * b1;
6496  xmm3 = xmm3 + A.load(i2,k) * b1;
6497  xmm4 = xmm4 + A.load(i3,k) * b1;
6498  }
6499 
6500  (~C).store( i , j, xmm1 * factor );
6501  (~C).store( i1, j, xmm2 * factor );
6502  (~C).store( i2, j, xmm3 * factor );
6503  (~C).store( i3, j, xmm4 * factor );
6504  }
6505  }
6506 
6507  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
6508  {
6509  const size_t i1( i+IT::size );
6510 
6511  size_t j( jj );
6512 
6513  for( ; (j+4UL) <= jend; j+=4UL )
6514  {
6515  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6516  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6517  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
6518  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
6519 
6520  IntrinsicType xmm1( (~C).load(i ,j ) );
6521  IntrinsicType xmm2( (~C).load(i1,j ) );
6522  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
6523  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
6524  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
6525  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
6526  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
6527  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
6528 
6529  for( size_t k=kbegin; k<kend; ++k ) {
6530  const IntrinsicType a1( A.load(i ,k) );
6531  const IntrinsicType a2( A.load(i1,k) );
6532  const IntrinsicType b1( set( B(k,j ) ) );
6533  const IntrinsicType b2( set( B(k,j+1UL) ) );
6534  const IntrinsicType b3( set( B(k,j+2UL) ) );
6535  const IntrinsicType b4( set( B(k,j+3UL) ) );
6536  xmm1 = xmm1 + a1 * b1;
6537  xmm2 = xmm2 + a2 * b1;
6538  xmm3 = xmm3 + a1 * b2;
6539  xmm4 = xmm4 + a2 * b2;
6540  xmm5 = xmm5 + a1 * b3;
6541  xmm6 = xmm6 + a2 * b3;
6542  xmm7 = xmm7 + a1 * b4;
6543  xmm8 = xmm8 + a2 * b4;
6544  }
6545 
6546  (~C).store( i , j , xmm1 * factor );
6547  (~C).store( i1, j , xmm2 * factor );
6548  (~C).store( i , j+1UL, xmm3 * factor );
6549  (~C).store( i1, j+1UL, xmm4 * factor );
6550  (~C).store( i , j+2UL, xmm5 * factor );
6551  (~C).store( i1, j+2UL, xmm6 * factor );
6552  (~C).store( i , j+3UL, xmm7 * factor );
6553  (~C).store( i1, j+3UL, xmm8 * factor );
6554  }
6555 
6556  for( ; (j+2UL) <= jend; j+=2UL )
6557  {
6558  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6559  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6560  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
6561  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
6562 
6563  IntrinsicType xmm1( (~C).load(i ,j ) );
6564  IntrinsicType xmm2( (~C).load(i1,j ) );
6565  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
6566  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
6567 
6568  for( size_t k=kbegin; k<kend; ++k ) {
6569  const IntrinsicType a1( A.load(i ,k) );
6570  const IntrinsicType a2( A.load(i1,k) );
6571  const IntrinsicType b1( set( B(k,j ) ) );
6572  const IntrinsicType b2( set( B(k,j+1UL) ) );
6573  xmm1 = xmm1 + a1 * b1;
6574  xmm2 = xmm2 + a2 * b1;
6575  xmm3 = xmm3 + a1 * b2;
6576  xmm4 = xmm4 + a2 * b2;
6577  }
6578 
6579  (~C).store( i , j , xmm1 * factor );
6580  (~C).store( i1, j , xmm2 * factor );
6581  (~C).store( i , j+1UL, xmm3 * factor );
6582  (~C).store( i1, j+1UL, xmm4 * factor );
6583  }
6584 
6585  if( j < jend )
6586  {
6587  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6588  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6589  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
6590  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
6591 
6592  IntrinsicType xmm1( (~C).load(i ,j) );
6593  IntrinsicType xmm2( (~C).load(i1,j) );
6594 
6595  for( size_t k=kbegin; k<kend; ++k ) {
6596  const IntrinsicType b1( set( B(k,j) ) );
6597  xmm1 = xmm1 + A.load(i ,k) * b1;
6598  xmm2 = xmm2 + A.load(i1,k) * b1;
6599  }
6600 
6601  (~C).store( i , j, xmm1 * factor );
6602  (~C).store( i1, j, xmm2 * factor );
6603  }
6604  }
6605 
6606  if( i < iend )
6607  {
6608  for( size_t j=jj; j<jend; ++j )
6609  {
6610  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
6611  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
6612  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
6613  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
6614 
6615  IntrinsicType xmm1( (~C).load(i,j) );
6616 
6617  for( size_t k=kbegin; k<kend; ++k ) {
6618  const IntrinsicType b1( set( B(k,j) ) );
6619  xmm1 = xmm1 + A.load(i,k) * b1;
6620  }
6621 
6622  (~C).store( i, j, xmm1 * factor );
6623  }
6624  }
6625  }
6626  }
6627  }
6628  }
6629  //**********************************************************************************************
6630 
6631  //**BLAS-based addition assignment to dense matrices (default)**********************************
// Default overload of the BLAS-based addition assignment kernel.
//
// This overload is enabled when UseDefaultKernel<MT3,MT4,MT5,ST2> holds (presumably the case in
// which none of the BLAS-specific overloads applies -- confirm against the UseDefaultKernel
// definition). It performs no BLAS call itself and forwards all arguments to
// selectLargeAddAssignKernel().
6646  template< typename MT3 // Type of the left-hand side target matrix
6647  , typename MT4 // Type of the left-hand side matrix operand
6648  , typename MT5 // Type of the right-hand side matrix operand
6649  , typename ST2 > // Type of the scalar value
6650  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6651  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6652  {
6653  selectLargeAddAssignKernel( C, A, B, scalar );
6654  }
6655  //**********************************************************************************************
6656 
6657  //**BLAS-based addition assignment to dense matrices (single precision)*************************
#if BLAZE_BLAS_MODE

// BLAS-based addition assignment for single precision operands; only compiled in BLAS mode and
// enabled when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//
// \param C      The target matrix that is incremented.
// \param A      The left-hand side multiplication operand.
// \param B      The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Without a triangular operand a single sgemm() call is issued with the scalar and 1.0F as the
// two trailing factors. With a triangular operand the other operand is copied into a temporary
// of the target's result type, strmm() is applied to that temporary and the temporary is added
// to C. (NOTE(review): the exact contract of sgemm()/strmm() is defined by the Blaze BLAS
// wrappers and is not visible here.)
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   typedef typename MT3::ResultType  TmpType;

   const bool leftTriangular ( IsTriangular<MT4>::value );
   const bool rightTriangular( IsTriangular<MT5>::value );

   if( !leftTriangular && !rightTriangular ) {
      // General case: one matrix-matrix multiplication call updates C directly
      sgemm( C, A, B, scalar, 1.0F );
   }
   else if( leftTriangular ) {
      // Triangular left operand: work on a copy of B
      TmpType product( B );
      strmm( product, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
      addAssign( C, product );
   }
   else {
      // Triangular right operand: work on a copy of A
      TmpType product( A );
      strmm( product, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
      addAssign( C, product );
   }
}
#endif
6694  //**********************************************************************************************
6695 
6696  //**BLAS-based addition assignment to dense matrices (double precision)*************************
#if BLAZE_BLAS_MODE

// BLAS-based addition assignment for double precision operands; only compiled in BLAS mode and
// enabled when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//
// \param C      The target matrix that is incremented.
// \param A      The left-hand side multiplication operand.
// \param B      The right-hand side multiplication operand.
// \param scalar The scaling factor.
//
// Without a triangular operand a single dgemm() call is issued with the scalar and 1.0 as the
// two trailing factors. With a triangular operand the other operand is copied into a temporary
// of the target's result type, dtrmm() is applied to that temporary and the temporary is added
// to C. (NOTE(review): the exact contract of dgemm()/dtrmm() is defined by the Blaze BLAS
// wrappers and is not visible here.)
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   typedef typename MT3::ResultType  TmpType;

   const bool leftTriangular ( IsTriangular<MT4>::value );
   const bool rightTriangular( IsTriangular<MT5>::value );

   if( !leftTriangular && !rightTriangular ) {
      // General case: one matrix-matrix multiplication call updates C directly
      dgemm( C, A, B, scalar, 1.0 );
   }
   else if( leftTriangular ) {
      // Triangular left operand: work on a copy of B
      TmpType product( B );
      dtrmm( product, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
      addAssign( C, product );
   }
   else {
      // Triangular right operand: work on a copy of A
      TmpType product( A );
      dtrmm( product, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
      addAssign( C, product );
   }
}
#endif
6733  //**********************************************************************************************
6734 
6735  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
#if BLAZE_BLAS_MODE

// BLAS-based addition assignment for single precision complex operands; only compiled in BLAS
// mode and enabled when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
//
// \param C      The target matrix that is incremented.
// \param A      The left-hand side multiplication operand.
// \param B      The right-hand side multiplication operand.
// \param scalar The scaling factor; it is passed on as complex<float>( scalar, 0.0F ).
//
// Without a triangular operand a single cgemm() call is issued with the complex scalar and
// complex<float>( 1.0F, 0.0F ) as the two trailing factors. With a triangular operand the other
// operand is copied into a temporary of the target's result type, ctrmm() is applied to that
// temporary and the temporary is added to C. (NOTE(review): the exact contract of
// cgemm()/ctrmm() is defined by the Blaze BLAS wrappers and is not visible here.)
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   typedef typename MT3::ResultType  TmpType;

   const bool leftTriangular ( IsTriangular<MT4>::value );
   const bool rightTriangular( IsTriangular<MT5>::value );

   if( !leftTriangular && !rightTriangular ) {
      // General case: one matrix-matrix multiplication call updates C directly
      cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
   }
   else if( leftTriangular ) {
      // Triangular left operand: work on a copy of B
      TmpType product( B );
      ctrmm( product, A, CblasLeft,
             ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
             complex<float>( scalar, 0.0F ) );
      addAssign( C, product );
   }
   else {
      // Triangular right operand: work on a copy of A
      TmpType product( A );
      ctrmm( product, B, CblasRight,
             ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
             complex<float>( scalar, 0.0F ) );
      addAssign( C, product );
   }
}
#endif
6776  //**********************************************************************************************
6777 
6778  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
#if BLAZE_BLAS_MODE

// BLAS-based addition assignment for double precision complex operands; only compiled in BLAS
// mode and enabled when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
//
// \param C      The target matrix that is incremented.
// \param A      The left-hand side multiplication operand.
// \param B      The right-hand side multiplication operand.
// \param scalar The scaling factor; it is passed on as complex<double>( scalar, 0.0 ).
//
// Without a triangular operand a single zgemm() call is issued with the complex scalar and
// complex<double>( 1.0, 0.0 ) as the two trailing factors. With a triangular operand the other
// operand is copied into a temporary of the target's result type, ztrmm() is applied to that
// temporary and the temporary is added to C. (NOTE(review): the exact contract of
// zgemm()/ztrmm() is defined by the Blaze BLAS wrappers and is not visible here.)
template< typename MT3    // Type of the left-hand side target matrix
        , typename MT4    // Type of the left-hand side matrix operand
        , typename MT5    // Type of the right-hand side matrix operand
        , typename ST2 >  // Type of the scalar value
static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
   selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
{
   typedef typename MT3::ResultType  TmpType;

   const bool leftTriangular ( IsTriangular<MT4>::value );
   const bool rightTriangular( IsTriangular<MT5>::value );

   if( !leftTriangular && !rightTriangular ) {
      // General case: one matrix-matrix multiplication call updates C directly
      zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
   }
   else if( leftTriangular ) {
      // Triangular left operand: work on a copy of B
      TmpType product( B );
      ztrmm( product, A, CblasLeft,
             ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
             complex<double>( scalar, 0.0 ) );
      addAssign( C, product );
   }
   else {
      // Triangular right operand: work on a copy of A
      TmpType product( A );
      ztrmm( product, B, CblasRight,
             ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
             complex<double>( scalar, 0.0 ) );
      addAssign( C, product );
   }
}
#endif
6819  //**********************************************************************************************
6820 
6821  //**Restructuring addition assignment to row-major matrices*************************************
6836  template< typename MT > // Type of the target matrix
6837  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6838  addAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
6839  {
6841 
6843 
6844  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6845  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6846 
6847  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6848  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6849 
6850  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
6851  addAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
6852  else if( IsSymmetric<MT1>::value )
6853  addAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
6854  else
6855  addAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
6856  }
6857  //**********************************************************************************************
6858 
6859  //**Addition assignment to sparse matrices******************************************************
6860  // No special implementation for the addition assignment to sparse matrices.
6861  //**********************************************************************************************
6862 
6863  //**Subtraction assignment to dense matrices****************************************************
6875  template< typename MT // Type of the target dense matrix
6876  , bool SO > // Storage order of the target dense matrix
6877  friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
6878  subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6879  {
6881 
6882  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6883  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6884 
6885  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6886  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6887 
6888  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6889  return;
6890  }
6891 
6892  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6893  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6894 
6895  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6896  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6897  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6898  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6899  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6900  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6901 
6902  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6903  }
6904  //**********************************************************************************************
6905 
6906  //**Subtraction assignment to dense matrices (kernel selection)*********************************
6917  template< typename MT3 // Type of the left-hand side target matrix
6918  , typename MT4 // Type of the left-hand side matrix operand
6919  , typename MT5 // Type of the right-hand side matrix operand
6920  , typename ST2 > // Type of the scalar value
6921  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6922  {
6923  if( ( IsDiagonal<MT4>::value ) ||
6924  ( C.rows() * C.columns() < TDMATTDMATMULT_THRESHOLD ) )
6925  selectSmallSubAssignKernel( C, A, B, scalar );
6926  else
6927  selectBlasSubAssignKernel( C, A, B, scalar );
6928  }
6929  //**********************************************************************************************
6930 
6931  //**Default subtraction assignment to dense matrices (general/general)**************************
6945  template< typename MT3 // Type of the left-hand side target matrix
6946  , typename MT4 // Type of the left-hand side matrix operand
6947  , typename MT5 // Type of the right-hand side matrix operand
6948  , typename ST2 > // Type of the scalar value
6949  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
6950  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6951  {
6952  const ResultType tmp( serial( A * B * scalar ) );
6953  subAssign( C, tmp );
6954  }
6955  //**********************************************************************************************
6956 
6957  //**Default subtraction assignment to dense matrices (general/diagonal)*************************
6971  template< typename MT3 // Type of the left-hand side target matrix
6972  , typename MT4 // Type of the left-hand side matrix operand
6973  , typename MT5 // Type of the right-hand side matrix operand
6974  , typename ST2 > // Type of the scalar value
6975  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
6976  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6977  {
6979 
6980  const size_t M( A.rows() );
6981  const size_t N( B.columns() );
6982 
6983  for( size_t j=0UL; j<N; ++j )
6984  {
6985  const size_t ibegin( ( IsLower<MT4>::value )
6986  ?( IsStrictlyLower<MT4>::value ? j+1UL : j )
6987  :( 0UL ) );
6988  const size_t iend( ( IsUpper<MT4>::value )
6989  ?( IsStrictlyUpper<MT4>::value ? j : j+1UL )
6990  :( M ) );
6991  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6992 
6993  const size_t inum( iend - ibegin );
6994  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6995 
6996  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6997  C(i ,j) -= A(i ,j) * B(j,j) * scalar;
6998  C(i+1UL,j) -= A(i+1UL,j) * B(j,j) * scalar;
6999  }
7000  if( ipos < iend ) {
7001  C(ipos,j) -= A(ipos,j) * B(j,j) * scalar;
7002  }
7003  }
7004  }
7005  //**********************************************************************************************
7006 
7007  //**Default subtraction assignment to dense matrices (diagonal/general)*************************
7021  template< typename MT3 // Type of the left-hand side target matrix
7022  , typename MT4 // Type of the left-hand side matrix operand
7023  , typename MT5 // Type of the right-hand side matrix operand
7024  , typename ST2 > // Type of the scalar value
7025  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
7026  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7027  {
7029 
7030  const size_t M( A.rows() );
7031  const size_t N( B.columns() );
7032 
7033  for( size_t j=0UL; j<N; ++j )
7034  {
7035  const size_t ibegin( ( IsLower<MT5>::value )
7036  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
7037  :( 0UL ) );
7038  const size_t iend( ( IsUpper<MT5>::value )
7039  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
7040  :( M ) );
7041  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
7042 
7043  const size_t inum( iend - ibegin );
7044  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
7045 
7046  for( size_t i=ibegin; i<ipos; i+=2UL ) {
7047  C(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
7048  C(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
7049  }
7050  if( ipos < iend ) {
7051  C(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
7052  }
7053  }
7054  }
7055  //**********************************************************************************************
7056 
7057  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
7071  template< typename MT3 // Type of the left-hand side target matrix
7072  , typename MT4 // Type of the left-hand side matrix operand
7073  , typename MT5 // Type of the right-hand side matrix operand
7074  , typename ST2 > // Type of the scalar value
7075  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
7076  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7077  {
7079 
7080  for( size_t i=0UL; i<A.rows(); ++i ) {
7081  C(i,i) -= A(i,i) * B(i,i) * scalar;
7082  }
7083  }
7084  //**********************************************************************************************
7085 
7086  //**Default subtraction assignment to dense matrices (small matrices)***************************
// Small-matrix subtraction assignment kernel for the non-vectorized case.
//
// This overload is selected when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold
// (it is guarded by DisableIf). It has no implementation of its own and simply forwards all
// four arguments (target C, operands A and B, and the scalar) to selectDefaultSubAssignKernel().
7100  template< typename MT3 // Type of the left-hand side target matrix
7101  , typename MT4 // Type of the left-hand side matrix operand
7102  , typename MT5 // Type of the right-hand side matrix operand
7103  , typename ST2 > // Type of the scalar value
7104  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7105  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7106  {
7107  selectDefaultSubAssignKernel( C, A, B, scalar );
7108  }
7109  //**********************************************************************************************
7110 
7111  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
7126  template< typename MT3 // Type of the left-hand side target matrix
7127  , typename MT4 // Type of the left-hand side matrix operand
7128  , typename MT5 // Type of the right-hand side matrix operand
7129  , typename ST2 > // Type of the scalar value
7130  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7131  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7132  {
7137 
7138  if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
7139  const typename MT5::OppositeType tmp( serial( B ) );
7140  subAssign( ~C, A * tmp * scalar );
7141  }
7142  else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
7143  const typename MT4::OppositeType tmp( serial( A ) );
7144  subAssign( ~C, tmp * B * scalar );
7145  }
7146  else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
7147  const typename MT5::OppositeType tmp( serial( B ) );
7148  subAssign( ~C, A * tmp * scalar );
7149  }
7150  else {
7151  const typename MT4::OppositeType tmp( serial( A ) );
7152  subAssign( ~C, tmp * B * scalar );
7153  }
7154  }
7155  //**********************************************************************************************
7156 
7157  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
/*!\brief Vectorized default subtraction assignment of a scaled matrix product to a
//        column-major dense matrix (small matrices).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// For every processed tile the kernel accumulates the products A(i..,k) * B(k,j) over k in
// intrinsic accumulators, multiplies the accumulated sum by the broadcast \a scalar and
// subtracts the result from the corresponding elements of \a C.
//
// The rows of \a C are walked in tiles of 8, 4 and 2 multiples of \c IT::size, followed by a
// final tile that starts at the first unprocessed row. The 4-, 2- and final tiles handle two
// columns per step plus one trailing column.
//
// \c kbegin and \c kend restrict the k-range via the compile-time \c IsLower / \c IsUpper /
// \c IsStrictlyLower / \c IsStrictlyUpper traits of the operand types, so that k-indices that
// can only hit structurally zero elements are skipped.
//
// NOTE(review): the accumulators are default-constructed and then summed into, which
// presumably relies on \c IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the final tile loads/stores a full intrinsic vector starting at row \c i even
// when fewer than \c IT::size rows remain; presumably the matrices are padded - confirm.
*/
7172  template< typename MT3 // Type of the left-hand side target matrix
7173  , typename MT4 // Type of the left-hand side matrix operand
7174  , typename MT5 // Type of the right-hand side matrix operand
7175  , typename ST2 > // Type of the scalar value
7176  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7177  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7178  {
7179  typedef IntrinsicTrait<ElementType> IT;
7180 
7181  const size_t M( A.rows() );
7182  const size_t N( B.columns() );
7183  const size_t K( A.columns() );
7184 
// The scalar is broadcast once and applied to every accumulated sum before the subtraction.
7185  const IntrinsicType factor( set( scalar ) );
7186 
7187  size_t i( 0UL );
7188 
// Row tile of 8*IT::size rows, one column of C per iteration.
7189  for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
7190  for( size_t j=0UL; j<N; ++j )
7191  {
// First k that can contribute: bounded below by column j for a lower B (j+1 if strictly
// lower) and by row i for an upper A.
7192  const size_t kbegin( ( IsLower<MT5>::value )
7193  ?( ( IsUpper<MT4>::value )
7194  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7195  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7196  :( IsUpper<MT4>::value ? i : 0UL ) );
// One past the last k that can contribute: bounded above by column j for an upper B and by
// the end of the row tile for a lower A.
7197  const size_t kend( ( IsUpper<MT5>::value )
7198  ?( ( IsLower<MT4>::value )
7199  ?( min( i+IT::size*8UL, K, ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
7200  :( IsStrictlyUpper<MT5>::value ? j : j+1UL ) )
7201  :( IsLower<MT4>::value ? min( i+IT::size*8UL, K ) : K ) );
7202 
7203  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7204 
7205  for( size_t k=kbegin; k<kend; ++k ) {
7206  const IntrinsicType b1( set( B(k,j) ) );
7207  xmm1 = xmm1 + A.load(i ,k) * b1;
7208  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
7209  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
7210  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
7211  xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
7212  xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
7213  xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
7214  xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
7215  }
7216 
// C(tile,j) -= sum * scalar
7217  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
7218  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
7219  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
7220  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
7221  (~C).store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
7222  (~C).store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
7223  (~C).store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
7224  (~C).store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
7225  }
7226  }
7227 
// Row tile of 4*IT::size rows, two columns of C per iteration plus one trailing column.
7228  for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL )
7229  {
7230  size_t j( 0UL );
7231 
7232  for( ; (j+2UL) <= N; j+=2UL )
7233  {
7234  const size_t kbegin( ( IsLower<MT5>::value )
7235  ?( ( IsUpper<MT4>::value )
7236  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7237  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7238  :( IsUpper<MT4>::value ? i : 0UL ) );
// The upper bound for an upper B covers both columns j and j+1 of this step.
7239  const size_t kend( ( IsUpper<MT5>::value )
7240  ?( ( IsLower<MT4>::value )
7241  ?( min( i+IT::size*4UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
7242  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
7243  :( IsLower<MT4>::value ? min( i+IT::size*4UL, K ) : K ) );
7244 
7245  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7246 
7247  for( size_t k=kbegin; k<kend; ++k ) {
7248  const IntrinsicType a1( A.load(i ,k) );
7249  const IntrinsicType a2( A.load(i+IT::size ,k) );
7250  const IntrinsicType a3( A.load(i+IT::size*2UL,k) );
7251  const IntrinsicType a4( A.load(i+IT::size*3UL,k) );
7252  const IntrinsicType b1( set( B(k,j ) ) );
7253  const IntrinsicType b2( set( B(k,j+1UL) ) );
7254  xmm1 = xmm1 + a1 * b1;
7255  xmm2 = xmm2 + a2 * b1;
7256  xmm3 = xmm3 + a3 * b1;
7257  xmm4 = xmm4 + a4 * b1;
7258  xmm5 = xmm5 + a1 * b2;
7259  xmm6 = xmm6 + a2 * b2;
7260  xmm7 = xmm7 + a3 * b2;
7261  xmm8 = xmm8 + a4 * b2;
7262  }
7263 
7264  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7265  (~C).store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
7266  (~C).store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
7267  (~C).store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
7268  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
7269  (~C).store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
7270  (~C).store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
7271  (~C).store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
7272  }
7273 
// Trailing single column (odd N); the upper bound here only considers a lower A.
7274  if( j < N )
7275  {
7276  const size_t kbegin( ( IsLower<MT5>::value )
7277  ?( ( IsUpper<MT4>::value )
7278  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7279  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7280  :( IsUpper<MT4>::value ? i : 0UL ) );
7281  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, K ) ):( K ) );
7282 
7283  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7284 
7285  for( size_t k=kbegin; k<kend; ++k ) {
7286  const IntrinsicType b1( set( B(k,j) ) );
7287  xmm1 = xmm1 + A.load(i ,k) * b1;
7288  xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
7289  xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
7290  xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
7291  }
7292 
7293  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
7294  (~C).store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
7295  (~C).store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
7296  (~C).store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
7297  }
7298  }
7299 
// Row tile of 2*IT::size rows, two columns of C per iteration plus one trailing column.
7300  for( ; (i+IT::size) < M; i+=IT::size*2UL )
7301  {
7302  size_t j( 0UL );
7303 
7304  for( ; (j+2UL) <= N; j+=2UL )
7305  {
7306  const size_t kbegin( ( IsLower<MT5>::value )
7307  ?( ( IsUpper<MT4>::value )
7308  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7309  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7310  :( IsUpper<MT4>::value ? i : 0UL ) );
7311  const size_t kend( ( IsUpper<MT5>::value )
7312  ?( ( IsLower<MT4>::value )
7313  ?( min( i+IT::size*2UL, K, ( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) ) )
7314  :( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL ) )
7315  :( IsLower<MT4>::value ? min( i+IT::size*2UL, K ) : K ) );
7316 
7317  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7318 
7319  for( size_t k=kbegin; k<kend; ++k ) {
7320  const IntrinsicType a1( A.load(i ,k) );
7321  const IntrinsicType a2( A.load(i+IT::size,k) );
7322  const IntrinsicType b1( set( B(k,j ) ) );
7323  const IntrinsicType b2( set( B(k,j+1UL) ) );
7324  xmm1 = xmm1 + a1 * b1;
7325  xmm2 = xmm2 + a2 * b1;
7326  xmm3 = xmm3 + a1 * b2;
7327  xmm4 = xmm4 + a2 * b2;
7328  }
7329 
7330  (~C).store( i , j , (~C).load(i ,j ) - xmm1 * factor );
7331  (~C).store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
7332  (~C).store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
7333  (~C).store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
7334  }
7335 
// Trailing single column (odd N).
7336  if( j < N )
7337  {
7338  const size_t kbegin( ( IsLower<MT5>::value )
7339  ?( ( IsUpper<MT4>::value )
7340  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7341  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7342  :( IsUpper<MT4>::value ? i : 0UL ) );
7343  const size_t kend( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, K ) ):( K ) );
7344 
7345  IntrinsicType xmm1, xmm2;
7346 
7347  for( size_t k=kbegin; k<kend; ++k ) {
7348  const IntrinsicType b1( set( B(k,j) ) );
7349  xmm1 = xmm1 + A.load(i ,k) * b1;
7350  xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
7351  }
7352 
7353  (~C).store( i , j, (~C).load(i ,j) - xmm1 * factor );
7354  (~C).store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
7355  }
7356  }
7357 
// Final tile: a single intrinsic vector starting at the first unprocessed row.
7358  if( i < M )
7359  {
7360  size_t j( 0UL );
7361 
7362  for( ; (j+2UL) <= N; j+=2UL )
7363  {
7364  const size_t kbegin( ( IsLower<MT5>::value )
7365  ?( ( IsUpper<MT4>::value )
7366  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7367  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7368  :( IsUpper<MT4>::value ? i : 0UL ) );
// Only the structure of B narrows the upper bound in this tile.
7369  const size_t kend( ( IsUpper<MT5>::value )
7370  ?( IsStrictlyUpper<MT5>::value ? j+1UL : j+2UL )
7371  :( K ) );
7372 
7373  IntrinsicType xmm1, xmm2;
7374 
7375  for( size_t k=kbegin; k<kend; ++k ) {
7376  const IntrinsicType a1( A.load(i,k) );
7377  xmm1 = xmm1 + a1 * set( B(k,j ) );
7378  xmm2 = xmm2 + a1 * set( B(k,j+1UL) );
7379  }
7380 
7381  (~C).store( i, j , (~C).load(i,j ) - xmm1 * factor );
7382  (~C).store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
7383  }
7384 
// Trailing single column (odd N); the k-range always runs up to K here.
7385  if( j < N )
7386  {
7387  const size_t kbegin( ( IsLower<MT5>::value )
7388  ?( ( IsUpper<MT4>::value )
7389  ?( max( i, ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
7390  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
7391  :( IsUpper<MT4>::value ? i : 0UL ) );
7392 
7393  IntrinsicType xmm1;
7394 
7395  for( size_t k=kbegin; k<K; ++k ) {
7396  xmm1 = xmm1 + A.load(i,k) * set( B(k,j) );
7397  }
7398 
7399  (~C).store( i, j, (~C).load(i,j) - xmm1 * factor );
7400  }
7401  }
7402  }
7403  //**********************************************************************************************
7404 
7405  //**Default subtraction assignment to dense matrices (large matrices)***************************
/*!\brief Default subtraction assignment of a scaled matrix product (large matrices).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected whenever \c UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not
// hold (it is guarded by \c DisableIf). It has no large-matrix strategy of its own and simply
// forwards all arguments unchanged to \c selectDefaultSubAssignKernel().
*/
7419  template< typename MT3 // Type of the left-hand side target matrix
7420  , typename MT4 // Type of the left-hand side matrix operand
7421  , typename MT5 // Type of the right-hand side matrix operand
7422  , typename ST2 > // Type of the scalar value
7423  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7424  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7425  {
7426  selectDefaultSubAssignKernel( C, A, B, scalar );
7427  }
7428  //**********************************************************************************************
7429 
7430  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
/*!\brief Vectorized default subtraction assignment of a scaled matrix product to a row-major
//        dense matrix (large matrices).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// There is no dedicated large-matrix kernel for row-major targets: the call is forwarded to
// \c selectSmallSubAssignKernel() with the target passed as \c ~C.
*/
7445  template< typename MT3 // Type of the left-hand side target matrix
7446  , typename MT4 // Type of the left-hand side matrix operand
7447  , typename MT5 // Type of the right-hand side matrix operand
7448  , typename ST2 > // Type of the scalar value
7449  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7450  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7451  {
7452  selectSmallSubAssignKernel( ~C, A, B, scalar );
7453  }
7454  //**********************************************************************************************
7455 
7456  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
7471  template< typename MT3 // Type of the left-hand side target matrix
7472  , typename MT4 // Type of the left-hand side matrix operand
7473  , typename MT5 // Type of the right-hand side matrix operand
7474  , typename ST2 > // Type of the scalar value
7475  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7476  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7477  {
7478  typedef IntrinsicTrait<ElementType> IT;
7479 
7480  const size_t M( A.rows() );
7481  const size_t N( B.columns() );
7482  const size_t K( A.columns() );
7483 
7484  const size_t iblock( 128UL );
7485  const size_t jblock( 64UL );
7486  const size_t kblock( 128UL );
7487 
7488  const IntrinsicType factor( set( scalar ) );
7489 
7490  for( size_t ii=0UL; ii<M; ii+=iblock )
7491  {
7492  const size_t iend( min( ii+iblock, M ) );
7493 
7494  for( size_t jj=0UL; jj<N; jj+=jblock )
7495  {
7496  const size_t jend( min( jj+jblock, N ) );
7497 
7498  for( size_t kk=0UL; kk<K; kk+=kblock )
7499  {
7500  const size_t ktmp( min( kk+kblock, K ) );
7501 
7502  size_t i( ii );
7503 
7504  for( ; (i+IT::size*3UL) < iend; i+=IT::size*4UL )
7505  {
7506  const size_t i1( i+IT::size );
7507  const size_t i2( i+IT::size*2UL );
7508  const size_t i3( i+IT::size*3UL );
7509 
7510  size_t j( jj );
7511 
7512  for( ; (j+2UL) <= jend; j+=2UL )
7513  {
7514  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7515  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7516  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
7517  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
7518 
7519  IntrinsicType xmm1( (~C).load(i ,j ) );
7520  IntrinsicType xmm2( (~C).load(i1,j ) );
7521  IntrinsicType xmm3( (~C).load(i2,j ) );
7522  IntrinsicType xmm4( (~C).load(i3,j ) );
7523  IntrinsicType xmm5( (~C).load(i ,j+1UL) );
7524  IntrinsicType xmm6( (~C).load(i1,j+1UL) );
7525  IntrinsicType xmm7( (~C).load(i2,j+1UL) );
7526  IntrinsicType xmm8( (~C).load(i3,j+1UL) );
7527 
7528  for( size_t k=kbegin; k<kend; ++k ) {
7529  const IntrinsicType a1( A.load(i ,k) );
7530  const IntrinsicType a2( A.load(i1,k) );
7531  const IntrinsicType a3( A.load(i2,k) );
7532  const IntrinsicType a4( A.load(i3,k) );
7533  const IntrinsicType b1( set( B(k,j ) ) );
7534  const IntrinsicType b2( set( B(k,j+1UL) ) );
7535  xmm1 = xmm1 - a1 * b1;
7536  xmm2 = xmm2 - a2 * b1;
7537  xmm3 = xmm3 - a3 * b1;
7538  xmm4 = xmm4 - a4 * b1;
7539  xmm5 = xmm5 - a1 * b2;
7540  xmm6 = xmm6 - a2 * b2;
7541  xmm7 = xmm7 - a3 * b2;
7542  xmm8 = xmm8 - a4 * b2;
7543  }
7544 
7545  (~C).store( i , j , xmm1 * factor );
7546  (~C).store( i1, j , xmm2 * factor );
7547  (~C).store( i2, j , xmm3 * factor );
7548  (~C).store( i3, j , xmm4 * factor );
7549  (~C).store( i , j+1UL, xmm5 * factor );
7550  (~C).store( i1, j+1UL, xmm6 * factor );
7551  (~C).store( i2, j+1UL, xmm7 * factor );
7552  (~C).store( i3, j+1UL, xmm8 * factor );
7553  }
7554 
7555  if( j < jend )
7556  {
7557  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7558  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7559  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*4UL, ktmp ) ):( ktmp ),
7560  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
7561 
7562  IntrinsicType xmm1( (~C).load(i ,j) );
7563  IntrinsicType xmm2( (~C).load(i1,j) );
7564  IntrinsicType xmm3( (~C).load(i2,j) );
7565  IntrinsicType xmm4( (~C).load(i3,j) );
7566 
7567  for( size_t k=kbegin; k<kend; ++k ) {
7568  const IntrinsicType b1( set( B(k,j) ) );
7569  xmm1 = xmm1 - A.load(i ,k) * b1;
7570  xmm2 = xmm2 - A.load(i1,k) * b1;
7571  xmm3 = xmm3 - A.load(i2,k) * b1;
7572  xmm4 = xmm4 - A.load(i3,k) * b1;
7573  }
7574 
7575  (~C).store( i , j, xmm1 * factor );
7576  (~C).store( i1, j, xmm2 * factor );
7577  (~C).store( i2, j, xmm3 * factor );
7578  (~C).store( i3, j, xmm4 * factor );
7579  }
7580  }
7581 
7582  for( ; (i+IT::size) < iend; i+=IT::size*2UL )
7583  {
7584  const size_t i1( i+IT::size );
7585 
7586  size_t j( jj );
7587 
7588  for( ; (j+4UL) <= jend; j+=4UL )
7589  {
7590  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7591  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7592  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
7593  ( IsUpper<MT5>::value )?( j+4UL ):( ktmp ) ) );
7594 
7595  IntrinsicType xmm1( (~C).load(i ,j ) );
7596  IntrinsicType xmm2( (~C).load(i1,j ) );
7597  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
7598  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
7599  IntrinsicType xmm5( (~C).load(i ,j+2UL) );
7600  IntrinsicType xmm6( (~C).load(i1,j+2UL) );
7601  IntrinsicType xmm7( (~C).load(i ,j+3UL) );
7602  IntrinsicType xmm8( (~C).load(i1,j+3UL) );
7603 
7604  for( size_t k=kbegin; k<kend; ++k ) {
7605  const IntrinsicType a1( A.load(i ,k) );
7606  const IntrinsicType a2( A.load(i1,k) );
7607  const IntrinsicType b1( set( B(k,j ) ) );
7608  const IntrinsicType b2( set( B(k,j+1UL) ) );
7609  const IntrinsicType b3( set( B(k,j+2UL) ) );
7610  const IntrinsicType b4( set( B(k,j+3UL) ) );
7611  xmm1 = xmm1 - a1 * b1;
7612  xmm2 = xmm2 - a2 * b1;
7613  xmm3 = xmm3 - a1 * b2;
7614  xmm4 = xmm4 - a2 * b2;
7615  xmm5 = xmm5 - a1 * b3;
7616  xmm6 = xmm6 - a2 * b3;
7617  xmm7 = xmm7 - a1 * b4;
7618  xmm8 = xmm8 - a2 * b4;
7619  }
7620 
7621  (~C).store( i , j , xmm1 * factor );
7622  (~C).store( i1, j , xmm2 * factor );
7623  (~C).store( i , j+1UL, xmm3 * factor );
7624  (~C).store( i1, j+1UL, xmm4 * factor );
7625  (~C).store( i , j+2UL, xmm5 * factor );
7626  (~C).store( i1, j+2UL, xmm6 * factor );
7627  (~C).store( i , j+3UL, xmm7 * factor );
7628  (~C).store( i1, j+3UL, xmm8 * factor );
7629  }
7630 
7631  for( ; (j+2UL) <= jend; j+=2UL )
7632  {
7633  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7634  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7635  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
7636  ( IsUpper<MT5>::value )?( j+2UL ):( ktmp ) ) );
7637 
7638  IntrinsicType xmm1( (~C).load(i ,j ) );
7639  IntrinsicType xmm2( (~C).load(i1,j ) );
7640  IntrinsicType xmm3( (~C).load(i ,j+1UL) );
7641  IntrinsicType xmm4( (~C).load(i1,j+1UL) );
7642 
7643  for( size_t k=kbegin; k<kend; ++k ) {
7644  const IntrinsicType a1( A.load(i ,k) );
7645  const IntrinsicType a2( A.load(i1,k) );
7646  const IntrinsicType b1( set( B(k,j ) ) );
7647  const IntrinsicType b2( set( B(k,j+1UL) ) );
7648  xmm1 = xmm1 - a1 * b1;
7649  xmm2 = xmm2 - a2 * b1;
7650  xmm3 = xmm3 - a1 * b2;
7651  xmm4 = xmm4 - a2 * b2;
7652  }
7653 
7654  (~C).store( i , j , xmm1 * factor );
7655  (~C).store( i1, j , xmm2 * factor );
7656  (~C).store( i , j+1UL, xmm3 * factor );
7657  (~C).store( i1, j+1UL, xmm4 * factor );
7658  }
7659 
7660  if( j < jend )
7661  {
7662  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7663  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7664  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size*2UL, ktmp ) ):( ktmp ),
7665  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
7666 
7667  IntrinsicType xmm1( (~C).load(i ,j) );
7668  IntrinsicType xmm2( (~C).load(i1,j) );
7669 
7670  for( size_t k=kbegin; k<kend; ++k ) {
7671  const IntrinsicType b1( set( B(k,j) ) );
7672  xmm1 = xmm1 - A.load(i ,k) * b1;
7673  xmm2 = xmm2 - A.load(i1,k) * b1;
7674  }
7675 
7676  (~C).store( i , j, xmm1 * factor );
7677  (~C).store( i1, j, xmm2 * factor );
7678  }
7679  }
7680 
7681  if( i < iend )
7682  {
7683  for( size_t j=jj; j<jend; ++j )
7684  {
7685  const size_t kbegin( max( ( IsUpper<MT4>::value )?( max( i, kk ) ):( kk ),
7686  ( IsLower<MT5>::value )?( max( j, kk ) ):( kk ) ) );
7687  const size_t kend ( min( ( IsLower<MT4>::value )?( min( i+IT::size, ktmp ) ):( ktmp ),
7688  ( IsUpper<MT5>::value )?( j+1UL ):( ktmp ) ) );
7689 
7690  IntrinsicType xmm1( (~C).load(i,j) );
7691 
7692  for( size_t k=kbegin; k<kend; ++k ) {
7693  const IntrinsicType b1( set( B(k,j) ) );
7694  xmm1 = xmm1 - A.load(i,k) * b1;
7695  }
7696 
7697  (~C).store( i, j, xmm1 * factor );
7698  }
7699  }
7700  }
7701  }
7702  }
7703  }
7704  //**********************************************************************************************
7705 
7706  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
/*!\brief Default subtraction assignment of a scaled matrix product (BLAS kernel selection).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is enabled via \c UseDefaultKernel<MT3,MT4,MT5,ST2> and performs no BLAS call:
// it forwards all arguments unchanged to \c selectLargeSubAssignKernel().
// NOTE(review): presumably selected when none of the type-specific BLAS overloads applies -
// confirm against the definition of \c UseDefaultKernel.
*/
7721  template< typename MT3 // Type of the left-hand side target matrix
7722  , typename MT4 // Type of the left-hand side matrix operand
7723  , typename MT5 // Type of the right-hand side matrix operand
7724  , typename ST2 > // Type of the scalar value
7725  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7726  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7727  {
7728  selectLargeSubAssignKernel( C, A, B, scalar );
7729  }
7730  //**********************************************************************************************
7731 
7732  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
7733 #if BLAZE_BLAS_MODE
7734 
7747  template< typename MT3 // Type of the left-hand side target matrix
7748  , typename MT4 // Type of the left-hand side matrix operand
7749  , typename MT5 // Type of the right-hand side matrix operand
7750  , typename ST2 > // Type of the scalar value
7751  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7752  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7753  {
7754  if( IsTriangular<MT4>::value ) {
7755  typename MT3::ResultType tmp( B );
7756  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7757  subAssign( C, tmp );
7758  }
7759  else if( IsTriangular<MT5>::value ) {
7760  typename MT3::ResultType tmp( A );
7761  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7762  subAssign( C, tmp );
7763  }
7764  else {
7765  sgemm( C, A, B, -scalar, 1.0F );
7766  }
7767  }
7768 #endif
7769  //**********************************************************************************************
7770 
7771  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
7772 #if BLAZE_BLAS_MODE
7773 
7786  template< typename MT3 // Type of the left-hand side target matrix
7787  , typename MT4 // Type of the left-hand side matrix operand
7788  , typename MT5 // Type of the right-hand side matrix operand
7789  , typename ST2 > // Type of the scalar value
7790  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7791  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7792  {
7793  if( IsTriangular<MT4>::value ) {
7794  typename MT3::ResultType tmp( B );
7795  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7796  subAssign( C, tmp );
7797  }
7798  else if( IsTriangular<MT5>::value ) {
7799  typename MT3::ResultType tmp( A );
7800  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7801  subAssign( C, tmp );
7802  }
7803  else {
7804  dgemm( C, A, B, -scalar, 1.0 );
7805  }
7806  }
7807 #endif
7808  //**********************************************************************************************
7809 
7810  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
7811 #if BLAZE_BLAS_MODE
7812 
7825  template< typename MT3 // Type of the left-hand side target matrix
7826  , typename MT4 // Type of the left-hand side matrix operand
7827  , typename MT5 // Type of the right-hand side matrix operand
7828  , typename ST2 > // Type of the scalar value
7829  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7830  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7831  {
7832  if( IsTriangular<MT4>::value ) {
7833  typename MT3::ResultType tmp( B );
7834  ctrmm( tmp, A, CblasLeft,
7835  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7836  complex<float>( scalar, 0.0F ) );
7837  subAssign( C, tmp );
7838  }
7839  else if( IsTriangular<MT5>::value ) {
7840  typename MT3::ResultType tmp( A );
7841  ctrmm( tmp, B, CblasRight,
7842  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7843  complex<float>( scalar, 0.0F ) );
7844  subAssign( C, tmp );
7845  }
7846  else {
7847  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
7848  }
7849  }
7850 #endif
7851  //**********************************************************************************************
7852 
7853  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
7854 #if BLAZE_BLAS_MODE
7855 
7868  template< typename MT3 // Type of the left-hand side target matrix
7869  , typename MT4 // Type of the left-hand side matrix operand
7870  , typename MT5 // Type of the right-hand side matrix operand
7871  , typename ST2 > // Type of the scalar value
7872  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7873  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7874  {
7875  if( IsTriangular<MT4>::value ) {
7876  typename MT3::ResultType tmp( B );
7877  ztrmm( tmp, A, CblasLeft,
7878  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7879  complex<float>( scalar, 0.0 ) );
7880  subAssign( C, tmp );
7881  }
7882  else if( IsTriangular<MT5>::value ) {
7883  typename MT3::ResultType tmp( A );
7884  ztrmm( tmp, B, CblasRight,
7885  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7886  complex<float>( scalar, 0.0 ) );
7887  subAssign( C, tmp );
7888  }
7889  else {
7890  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
7891  }
7892  }
7893 #endif
7894  //**********************************************************************************************
7895 
7896  //**Restructuring subtraction assignment to row-major matrices**********************************
7910  template< typename MT > // Type of the target matrix
7911  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
7912  subAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
7913  {
7915 
7917 
7918  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7919  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7920 
7921  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7922  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7923 
7924  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
7925  subAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
7926  else if( IsSymmetric<MT1>::value )
7927  subAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
7928  else
7929  subAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
7930  }
7931  //**********************************************************************************************
7932 
7933  //**Subtraction assignment to sparse matrices***************************************************
7934  // No special implementation for the subtraction assignment to sparse matrices.
7935  //**********************************************************************************************
7936 
7937  //**Multiplication assignment to dense matrices*************************************************
7938  // No special implementation for the multiplication assignment to dense matrices.
7939  //**********************************************************************************************
7940 
7941  //**Multiplication assignment to sparse matrices************************************************
7942  // No special implementation for the multiplication assignment to sparse matrices.
7943  //**********************************************************************************************
7944 
7945  //**SMP assignment to dense matrices************************************************************
7960  template< typename MT // Type of the target dense matrix
7961  , bool SO > // Storage order of the target dense matrix
7962  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7963  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7964  {
7966 
7967  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7968  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7969 
7970  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7971  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7972 
7973  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7974  return;
7975  }
7976  else if( left.columns() == 0UL ) {
7977  reset( ~lhs );
7978  return;
7979  }
7980 
7981  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7982  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7983 
7984  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7985  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7986  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7987  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7988  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7989  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7990 
7991  smpAssign( ~lhs, A * B * rhs.scalar_ );
7992  }
7993  //**********************************************************************************************
7994 
7995  //**SMP assignment to sparse matrices***********************************************************
8010  template< typename MT // Type of the target sparse matrix
8011  , bool SO > // Storage order of the target sparse matrix
8012  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8013  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8014  {
8016 
8017  typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
8018 
8025 
8026  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8027  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8028 
8029  const TmpType tmp( rhs );
8030  smpAssign( ~lhs, tmp );
8031  }
8032  //**********************************************************************************************
8033 
8034  //**Restructuring SMP assignment to row-major matrices******************************************
8048  template< typename MT > // Type of the target matrix
8049  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8050  smpAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
8051  {
8053 
8055 
8056  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8057  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8058 
8059  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8060  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8061 
8062  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8063  smpAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
8064  else if( IsSymmetric<MT1>::value )
8065  smpAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
8066  else
8067  smpAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8068  }
8069  //**********************************************************************************************
8070 
8071  //**SMP addition assignment to dense matrices***************************************************
8086  template< typename MT // Type of the target dense matrix
8087  , bool SO > // Storage order of the target dense matrix
8088  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8089  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8090  {
8092 
8093  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8094  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8095 
8096  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8097  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8098 
8099  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8100  return;
8101  }
8102 
8103  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8104  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8105 
8106  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8107  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8108  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8109  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8110  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8111  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8112 
8113  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
8114  }
8115  //**********************************************************************************************
8116 
8117  //**Restructuring SMP addition assignment to row-major matrices*********************************
8132  template< typename MT > // Type of the target matrix
8133  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8134  smpAddAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
8135  {
8137 
8139 
8140  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8141  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8142 
8143  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8144  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8145 
8146  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8147  smpAddAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
8148  else if( IsSymmetric<MT1>::value )
8149  smpAddAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
8150  else
8151  smpAddAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8152  }
8153  //**********************************************************************************************
8154 
8155  //**SMP addition assignment to sparse matrices**************************************************
8156  // No special implementation for the SMP addition assignment to sparse matrices.
8157  //**********************************************************************************************
8158 
8159  //**SMP subtraction assignment to dense matrices************************************************
8174  template< typename MT // Type of the target dense matrix
8175  , bool SO > // Storage order of the target dense matrix
8176  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
8177  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
8178  {
8180 
8181  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8182  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8183 
8184  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8185  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8186 
8187  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
8188  return;
8189  }
8190 
8191  LT A( left ); // Evaluation of the left-hand side dense matrix operand
8192  RT B( right ); // Evaluation of the right-hand side dense matrix operand
8193 
8194  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
8195  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
8196  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
8197  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
8198  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
8199  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
8200 
8201  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
8202  }
8203  //**********************************************************************************************
8204 
8205  //**Restructuring SMP subtraction assignment to row-major matrices******************************
8220  template< typename MT > // Type of the target matrix
8221  friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
8222  smpSubAssign( Matrix<MT,false>& lhs, const DMatScalarMultExpr& rhs )
8223  {
8225 
8227 
8228  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
8229  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
8230 
8231  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
8232  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
8233 
8234  if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
8235  smpSubAssign( ~lhs, trans( left ) * trans( right ) * rhs.scalar_ );
8236  else if( IsSymmetric<MT1>::value )
8237  smpSubAssign( ~lhs, trans( left ) * right * rhs.scalar_ );
8238  else
8239  smpSubAssign( ~lhs, left * trans( right ) * rhs.scalar_ );
8240  }
8241  //**********************************************************************************************
8242 
8243  //**SMP subtraction assignment to sparse matrices***********************************************
8244  // No special implementation for the SMP subtraction assignment to sparse matrices.
8245  //**********************************************************************************************
8246 
8247  //**SMP multiplication assignment to dense matrices*********************************************
8248  // No special implementation for the SMP multiplication assignment to dense matrices.
8249  //**********************************************************************************************
8250 
8251  //**SMP multiplication assignment to sparse matrices********************************************
8252  // No special implementation for the SMP multiplication assignment to sparse matrices.
8253  //**********************************************************************************************
8254 
8255  //**Compile time checks*************************************************************************
8263  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
8264  //**********************************************************************************************
8265 };
8267 //*************************************************************************************************
8268 
8269 
8270 
8271 
8272 //=================================================================================================
8273 //
8274 // GLOBAL BINARY ARITHMETIC OPERATORS
8275 //
8276 //=================================================================================================
8277 
8278 //*************************************************************************************************
8304 template< typename T1 // Type of the left-hand side dense matrix
8305  , typename T2 > // Type of the right-hand side dense matrix
8306 inline const TDMatTDMatMultExpr<T1,T2>
8308 {
8310 
8311  if( (~lhs).columns() != (~rhs).rows() )
8312  throw std::invalid_argument( "Matrix sizes do not match" );
8313 
8314  return TDMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
8315 }
8316 //*************************************************************************************************
8317 
8318 
8319 
8320 
8321 //=================================================================================================
8322 //
8323 // ROWS SPECIALIZATIONS
8324 //
8325 //=================================================================================================
8326 
8327 //*************************************************************************************************
8329 template< typename MT1, typename MT2 >
8330 struct Rows< TDMatTDMatMultExpr<MT1,MT2> >
8331  : public Rows<MT1>
8332 {};
8334 //*************************************************************************************************
8335 
8336 
8337 
8338 
8339 //=================================================================================================
8340 //
8341 // COLUMNS SPECIALIZATIONS
8342 //
8343 //=================================================================================================
8344 
8345 //*************************************************************************************************
8347 template< typename MT1, typename MT2 >
8348 struct Columns< TDMatTDMatMultExpr<MT1,MT2> >
8349  : public Columns<MT2>
8350 {};
8352 //*************************************************************************************************
8353 
8354 
8355 
8356 
8357 //=================================================================================================
8358 //
8359 // ISLOWER SPECIALIZATIONS
8360 //
8361 //=================================================================================================
8362 
8363 //*************************************************************************************************
8365 template< typename MT1, typename MT2 >
8366 struct IsLower< TDMatTDMatMultExpr<MT1,MT2> >
8367  : public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
8368 {};
8370 //*************************************************************************************************
8371 
8372 
8373 
8374 
8375 //=================================================================================================
8376 //
8377 // ISUNILOWER SPECIALIZATIONS
8378 //
8379 //=================================================================================================
8380 
8381 //*************************************************************************************************
8383 template< typename MT1, typename MT2 >
8384 struct IsUniLower< TDMatTDMatMultExpr<MT1,MT2> >
8385  : public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
8386 {};
8388 //*************************************************************************************************
8389 
8390 
8391 
8392 
8393 //=================================================================================================
8394 //
8395 // ISSTRICTLYLOWER SPECIALIZATIONS
8396 //
8397 //=================================================================================================
8398 
8399 //*************************************************************************************************
8401 template< typename MT1, typename MT2 >
8402 struct IsStrictlyLower< TDMatTDMatMultExpr<MT1,MT2> >
8403  : public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
8404  , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
8405 {};
8407 //*************************************************************************************************
8408 
8409 
8410 
8411 
8412 //=================================================================================================
8413 //
8414 // ISUPPER SPECIALIZATIONS
8415 //
8416 //=================================================================================================
8417 
8418 //*************************************************************************************************
8420 template< typename MT1, typename MT2 >
8421 struct IsUpper< TDMatTDMatMultExpr<MT1,MT2> >
8422  : public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
8423 {};
8425 //*************************************************************************************************
8426 
8427 
8428 
8429 
8430 //=================================================================================================
8431 //
8432 // ISUNIUPPER SPECIALIZATIONS
8433 //
8434 //=================================================================================================
8435 
8436 //*************************************************************************************************
8438 template< typename MT1, typename MT2 >
8439 struct IsUniUpper< TDMatTDMatMultExpr<MT1,MT2> >
8440  : public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
8441 {};
8443 //*************************************************************************************************
8444 
8445 
8446 
8447 
8448 //=================================================================================================
8449 //
8450 // ISSTRICTLYUPPER SPECIALIZATIONS
8451 //
8452 //=================================================================================================
8453 
8454 //*************************************************************************************************
8456 template< typename MT1, typename MT2 >
8457 struct IsStrictlyUpper< TDMatTDMatMultExpr<MT1,MT2> >
8458  : public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8459  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
8460 {};
8462 //*************************************************************************************************
8463 
8464 
8465 
8466 
8467 //=================================================================================================
8468 //
8469 // EXPRESSION TRAIT SPECIALIZATIONS
8470 //
8471 //=================================================================================================
8472 
8473 //*************************************************************************************************
8475 template< typename MT1, typename MT2, typename VT >
8476 struct TDMatDVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
8477 {
8478  public:
8479  //**********************************************************************************************
8480  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
8481  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
8482  IsDenseVector<VT>::value && IsColumnVector<VT>::value
8483  , typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
8484  , INVALID_TYPE >::Type Type;
8485  //**********************************************************************************************
8486 };
8488 //*************************************************************************************************
8489 
8490 
8491 //*************************************************************************************************
8493 template< typename MT1, typename MT2, typename VT >
8494 struct TDMatSVecMultExprTrait< TDMatTDMatMultExpr<MT1,MT2>, VT >
8495 {
8496  public:
8497  //**********************************************************************************************
8498  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
8499  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
8500  IsSparseVector<VT>::value && IsColumnVector<VT>::value
8501  , typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
8502  , INVALID_TYPE >::Type Type;
8503  //**********************************************************************************************
8504 };
8506 //*************************************************************************************************
8507 
8508 
8509 //*************************************************************************************************
8511 template< typename VT, typename MT1, typename MT2 >
8512 struct TDVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
8513 {
8514  public:
8515  //**********************************************************************************************
8516  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
8517  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
8518  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
8519  , typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8520  , INVALID_TYPE >::Type Type;
8521  //**********************************************************************************************
8522 };
8524 //*************************************************************************************************
8525 
8526 
8527 //*************************************************************************************************
8529 template< typename VT, typename MT1, typename MT2 >
8530 struct TSVecTDMatMultExprTrait< VT, TDMatTDMatMultExpr<MT1,MT2> >
8531 {
8532  public:
8533  //**********************************************************************************************
8534  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
8535  IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
8536  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
8537  , typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8538  , INVALID_TYPE >::Type Type;
8539  //**********************************************************************************************
8540 };
8542 //*************************************************************************************************
8543 
8544 
8545 //*************************************************************************************************
8547 template< typename MT1, typename MT2, bool AF >
8548 struct SubmatrixExprTrait< TDMatTDMatMultExpr<MT1,MT2>, AF >
8549 {
8550  public:
8551  //**********************************************************************************************
8552  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
8553  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
8554  //**********************************************************************************************
8555 };
8557 //*************************************************************************************************
8558 
8559 
8560 //*************************************************************************************************
8562 template< typename MT1, typename MT2 >
8563 struct RowExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
8564 {
8565  public:
8566  //**********************************************************************************************
8567  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
8568  //**********************************************************************************************
8569 };
8571 //*************************************************************************************************
8572 
8573 
8574 //*************************************************************************************************
8576 template< typename MT1, typename MT2 >
8577 struct ColumnExprTrait< TDMatTDMatMultExpr<MT1,MT2> >
8578 {
8579  public:
8580  //**********************************************************************************************
8581  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
8582  //**********************************************************************************************
8583 };
8585 //*************************************************************************************************
8586 
8587 } // namespace blaze
8588 
8589 #endif
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:361
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for mathematical functions.
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:312
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:318
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:8247
Compile time check for triangular matrix types.This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:315
Header file for basic type definitions.
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:306
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:140
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold.This setting specifies t...
Definition: Thresholds.h:176
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:310
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
Header file for the And class template.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:497
Compile time check for lower triangular matrices.This type trait tests whether or not the given templ...
Definition: IsLower.h:90
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:259
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:131
Compile time check for upper triangular matrices.This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:307
Header file for the IsUniLower type trait.
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2503
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:143
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:305
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:424
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold.This threshold speci...
Definition: Thresholds.h:903
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:444
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
Header file for the TSVecTDMatMultExprTrait class template.
Header file for the Or class template.
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > > >::Type store(T *address, const sse_int16_t &value)
Aligned store of a vector of 2-byte integral values.
Definition: Store.h:80
Header file for the TDMatSVecMultExprTrait class template.
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
Header file for the Not class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:311
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:478
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices.This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
Header file for BLAS level 3 functions.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
Header file for the IsStrictlyTriangular type trait.
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for strictly upper triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:468
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type.In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type load(const T *address)
Loads a vector of 2-byte integral values.
Definition: Load.h:79
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:138
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:346
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
System settings for the BLAS mode.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:139
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:488
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:150
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Constraint on the data type.
Header file for the HasMutableDataAccess type trait.
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:141
BLAZE_ALWAYS_INLINE EnableIf< And< IsIntegral< T >, HasSize< T, 2UL > >, sse_int16_t >::Type set(T value)
Sets all values in the vector to the given 2-byte integral value.
Definition: Set.h:73
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:414
Header file for all intrinsic functionality.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:456
Compile time check for strictly lower triangular matrices.This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:309
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:498
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:937
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
Header file for the IsTrue value trait.
Header file for the IsComplex type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:434
Header file for the complex data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:324
Header file for the IsUpper type trait.
Header file for the IsColumnVector type trait.
Constraint on the data type.
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:308
Header file for the IsResizable type trait.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:321
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:142