DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <blaze/math/blas/Level3.h>
53 #include <blaze/math/Functions.h>
54 #include <blaze/math/Intrinsics.h>
55 #include <blaze/math/shims/Reset.h>
91 #include <blaze/system/BLAS.h>
93 #include <blaze/util/Assert.h>
94 #include <blaze/util/Complex.h>
98 #include <blaze/util/DisableIf.h>
99 #include <blaze/util/EnableIf.h>
100 #include <blaze/util/InvalidType.h>
102 #include <blaze/util/mpl/And.h>
103 #include <blaze/util/mpl/Not.h>
104 #include <blaze/util/mpl/Or.h>
105 #include <blaze/util/SelectType.h>
106 #include <blaze/util/Types.h>
113 
114 
115 namespace blaze {
116 
117 //=================================================================================================
118 //
119 // CLASS DMATTDMATMULTEXPR
120 //
121 //=================================================================================================
122 
123 //*************************************************************************************************
130 template< typename MT1 // Type of the left-hand side dense matrix
131  , typename MT2 > // Type of the right-hand side dense matrix
132 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
133  , private MatMatMultExpr
134  , private Computation
135 {
136  private:
137  //**Type definitions****************************************************************************
138  typedef typename MT1::ResultType RT1;
139  typedef typename MT2::ResultType RT2;
140  typedef typename RT1::ElementType ET1;
141  typedef typename RT2::ElementType ET2;
142  typedef typename MT1::CompositeType CT1;
143  typedef typename MT2::CompositeType CT2;
144  //**********************************************************************************************
145 
146  //**********************************************************************************************
149  //**********************************************************************************************
150 
151  //**********************************************************************************************
153  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
154  //**********************************************************************************************
155 
156  //**********************************************************************************************
158 
162  template< typename T1, typename T2, typename T3 >
163  struct IsEvaluationRequired {
164  enum { value = ( evaluateLeft || evaluateRight ) };
165  };
167  //**********************************************************************************************
168 
169  //**********************************************************************************************
171 
174  template< typename T1, typename T2, typename T3 >
175  struct UseSinglePrecisionKernel {
176  enum { value = BLAZE_BLAS_MODE &&
177  HasMutableDataAccess<T1>::value &&
178  HasConstDataAccess<T2>::value &&
179  HasConstDataAccess<T3>::value &&
180  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
181  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
182  IsFloat<typename T1::ElementType>::value &&
183  IsFloat<typename T2::ElementType>::value &&
184  IsFloat<typename T3::ElementType>::value };
185  };
187  //**********************************************************************************************
188 
189  //**********************************************************************************************
191 
194  template< typename T1, typename T2, typename T3 >
195  struct UseDoublePrecisionKernel {
196  enum { value = BLAZE_BLAS_MODE &&
197  HasMutableDataAccess<T1>::value &&
198  HasConstDataAccess<T2>::value &&
199  HasConstDataAccess<T3>::value &&
200  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
201  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202  IsDouble<typename T1::ElementType>::value &&
203  IsDouble<typename T2::ElementType>::value &&
204  IsDouble<typename T3::ElementType>::value };
205  };
207  //**********************************************************************************************
208 
209  //**********************************************************************************************
211 
215  template< typename T1, typename T2, typename T3 >
216  struct UseSinglePrecisionComplexKernel {
217  typedef complex<float> Type;
218  enum { value = BLAZE_BLAS_MODE &&
219  HasMutableDataAccess<T1>::value &&
220  HasConstDataAccess<T2>::value &&
221  HasConstDataAccess<T3>::value &&
222  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
223  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
224  IsSame<typename T1::ElementType,Type>::value &&
225  IsSame<typename T2::ElementType,Type>::value &&
226  IsSame<typename T3::ElementType,Type>::value };
227  };
229  //**********************************************************************************************
230 
231  //**********************************************************************************************
233 
237  template< typename T1, typename T2, typename T3 >
238  struct UseDoublePrecisionComplexKernel {
239  typedef complex<double> Type;
240  enum { value = BLAZE_BLAS_MODE &&
241  HasMutableDataAccess<T1>::value &&
242  HasConstDataAccess<T2>::value &&
243  HasConstDataAccess<T3>::value &&
244  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
245  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
246  IsSame<typename T1::ElementType,Type>::value &&
247  IsSame<typename T2::ElementType,Type>::value &&
248  IsSame<typename T3::ElementType,Type>::value };
249  };
251  //**********************************************************************************************
252 
253  //**********************************************************************************************
255 
258  template< typename T1, typename T2, typename T3 >
259  struct UseDefaultKernel {
260  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
261  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
262  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
263  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
264  };
266  //**********************************************************************************************
267 
268  //**********************************************************************************************
270 
273  template< typename T1, typename T2, typename T3 >
274  struct UseVectorizedDefaultKernel {
275  enum { value = !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
276  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
277  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
278  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
279  IntrinsicTrait<typename T1::ElementType>::addition &&
280  IntrinsicTrait<typename T1::ElementType>::multiplication };
281  };
283  //**********************************************************************************************
284 
285  public:
286  //**Type definitions****************************************************************************
293  typedef const ElementType ReturnType;
294  typedef const ResultType CompositeType;
295 
297  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
298 
300  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
301 
304 
307  //**********************************************************************************************
308 
309  //**Compilation flags***************************************************************************
311  enum { vectorizable = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
312  MT1::vectorizable && MT2::vectorizable &&
316 
318  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
319  !evaluateRight && MT2::smpAssignable };
320  //**********************************************************************************************
321 
322  //**Constructor*********************************************************************************
328  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
329  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
330  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
331  {
332  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
333  }
334  //**********************************************************************************************
335 
336  //**Access operator*****************************************************************************
343  inline ReturnType operator()( size_t i, size_t j ) const {
344  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
345  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
346 
347  const size_t kbegin( ( IsUpper<MT1>::value )
348  ?( ( IsLower<MT2>::value )
349  ?( max( ( IsStrictlyUpper<MT1>::value ? i+1UL : i )
350  , ( IsStrictlyLower<MT2>::value ? j+1UL : j ) ) )
351  :( IsStrictlyUpper<MT1>::value ? i+1UL : i ) )
352  :( ( IsLower<MT2>::value )
353  ?( IsStrictlyLower<MT2>::value ? j+1UL : j )
354  :( 0UL ) ) );
355  const size_t kend( ( IsLower<MT1>::value )
356  ?( ( IsUpper<MT2>::value )
357  ?( min( ( IsStrictlyLower<MT1>::value ? i : i+1UL )
358  , ( IsStrictlyUpper<MT2>::value ? j : j+1UL ) ) )
359  :( IsStrictlyLower<MT1>::value ? i : i+1UL ) )
360  :( ( IsUpper<MT2>::value )
361  ?( IsStrictlyUpper<MT2>::value ? j : j+1UL )
362  :( lhs_.columns() ) ) );
363 
364  if( lhs_.columns() == 0UL ||
365  ( ( IsTriangular<MT1>::value || IsTriangular<MT2>::value ) && kbegin >= kend ) )
366  return ElementType();
367 
369  return lhs_(i,i) * rhs_(i,j);
370 
372  return lhs_(i,j) * rhs_(j,j);
373 
374  const size_t knum( kend - kbegin );
375  const size_t kpos( kbegin + ( ( knum - 1UL ) & size_t(-2) ) + 1UL );
376 
377  ElementType tmp( lhs_(i,kbegin) * rhs_(kbegin,j) );
378 
379  for( size_t k=kbegin+1UL; k<kpos; k+=2UL ) {
380  tmp += lhs_(i,k ) * rhs_(k ,j);
381  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
382  }
383  if( kpos < kend ) {
384  tmp += lhs_(i,kpos) * rhs_(kpos,j);
385  }
386 
387  return tmp;
388  }
389  //**********************************************************************************************
390 
391  //**Rows function*******************************************************************************
396  inline size_t rows() const {
397  return lhs_.rows();
398  }
399  //**********************************************************************************************
400 
401  //**Columns function****************************************************************************
406  inline size_t columns() const {
407  return rhs_.columns();
408  }
409  //**********************************************************************************************
410 
411  //**Left operand access*************************************************************************
416  inline LeftOperand leftOperand() const {
417  return lhs_;
418  }
419  //**********************************************************************************************
420 
421  //**Right operand access************************************************************************
426  inline RightOperand rightOperand() const {
427  return rhs_;
428  }
429  //**********************************************************************************************
430 
431  //**********************************************************************************************
437  template< typename T >
438  inline bool canAlias( const T* alias ) const {
439  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
440  }
441  //**********************************************************************************************
442 
443  //**********************************************************************************************
449  template< typename T >
450  inline bool isAliased( const T* alias ) const {
451  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
452  }
453  //**********************************************************************************************
454 
455  //**********************************************************************************************
460  inline bool isAligned() const {
461  return lhs_.isAligned() && rhs_.isAligned();
462  }
463  //**********************************************************************************************
464 
465  //**********************************************************************************************
470  inline bool canSMPAssign() const {
471  return ( !BLAZE_BLAS_IS_PARALLEL ||
472  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
474  }
475  //**********************************************************************************************
476 
477  private:
478  //**Member variables****************************************************************************
479  LeftOperand lhs_;
480  RightOperand rhs_;
481  //**********************************************************************************************
482 
483  //**Assignment to dense matrices****************************************************************
496  template< typename MT // Type of the target dense matrix
497  , bool SO > // Storage order of the target dense matrix
498  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
499  {
501 
502  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
503  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
504 
505  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
506  return;
507  }
508  else if( rhs.lhs_.columns() == 0UL ) {
509  reset( ~lhs );
510  return;
511  }
512 
513  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
514  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
515 
516  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
517  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
518  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
519  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
520  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
521  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
522 
523  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
524  }
526  //**********************************************************************************************
527 
528  //**Assignment to dense matrices (kernel selection)*********************************************
539  template< typename MT3 // Type of the left-hand side target matrix
540  , typename MT4 // Type of the left-hand side matrix operand
541  , typename MT5 > // Type of the right-hand side matrix operand
542  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
543  {
545  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
546  selectSmallAssignKernel( C, A, B );
547  else
548  selectBlasAssignKernel( C, A, B );
549  }
551  //**********************************************************************************************
552 
553  //**Default assignment to row-major dense matrices (general/general)****************************
567  template< typename MT3 // Type of the left-hand side target matrix
568  , typename MT4 // Type of the left-hand side matrix operand
569  , typename MT5 > // Type of the right-hand side matrix operand
570  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
571  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
572  {
573  const size_t M( A.rows() );
574  const size_t N( B.columns() );
575  const size_t K( A.columns() );
576 
577  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
578  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
579  :( 0UL ) );
580  const size_t iend( ( IsStrictlyUpper<MT4>::value )
581  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
582  :( M ) );
583  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
584 
585  for( size_t i=0UL; i<ibegin; ++i ) {
586  for( size_t j=0UL; j<N; ++j ) {
587  reset( (~C)(i,j) );
588  }
589  }
590  for( size_t i=ibegin; i<iend; ++i )
591  {
592  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
593  ?( ( IsStrictlyUpper<MT4>::value )
594  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
595  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
596  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
597  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
598  ?( ( IsStrictlyLower<MT4>::value )
599  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
600  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
601  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
602  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
603 
604  for( size_t j=0UL; j<jbegin; ++j ) {
605  reset( (~C)(i,j) );
606  }
607  for( size_t j=jbegin; j<jend; ++j )
608  {
609  const size_t kbegin( ( IsUpper<MT4>::value )
610  ?( ( IsLower<MT5>::value )
611  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
612  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
613  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
614  :( ( IsLower<MT5>::value )
615  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
616  :( 0UL ) ) );
617  const size_t kend( ( IsLower<MT4>::value )
618  ?( ( IsUpper<MT5>::value )
619  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
620  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
621  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
622  :( ( IsUpper<MT5>::value )
623  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
624  :( K ) ) );
625  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
626 
627  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
628  for( size_t k=kbegin+1UL; k<kend; ++k ) {
629  (~C)(i,j) += A(i,k) * B(k,j);
630  }
631  }
632  for( size_t j=jend; j<N; ++j ) {
633  reset( (~C)(i,j) );
634  }
635  }
636  for( size_t i=iend; i<M; ++i ) {
637  for( size_t j=0UL; j<N; ++j ) {
638  reset( (~C)(i,j) );
639  }
640  }
641  }
643  //**********************************************************************************************
644 
645  //**Default assignment to column-major dense matrices (general/general)*************************
659  template< typename MT3 // Type of the left-hand side target matrix
660  , typename MT4 // Type of the left-hand side matrix operand
661  , typename MT5 > // Type of the right-hand side matrix operand
662  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
663  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
664  {
665  const size_t M( A.rows() );
666  const size_t N( B.columns() );
667  const size_t K( A.columns() );
668 
669  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
670  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
671  :( 0UL ) );
672  const size_t jend( ( IsStrictlyLower<MT5>::value )
673  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
674  :( N ) );
675  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
676 
677  for( size_t j=0UL; j<jbegin; ++j ) {
678  for( size_t i=0UL; i<M; ++i ) {
679  reset( (~C)(i,j) );
680  }
681  }
682  for( size_t j=jbegin; j<jend; ++j )
683  {
684  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
685  ?( ( IsStrictlyLower<MT4>::value )
686  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
687  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
688  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
689  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
690  ?( ( IsStrictlyUpper<MT4>::value )
691  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
692  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
693  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
694  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
695 
696  for( size_t i=0UL; i<ibegin; ++i ) {
697  reset( (~C)(i,j) );
698  }
699  for( size_t i=ibegin; i<iend; ++i )
700  {
701  const size_t kbegin( ( IsUpper<MT4>::value )
702  ?( ( IsLower<MT5>::value )
703  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
704  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
705  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
706  :( ( IsLower<MT5>::value )
707  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
708  :( 0UL ) ) );
709  const size_t kend( ( IsLower<MT4>::value )
710  ?( ( IsUpper<MT5>::value )
711  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
712  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
713  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
714  :( ( IsUpper<MT5>::value )
715  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
716  :( K ) ) );
717  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
718 
719  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
720  for( size_t k=kbegin+1UL; k<kend; ++k ) {
721  (~C)(i,j) += A(i,k) * B(k,j);
722  }
723  }
724  for( size_t i=iend; i<M; ++i ) {
725  reset( (~C)(i,j) );
726  }
727  }
728  for( size_t j=jend; j<N; ++j ) {
729  for( size_t i=0UL; i<M; ++i ) {
730  reset( (~C)(i,j) );
731  }
732  }
733  }
735  //**********************************************************************************************
736 
737  //**Default assignment to row-major dense matrices (general/diagonal)***************************
751  template< typename MT3 // Type of the left-hand side target matrix
752  , typename MT4 // Type of the left-hand side matrix operand
753  , typename MT5 > // Type of the right-hand side matrix operand
754  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
755  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
756  {
757  const size_t M( A.rows() );
758  const size_t N( B.columns() );
759 
760  for( size_t i=0UL; i<M; ++i )
761  {
762  const size_t jbegin( ( IsUpper<MT4>::value )
763  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
764  :( 0UL ) );
765  const size_t jend( ( IsLower<MT4>::value )
766  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
767  :( N ) );
768  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
769 
770  if( IsUpper<MT4>::value ) {
771  for( size_t j=0UL; j<jbegin; ++j ) {
772  reset( (~C)(i,j) );
773  }
774  }
775  for( size_t j=jbegin; j<jend; ++j ) {
776  (~C)(i,j) = A(i,j) * B(j,j);
777  }
778  if( IsLower<MT4>::value ) {
779  for( size_t j=jend; j<N; ++j ) {
780  reset( (~C)(i,j) );
781  }
782  }
783  }
784  }
786  //**********************************************************************************************
787 
788  //**Default assignment to column-major dense matrices (general/diagonal)************************
802  template< typename MT3 // Type of the left-hand side target matrix
803  , typename MT4 // Type of the left-hand side matrix operand
804  , typename MT5 > // Type of the right-hand side matrix operand
805  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
806  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
807  {
808  const size_t M( A.rows() );
809  const size_t N( B.columns() );
810 
811  const size_t block( 16UL );
812 
813  for( size_t jj=0UL; jj<N; jj+=block ) {
814  const size_t jend( min( N, jj+block ) );
815  for( size_t ii=0UL; ii<M; ii+=block ) {
816  const size_t iend( min( M, ii+block ) );
817  for( size_t j=jj; j<jend; ++j )
818  {
819  const size_t ibegin( ( IsLower<MT4>::value )
820  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
821  :( ii ) );
822  const size_t ipos( ( IsUpper<MT4>::value )
823  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
824  :( iend ) );
825 
826  if( IsLower<MT4>::value ) {
827  for( size_t i=ii; i<ibegin; ++i ) {
828  reset( (~C)(i,j) );
829  }
830  }
831  for( size_t i=ibegin; i<ipos; ++i ) {
832  (~C)(i,j) = A(i,j) * B(j,j);
833  }
834  if( IsUpper<MT4>::value ) {
835  for( size_t i=ipos; i<iend; ++i ) {
836  reset( (~C)(i,j) );
837  }
838  }
839  }
840  }
841  }
842  }
844  //**********************************************************************************************
845 
846  //**Default assignment to row-major dense matrices (diagonal/general)***************************
860  template< typename MT3 // Type of the left-hand side target matrix
861  , typename MT4 // Type of the left-hand side matrix operand
862  , typename MT5 > // Type of the right-hand side matrix operand
863  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
864  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
865  {
866  const size_t M( A.rows() );
867  const size_t N( B.columns() );
868 
869  const size_t block( 16UL );
870 
871  for( size_t ii=0UL; ii<M; ii+=block ) {
872  const size_t iend( min( M, ii+block ) );
873  for( size_t jj=0UL; jj<N; jj+=block ) {
874  const size_t jend( min( N, jj+block ) );
875  for( size_t i=ii; i<iend; ++i )
876  {
877  const size_t jbegin( ( IsUpper<MT5>::value )
878  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
879  :( jj ) );
880  const size_t jpos( ( IsLower<MT5>::value )
881  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
882  :( jend ) );
883 
884  if( IsUpper<MT5>::value ) {
885  for( size_t j=jj; j<jbegin; ++j ) {
886  reset( (~C)(i,j) );
887  }
888  }
889  for( size_t j=jbegin; j<jpos; ++j ) {
890  (~C)(i,j) = A(i,i) * B(i,j);
891  }
892  if( IsLower<MT5>::value ) {
893  for( size_t j=jpos; j<jend; ++j ) {
894  reset( (~C)(i,j) );
895  }
896  }
897  }
898  }
899  }
900  }
902  //**********************************************************************************************
903 
904  //**Default assignment to column-major dense matrices (diagonal/general)************************
918  template< typename MT3 // Type of the left-hand side target matrix
919  , typename MT4 // Type of the left-hand side matrix operand
920  , typename MT5 > // Type of the right-hand side matrix operand
921  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
922  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
923  {
924  const size_t M( A.rows() );
925  const size_t N( B.columns() );
926 
927  for( size_t j=0UL; j<N; ++j )
928  {
929  const size_t ibegin( ( IsLower<MT5>::value )
930  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
931  :( 0UL ) );
932  const size_t iend( ( IsUpper<MT5>::value )
933  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
934  :( M ) );
935  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
936 
937  if( IsLower<MT5>::value ) {
938  for( size_t i=0UL; i<ibegin; ++i ) {
939  reset( (~C)(i,j) );
940  }
941  }
942  for( size_t i=ibegin; i<iend; ++i ) {
943  (~C)(i,j) = A(i,i) * B(i,j);
944  }
945  if( IsUpper<MT5>::value ) {
946  for( size_t i=iend; i<M; ++i ) {
947  reset( (~C)(i,j) );
948  }
949  }
950  }
951  }
953  //**********************************************************************************************
954 
955  //**Default assignment to dense matrices (diagonal/diagonal)************************************
969  template< typename MT3 // Type of the left-hand side target matrix
970  , typename MT4 // Type of the left-hand side matrix operand
971  , typename MT5 > // Type of the right-hand side matrix operand
972  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
973  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
974  {
975  reset( C );
976 
977  for( size_t i=0UL; i<A.rows(); ++i ) {
978  C(i,i) = A(i,i) * B(i,i);
979  }
980  }
982  //**********************************************************************************************
983 
984  //**Default assignment to dense matrices (small matrices)***************************************
998  template< typename MT3 // Type of the left-hand side target matrix
999  , typename MT4 // Type of the left-hand side matrix operand
1000  , typename MT5 > // Type of the right-hand side matrix operand
1001  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1002  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B )
1003  {
1004  selectDefaultAssignKernel( C, A, B );
1005  }
1007  //**********************************************************************************************
1008 
1009  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
1024  template< typename MT3 // Type of the left-hand side target matrix
1025  , typename MT4 // Type of the left-hand side matrix operand
1026  , typename MT5 > // Type of the right-hand side matrix operand
1027  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1028  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1029  {
1030  typedef IntrinsicTrait<ElementType> IT;
1031 
1032  const size_t M( A.rows() );
1033  const size_t N( B.columns() );
1034  const size_t K( A.columns() );
1035 
1036  size_t i( 0UL );
1037 
1038  for( ; (i+2UL) <= M; i+=2UL )
1039  {
1040  size_t j( 0UL );
1041 
1042  for( ; (j+4UL) <= N; j+=4UL )
1043  {
1044  const size_t kbegin( ( IsUpper<MT4>::value )
1045  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1046  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1047  const size_t kend( ( IsLower<MT4>::value )
1048  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
1049  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
1050 
1051  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1052 
1053  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1054  const IntrinsicType a1( A.load(i ,k) );
1055  const IntrinsicType a2( A.load(i+1UL,k) );
1056  const IntrinsicType b1( B.load(k,j ) );
1057  const IntrinsicType b2( B.load(k,j+1UL) );
1058  const IntrinsicType b3( B.load(k,j+2UL) );
1059  const IntrinsicType b4( B.load(k,j+3UL) );
1060  xmm1 = xmm1 + a1 * b1;
1061  xmm2 = xmm2 + a1 * b2;
1062  xmm3 = xmm3 + a1 * b3;
1063  xmm4 = xmm4 + a1 * b4;
1064  xmm5 = xmm5 + a2 * b1;
1065  xmm6 = xmm6 + a2 * b2;
1066  xmm7 = xmm7 + a2 * b3;
1067  xmm8 = xmm8 + a2 * b4;
1068  }
1069 
1070  (~C)(i ,j ) = sum( xmm1 );
1071  (~C)(i ,j+1UL) = sum( xmm2 );
1072  (~C)(i ,j+2UL) = sum( xmm3 );
1073  (~C)(i ,j+3UL) = sum( xmm4 );
1074  (~C)(i+1UL,j ) = sum( xmm5 );
1075  (~C)(i+1UL,j+1UL) = sum( xmm6 );
1076  (~C)(i+1UL,j+2UL) = sum( xmm7 );
1077  (~C)(i+1UL,j+3UL) = sum( xmm8 );
1078  }
1079 
1080  for( ; (j+2UL) <= N; j+=2UL )
1081  {
1082  const size_t kbegin( ( IsUpper<MT4>::value )
1083  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1084  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1085  const size_t kend( ( IsLower<MT4>::value )
1086  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1087  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1088 
1089  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1090 
1091  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1092  const IntrinsicType a1( A.load(i ,k) );
1093  const IntrinsicType a2( A.load(i+1UL,k) );
1094  const IntrinsicType b1( B.load(k,j ) );
1095  const IntrinsicType b2( B.load(k,j+1UL) );
1096  xmm1 = xmm1 + a1 * b1;
1097  xmm2 = xmm2 + a1 * b2;
1098  xmm3 = xmm3 + a2 * b1;
1099  xmm4 = xmm4 + a2 * b2;
1100  }
1101 
1102  (~C)(i ,j ) = sum( xmm1 );
1103  (~C)(i ,j+1UL) = sum( xmm2 );
1104  (~C)(i+1UL,j ) = sum( xmm3 );
1105  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1106  }
1107 
1108  if( j < N )
1109  {
1110  const size_t kbegin( ( IsUpper<MT4>::value )
1111  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1112  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1113  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1114 
1115  IntrinsicType xmm1, xmm2;
1116 
1117  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1118  const IntrinsicType b1( B.load(k,j) );
1119  xmm1 = xmm1 + A.load(i ,k) * b1;
1120  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1121  }
1122 
1123  (~C)(i ,j) = sum( xmm1 );
1124  (~C)(i+1UL,j) = sum( xmm2 );
1125  }
1126  }
1127 
1128  if( i < M )
1129  {
1130  size_t j( 0UL );
1131 
1132  for( ; (j+4UL) <= N; j+=4UL )
1133  {
1134  const size_t kbegin( ( IsUpper<MT4>::value )
1135  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1136  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1137  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
1138 
1139  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1140 
1141  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1142  const IntrinsicType a1( A.load(i,k) );
1143  xmm1 = xmm1 + a1 * B.load(k,j );
1144  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1145  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1146  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1147  }
1148 
1149  (~C)(i,j ) = sum( xmm1 );
1150  (~C)(i,j+1UL) = sum( xmm2 );
1151  (~C)(i,j+2UL) = sum( xmm3 );
1152  (~C)(i,j+3UL) = sum( xmm4 );
1153  }
1154 
1155  for( ; (j+2UL) <= N; j+=2UL )
1156  {
1157  const size_t kbegin( ( IsUpper<MT4>::value )
1158  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1159  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1160  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1161 
1162  IntrinsicType xmm1, xmm2;
1163 
1164  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1165  const IntrinsicType a1( A.load(i,k) );
1166  xmm1 = xmm1 + a1 * B.load(k,j );
1167  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1168  }
1169 
1170  (~C)(i,j ) = sum( xmm1 );
1171  (~C)(i,j+1UL) = sum( xmm2 );
1172  }
1173 
1174  if( j < N )
1175  {
1176  const size_t kbegin( ( IsUpper<MT4>::value )
1177  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1178  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1179 
1180  IntrinsicType xmm1;
1181 
1182  for( size_t k=kbegin; k<K; k+=IT::size ) {
1183  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1184  }
1185 
1186  (~C)(i,j) = sum( xmm1 );
1187  }
1188  }
1189  }
1191  //**********************************************************************************************
1192 
1193  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized small-matrix assignment kernel for column-major targets (C = A*B).
//
// C is the target matrix, A the left-hand side operand and B the right-hand side operand.
// Each result element is built by loading IT::size-wide packets from A at (row,k) and from B at
// (k,column), accumulating the packet products along k and reducing the accumulator with sum().
// The result is written with '=', i.e. previous contents of C are overwritten.
//
// The rows of C are processed in groups of 4, then 2, then a single remaining row; within each
// row group the columns are processed in pairs followed by a single remaining column.
//
// NOTE(review): the accumulators (xmm*) are only default-constructed before being summed into;
//               this presumably relies on IntrinsicType default-initializing to zero - confirm.
// NOTE(review): the k-loops advance by IT::size and may read past kend/K within a row/column;
//               this presumably relies on padded operand storage - confirm.
1208  template< typename MT3 // Type of the left-hand side target matrix
1209  , typename MT4 // Type of the left-hand side matrix operand
1210  , typename MT5 > // Type of the right-hand side matrix operand
1211  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1212  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1213  {
1214  typedef IntrinsicTrait<ElementType> IT;
1215 
1216  const size_t M( A.rows() );
1217  const size_t N( B.columns() );
1218  const size_t K( A.columns() );
1219 
1220  size_t i( 0UL );
1221 
// ---- Row groups of four ----
1222  for( ; (i+4UL) <= M; i+=4UL )
1223  {
1224  size_t j( 0UL );
1225 
// 4x2 tile: eight accumulators, one per element of the tile
1226  for( ; (j+2UL) <= N; j+=2UL )
1227  {
// kbegin: first k to visit. For an upper A (and/or lower B) the start index is taken from i
// (and/or j) and masked with size_t(-IT::size), which rounds it down to a multiple of IT::size
// (assumes IT::size is a power of two - TODO confirm).
1228  const size_t kbegin( ( IsUpper<MT4>::value )
1229  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1230  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// kend: upper limit of k. A lower A limits it to the last row of the tile (i+4), an upper B to
// the last column of the tile (j+2); otherwise the full inner dimension K is used.
1231  const size_t kend( ( IsLower<MT4>::value )
1232  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
1233  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1234 
1235  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1236 
1237  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1238  const IntrinsicType a1( A.load(i ,k) );
1239  const IntrinsicType a2( A.load(i+1UL,k) );
1240  const IntrinsicType a3( A.load(i+2UL,k) );
1241  const IntrinsicType a4( A.load(i+3UL,k) );
1242  const IntrinsicType b1( B.load(k,j ) );
1243  const IntrinsicType b2( B.load(k,j+1UL) );
1244  xmm1 = xmm1 + a1 * b1;
1245  xmm2 = xmm2 + a1 * b2;
1246  xmm3 = xmm3 + a2 * b1;
1247  xmm4 = xmm4 + a2 * b2;
1248  xmm5 = xmm5 + a3 * b1;
1249  xmm6 = xmm6 + a3 * b2;
1250  xmm7 = xmm7 + a4 * b1;
1251  xmm8 = xmm8 + a4 * b2;
1252  }
1253 
// Horizontal reduction of each accumulator yields one element of C
1254  (~C)(i ,j ) = sum( xmm1 );
1255  (~C)(i ,j+1UL) = sum( xmm2 );
1256  (~C)(i+1UL,j ) = sum( xmm3 );
1257  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1258  (~C)(i+2UL,j ) = sum( xmm5 );
1259  (~C)(i+2UL,j+1UL) = sum( xmm6 );
1260  (~C)(i+3UL,j ) = sum( xmm7 );
1261  (~C)(i+3UL,j+1UL) = sum( xmm8 );
1262  }
1263 
// 4x1 tile: single remaining column
1264  if( j < N )
1265  {
1266  const size_t kbegin( ( IsUpper<MT4>::value )
1267  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1268  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Only the structure of A is used to limit k here
1269  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
1270 
1271  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1272 
1273  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1274  const IntrinsicType b1( B.load(k,j) );
1275  xmm1 = xmm1 + A.load(i ,k) * b1;
1276  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1277  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1278  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1279  }
1280 
1281  (~C)(i ,j) = sum( xmm1 );
1282  (~C)(i+1UL,j) = sum( xmm2 );
1283  (~C)(i+2UL,j) = sum( xmm3 );
1284  (~C)(i+3UL,j) = sum( xmm4 );
1285  }
1286  }
1287 
// ---- Row groups of two (runs at most once after the groups of four) ----
1288  for( ; (i+2UL) <= M; i+=2UL )
1289  {
1290  size_t j( 0UL );
1291 
// 2x2 tile
1292  for( ; (j+2UL) <= N; j+=2UL )
1293  {
1294  const size_t kbegin( ( IsUpper<MT4>::value )
1295  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1296  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1297  const size_t kend( ( IsLower<MT4>::value )
1298  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
1299  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
1300 
1301  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1302 
1303  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1304  const IntrinsicType a1( A.load(i ,k) );
1305  const IntrinsicType a2( A.load(i+1UL,k) );
1306  const IntrinsicType b1( B.load(k,j ) );
1307  const IntrinsicType b2( B.load(k,j+1UL) );
1308  xmm1 = xmm1 + a1 * b1;
1309  xmm2 = xmm2 + a1 * b2;
1310  xmm3 = xmm3 + a2 * b1;
1311  xmm4 = xmm4 + a2 * b2;
1312  }
1313 
1314  (~C)(i ,j ) = sum( xmm1 );
1315  (~C)(i ,j+1UL) = sum( xmm2 );
1316  (~C)(i+1UL,j ) = sum( xmm3 );
1317  (~C)(i+1UL,j+1UL) = sum( xmm4 );
1318  }
1319 
// 2x1 tile: single remaining column
1320  if( j < N )
1321  {
1322  const size_t kbegin( ( IsUpper<MT4>::value )
1323  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1324  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1325  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
1326 
1327  IntrinsicType xmm1, xmm2;
1328 
1329  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1330  const IntrinsicType b1( B.load(k,j) );
1331  xmm1 = xmm1 + A.load(i ,k) * b1;
1332  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1333  }
1334 
1335  (~C)(i ,j) = sum( xmm1 );
1336  (~C)(i+1UL,j) = sum( xmm2 );
1337  }
1338  }
1339 
// ---- Single remaining row ----
1340  if( i < M )
1341  {
1342  size_t j( 0UL );
1343 
// 1x2 tile
1344  for( ; (j+2UL) <= N; j+=2UL )
1345  {
1346  const size_t kbegin( ( IsUpper<MT4>::value )
1347  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1348  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Only the structure of B is used to limit k here
1349  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
1350 
1351  IntrinsicType xmm1, xmm2;
1352 
1353  for( size_t k=kbegin; k<kend; k+=IT::size ) {
1354  const IntrinsicType a1( A.load(i,k) );
1355  xmm1 = xmm1 + a1 * B.load(k,j );
1356  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1357  }
1358 
1359  (~C)(i,j ) = sum( xmm1 );
1360  (~C)(i,j+1UL) = sum( xmm2 );
1361  }
1362 
// 1x1 tile: last element; k always runs up to the full inner dimension K
1363  if( j < N )
1364  {
1365  const size_t kbegin( ( IsUpper<MT4>::value )
1366  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
1367  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
1368 
1369  IntrinsicType xmm1;
1370 
1371  for( size_t k=kbegin; k<K; k+=IT::size ) {
1372  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1373  }
1374 
1375  (~C)(i,j) = sum( xmm1 );
1376  }
1377  }
1378  }
1380  //**********************************************************************************************
1381 
1382  //**Default assignment to dense matrices (large matrices)***************************************
1396  template< typename MT3 // Type of the left-hand side target matrix
1397  , typename MT4 // Type of the left-hand side matrix operand
1398  , typename MT5 > // Type of the right-hand side matrix operand
1399  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1400  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B )
1401  {
1402  selectDefaultAssignKernel( C, A, B );
1403  }
1405  //**********************************************************************************************
1406 
1407  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
1422  template< typename MT3 // Type of the left-hand side target matrix
1423  , typename MT4 // Type of the left-hand side matrix operand
1424  , typename MT5 > // Type of the right-hand side matrix operand
1425  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1426  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1427  {
1428  // TODO
1429  selectSmallAssignKernel( ~C, A, B );
1430  }
1432  //**********************************************************************************************
1433 
1434  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
1449  template< typename MT3 // Type of the left-hand side target matrix
1450  , typename MT4 // Type of the left-hand side matrix operand
1451  , typename MT5 > // Type of the right-hand side matrix operand
1452  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1453  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1454  {
1455  // TODO
1456  selectSmallAssignKernel( ~C, A, B );
1457  }
1459  //**********************************************************************************************
1460 
1461  //**Default assignment to dense matrices********************************************************
1475  template< typename MT3 // Type of the left-hand side target matrix
1476  , typename MT4 // Type of the left-hand side matrix operand
1477  , typename MT5 > // Type of the right-hand side matrix operand
1478  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1479  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1480  {
1481  selectLargeAssignKernel( C, A, B );
1482  }
1484  //**********************************************************************************************
1485 
1486  //**BLAS-based assignment to dense matrices (single precision)**********************************
1487 #if BLAZE_BLAS_MODE
1488 
1501  template< typename MT3 // Type of the left-hand side target matrix
1502  , typename MT4 // Type of the left-hand side matrix operand
1503  , typename MT5 > // Type of the right-hand side matrix operand
1504  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1505  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1506  {
1507  if( IsTriangular<MT4>::value ) {
1508  assign( C, B );
1509  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1510  }
1511  else if( IsTriangular<MT5>::value ) {
1512  assign( C, A );
1513  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
1514  }
1515  else {
1516  sgemm( C, A, B, 1.0F, 0.0F );
1517  }
1518  }
1520 #endif
1521  //**********************************************************************************************
1522 
1523  //**BLAS-based assignment to dense matrices (double precision)**********************************
1524 #if BLAZE_BLAS_MODE
1525 
1538  template< typename MT3 // Type of the left-hand side target matrix
1539  , typename MT4 // Type of the left-hand side matrix operand
1540  , typename MT5 > // Type of the right-hand side matrix operand
1541  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1542  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1543  {
1544  if( IsTriangular<MT4>::value ) {
1545  assign( C, B );
1546  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1547  }
1548  else if( IsTriangular<MT5>::value ) {
1549  assign( C, A );
1550  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
1551  }
1552  else {
1553  dgemm( C, A, B, 1.0, 0.0 );
1554  }
1555  }
1557 #endif
1558  //**********************************************************************************************
1559 
1560  //**BLAS-based assignment to dense matrices (single precision complex)**************************
1561 #if BLAZE_BLAS_MODE
1562 
1575  template< typename MT3 // Type of the left-hand side target matrix
1576  , typename MT4 // Type of the left-hand side matrix operand
1577  , typename MT5 > // Type of the right-hand side matrix operand
1578  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1579  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1580  {
1581  if( IsTriangular<MT4>::value ) {
1582  assign( C, B );
1583  ctrmm( C, A, CblasLeft,
1584  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1585  complex<float>( 1.0F, 0.0F ) );
1586  }
1587  else if( IsTriangular<MT5>::value ) {
1588  assign( C, A );
1589  ctrmm( C, B, CblasRight,
1590  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1591  complex<float>( 1.0F, 0.0F ) );
1592  }
1593  else {
1594  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1595  }
1596  }
1598 #endif
1599  //**********************************************************************************************
1600 
1601  //**BLAS-based assignment to dense matrices (double precision complex)**************************
1602 #if BLAZE_BLAS_MODE
1603 
1616  template< typename MT3 // Type of the left-hand side target matrix
1617  , typename MT4 // Type of the left-hand side matrix operand
1618  , typename MT5 > // Type of the right-hand side matrix operand
1619  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1620  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
1621  {
1622  if( IsTriangular<MT4>::value ) {
1623  assign( C, B );
1624  ztrmm( C, A, CblasLeft,
1625  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
1626  complex<double>( 1.0, 0.0 ) );
1627  }
1628  else if( IsTriangular<MT5>::value ) {
1629  assign( C, A );
1630  ztrmm( C, B, CblasRight,
1631  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
1632  complex<double>( 1.0, 0.0 ) );
1633  }
1634  else {
1635  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1636  }
1637  }
1639 #endif
1640  //**********************************************************************************************
1641 
1642  //**Assignment to sparse matrices***************************************************************
1655  template< typename MT // Type of the target sparse matrix
1656  , bool SO > // Storage order of the target sparse matrix
1657  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1658  {
1660 
1661  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1662 
1669 
1670  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1671  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1672 
1673  const TmpType tmp( serial( rhs ) );
1674  assign( ~lhs, tmp );
1675  }
1677  //**********************************************************************************************
1678 
1679  //**Addition assignment to dense matrices*******************************************************
1692  template< typename MT // Type of the target dense matrix
1693  , bool SO > // Storage order of the target dense matrix
1694  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1695  {
1697 
1698  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1699  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1700 
1701  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1702  return;
1703  }
1704 
1705  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1706  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1707 
1708  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1709  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1710  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1711  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1712  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1713  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1714 
1715  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1716  }
1718  //**********************************************************************************************
1719 
1720  //**Addition assignment to dense matrices (kernel selection)************************************
1731  template< typename MT3 // Type of the left-hand side target matrix
1732  , typename MT4 // Type of the left-hand side matrix operand
1733  , typename MT5 > // Type of the right-hand side matrix operand
1734  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1735  {
1736  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
1737  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
1738  selectSmallAddAssignKernel( C, A, B );
1739  else
1740  selectBlasAddAssignKernel( C, A, B );
1741  }
1743  //**********************************************************************************************
1744 
1745  //**Default addition assignment to row-major dense matrices (general/general)*******************
1759  template< typename MT3 // Type of the left-hand side target matrix
1760  , typename MT4 // Type of the left-hand side matrix operand
1761  , typename MT5 > // Type of the right-hand side matrix operand
1762  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
1763  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1764  {
1765  const size_t M( A.rows() );
1766  const size_t N( B.columns() );
1767  const size_t K( A.columns() );
1768 
1769  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
1770  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
1771  :( 0UL ) );
1772  const size_t iend( ( IsStrictlyUpper<MT4>::value )
1773  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
1774  :( M ) );
1775  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1776 
1777  for( size_t i=ibegin; i<iend; ++i )
1778  {
1779  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1780  ?( ( IsStrictlyUpper<MT4>::value )
1781  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
1782  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
1783  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
1784  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
1785  ?( ( IsStrictlyLower<MT4>::value )
1786  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
1787  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
1788  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
1789  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1790 
1791  for( size_t j=jbegin; j<jend; ++j )
1792  {
1793  const size_t kbegin( ( IsUpper<MT4>::value )
1794  ?( ( IsLower<MT5>::value )
1795  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1796  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1797  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1798  :( ( IsLower<MT5>::value )
1799  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1800  :( 0UL ) ) );
1801  const size_t kend( ( IsLower<MT4>::value )
1802  ?( ( IsUpper<MT5>::value )
1803  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1804  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1805  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1806  :( ( IsUpper<MT5>::value )
1807  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1808  :( K ) ) );
1809  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1810 
1811  const size_t knum( kend - kbegin );
1812  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1813 
1814  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1815  (~C)(i,j) += A(i,k ) * B(k ,j);
1816  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1817  }
1818  if( kpos < kend ) {
1819  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1820  }
1821  }
1822  }
1823  }
1825  //**********************************************************************************************
1826 
1827  //**Default addition assignment to column-major dense matrices (general/general)****************
1841  template< typename MT3 // Type of the left-hand side target matrix
1842  , typename MT4 // Type of the left-hand side matrix operand
1843  , typename MT5 > // Type of the right-hand side matrix operand
1844  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
1845  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1846  {
1847  const size_t M( A.rows() );
1848  const size_t N( B.columns() );
1849  const size_t K( A.columns() );
1850 
1851  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
1852  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
1853  :( 0UL ) );
1854  const size_t jend( ( IsStrictlyLower<MT5>::value )
1855  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
1856  :( N ) );
1857  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1858 
1859  for( size_t j=jbegin; j<jend; ++j )
1860  {
1861  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
1862  ?( ( IsStrictlyLower<MT4>::value )
1863  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
1864  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
1865  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
1866  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
1867  ?( ( IsStrictlyUpper<MT4>::value )
1868  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
1869  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
1870  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
1871  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
1872 
1873  for( size_t i=ibegin; i<iend; ++i )
1874  {
1875  const size_t kbegin( ( IsUpper<MT4>::value )
1876  ?( ( IsLower<MT5>::value )
1877  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1878  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
1879  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
1880  :( ( IsLower<MT5>::value )
1881  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
1882  :( 0UL ) ) );
1883  const size_t kend( ( IsLower<MT4>::value )
1884  ?( ( IsUpper<MT5>::value )
1885  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
1886  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
1887  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
1888  :( ( IsUpper<MT5>::value )
1889  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
1890  :( K ) ) );
1891  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
1892 
1893  const size_t knum( kend - kbegin );
1894  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
1895 
1896  for( size_t k=kbegin; k<kpos; k+=2UL ) {
1897  (~C)(i,j) += A(i,k ) * B(k ,j);
1898  (~C)(i,j) += A(i,k+1UL) * B(k+1UL,j);
1899  }
1900  if( kpos < kend ) {
1901  (~C)(i,j) += A(i,kpos) * B(kpos,j);
1902  }
1903  }
1904  }
1905  }
1907  //**********************************************************************************************
1908 
1909  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
1923  template< typename MT3 // Type of the left-hand side target matrix
1924  , typename MT4 // Type of the left-hand side matrix operand
1925  , typename MT5 > // Type of the right-hand side matrix operand
1926  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
1927  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1928  {
1929  const size_t M( A.rows() );
1930  const size_t N( B.columns() );
1931 
1932  for( size_t i=0UL; i<M; ++i )
1933  {
1934  const size_t jbegin( ( IsUpper<MT4>::value )
1935  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
1936  :( 0UL ) );
1937  const size_t jend( ( IsLower<MT4>::value )
1938  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
1939  :( N ) );
1940  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
1941 
1942  const size_t jnum( jend - jbegin );
1943  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
1944 
1945  for( size_t j=jbegin; j<jpos; j+=2UL ) {
1946  (~C)(i,j ) += A(i,j ) * B(j ,j );
1947  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL);
1948  }
1949  if( jpos < jend ) {
1950  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos);
1951  }
1952  }
1953  }
1955  //**********************************************************************************************
1956 
1957  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
1971  template< typename MT3 // Type of the left-hand side target matrix
1972  , typename MT4 // Type of the left-hand side matrix operand
1973  , typename MT5 > // Type of the right-hand side matrix operand
1974  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
1975  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1976  {
1977  const size_t M( A.rows() );
1978  const size_t N( B.columns() );
1979 
1980  const size_t block( 16UL );
1981 
1982  for( size_t jj=0UL; jj<N; jj+=block ) {
1983  const size_t jend( min( N, jj+block ) );
1984  for( size_t ii=0UL; ii<M; ii+=block ) {
1985  const size_t iend( min( M, ii+block ) );
1986  for( size_t j=jj; j<jend; ++j )
1987  {
1988  const size_t ibegin( ( IsLower<MT4>::value )
1989  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
1990  :( ii ) );
1991  const size_t ipos( ( IsUpper<MT4>::value )
1992  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
1993  :( iend ) );
1994 
1995  for( size_t i=ibegin; i<ipos; ++i ) {
1996  (~C)(i,j) += A(i,j) * B(j,j);
1997  }
1998  }
1999  }
2000  }
2001  }
2003  //**********************************************************************************************
2004 
2005  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
2019  template< typename MT3 // Type of the left-hand side target matrix
2020  , typename MT4 // Type of the left-hand side matrix operand
2021  , typename MT5 > // Type of the right-hand side matrix operand
2022  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
2023  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2024  {
2025  const size_t M( A.rows() );
2026  const size_t N( B.columns() );
2027 
2028  const size_t block( 16UL );
2029 
2030  for( size_t ii=0UL; ii<M; ii+=block ) {
2031  const size_t iend( min( M, ii+block ) );
2032  for( size_t jj=0UL; jj<N; jj+=block ) {
2033  const size_t jend( min( N, jj+block ) );
2034  for( size_t i=ii; i<iend; ++i )
2035  {
2036  const size_t jbegin( ( IsUpper<MT5>::value )
2037  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
2038  :( jj ) );
2039  const size_t jpos( ( IsLower<MT5>::value )
2040  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
2041  :( jend ) );
2042 
2043  for( size_t j=jbegin; j<jpos; ++j ) {
2044  (~C)(i,j) += A(i,i) * B(i,j);
2045  }
2046  }
2047  }
2048  }
2049  }
2051  //**********************************************************************************************
2052 
2053  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
2067  template< typename MT3 // Type of the left-hand side target matrix
2068  , typename MT4 // Type of the left-hand side matrix operand
2069  , typename MT5 > // Type of the right-hand side matrix operand
2070  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
2071  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2072  {
2073  const size_t M( A.rows() );
2074  const size_t N( B.columns() );
2075 
2076  for( size_t j=0UL; j<N; ++j )
2077  {
2078  const size_t ibegin( ( IsLower<MT5>::value )
2079  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2080  :( 0UL ) );
2081  const size_t iend( ( IsUpper<MT5>::value )
2082  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2083  :( M ) );
2084  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2085 
2086  const size_t inum( iend - ibegin );
2087  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
2088 
2089  for( size_t i=ibegin; i<ipos; i+=2UL ) {
2090  (~C)(i ,j) += A(i ,i ) * B(i ,j);
2091  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j);
2092  }
2093  if( ipos < iend ) {
2094  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j);
2095  }
2096  }
2097  }
2099  //**********************************************************************************************
2100 
2101  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
2115  template< typename MT3 // Type of the left-hand side target matrix
2116  , typename MT4 // Type of the left-hand side matrix operand
2117  , typename MT5 > // Type of the right-hand side matrix operand
2118  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
2119  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2120  {
2121  for( size_t i=0UL; i<A.rows(); ++i ) {
2122  C(i,i) += A(i,i) * B(i,i);
2123  }
2124  }
2126  //**********************************************************************************************
2127 
2128  //**Default addition assignment to dense matrices (small matrices)******************************
2142  template< typename MT3 // Type of the left-hand side target matrix
2143  , typename MT4 // Type of the left-hand side matrix operand
2144  , typename MT5 > // Type of the right-hand side matrix operand
2145  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2146  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2147  {
2148  selectDefaultAddAssignKernel( C, A, B );
2149  }
2151  //**********************************************************************************************
2152 
2153  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized "small matrix" addition-assignment kernel (C += A * B) for a
// row-major target (DenseMatrix<MT3,false>). Enabled only if
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
//   C  Target matrix, accessed element-wise through (~C)(i,j).
//   A  Left-hand side operand, read through A.load(i,k).
//   B  Right-hand side operand, read through B.load(k,j).
//
// C is processed in tiles of 2x4, 2x2 and 2x1 elements; a possibly remaining
// last row is processed in tiles of 1x4, 1x2 and 1x1 elements. For every tile
// the range [kbegin,kend) is traversed in steps of IT::size. In each step the
// values returned by A.load() and B.load() are multiplied and accumulated in
// one IntrinsicType accumulator per target element; afterwards sum() of each
// accumulator is added to the corresponding element of C.
//
// kbegin and kend are narrowed via the compile-time traits IsUpper/IsLower of
// the two operand types. kbegin is masked with size_t(-IT::size), which rounds
// it down to a multiple of IT::size provided IT::size is a power of two.
//
// NOTE(review): the accumulators are default-constructed and then read in
// "xmm = xmm + ..."; this presumably relies on IntrinsicType's default
// constructor yielding zero - confirm.
// NOTE(review): k advances by IT::size even if kend-kbegin is not a multiple
// of IT::size, so load() may be called for a range extending past kend;
// presumably this is covered by padding of the operands - confirm.
2168  template< typename MT3 // Type of the left-hand side target matrix
2169  , typename MT4 // Type of the left-hand side matrix operand
2170  , typename MT5 > // Type of the right-hand side matrix operand
2171  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2172  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2173  {
2174  typedef IntrinsicTrait<ElementType> IT;
2175
2176  const size_t M( A.rows() );
2177  const size_t N( B.columns() );
2178  const size_t K( A.columns() );
2179
2180  size_t i( 0UL );
2181
      // Main part: two rows of C per iteration.
2182  for( ; (i+2UL) <= M; i+=2UL )
2183  {
2184  size_t j( 0UL );
2185
      // 2x4 tile: rows i, i+1 and columns j .. j+3.
2186  for( ; (j+4UL) <= N; j+=4UL )
2187  {
2188  const size_t kbegin( ( IsUpper<MT4>::value )
2189  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2190  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2191  const size_t kend( ( IsLower<MT4>::value )
2192  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
2193  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
2194
2195  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2196
2197  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2198  const IntrinsicType a1( A.load(i ,k) );
2199  const IntrinsicType a2( A.load(i+1UL,k) );
2200  const IntrinsicType b1( B.load(k,j ) );
2201  const IntrinsicType b2( B.load(k,j+1UL) );
2202  const IntrinsicType b3( B.load(k,j+2UL) );
2203  const IntrinsicType b4( B.load(k,j+3UL) );
2204  xmm1 = xmm1 + a1 * b1;
2205  xmm2 = xmm2 + a1 * b2;
2206  xmm3 = xmm3 + a1 * b3;
2207  xmm4 = xmm4 + a1 * b4;
2208  xmm5 = xmm5 + a2 * b1;
2209  xmm6 = xmm6 + a2 * b2;
2210  xmm7 = xmm7 + a2 * b3;
2211  xmm8 = xmm8 + a2 * b4;
2212  }
2213
2214  (~C)(i ,j ) += sum( xmm1 );
2215  (~C)(i ,j+1UL) += sum( xmm2 );
2216  (~C)(i ,j+2UL) += sum( xmm3 );
2217  (~C)(i ,j+3UL) += sum( xmm4 );
2218  (~C)(i+1UL,j ) += sum( xmm5 );
2219  (~C)(i+1UL,j+1UL) += sum( xmm6 );
2220  (~C)(i+1UL,j+2UL) += sum( xmm7 );
2221  (~C)(i+1UL,j+3UL) += sum( xmm8 );
2222  }
2223
      // 2x2 tile: rows i, i+1 and columns j, j+1.
2224  for( ; (j+2UL) <= N; j+=2UL )
2225  {
2226  const size_t kbegin( ( IsUpper<MT4>::value )
2227  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2228  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2229  const size_t kend( ( IsLower<MT4>::value )
2230  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2231  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2232
2233  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2234
2235  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2236  const IntrinsicType a1( A.load(i ,k) );
2237  const IntrinsicType a2( A.load(i+1UL,k) );
2238  const IntrinsicType b1( B.load(k,j ) );
2239  const IntrinsicType b2( B.load(k,j+1UL) );
2240  xmm1 = xmm1 + a1 * b1;
2241  xmm2 = xmm2 + a1 * b2;
2242  xmm3 = xmm3 + a2 * b1;
2243  xmm4 = xmm4 + a2 * b2;
2244  }
2245
2246  (~C)(i ,j ) += sum( xmm1 );
2247  (~C)(i ,j+1UL) += sum( xmm2 );
2248  (~C)(i+1UL,j ) += sum( xmm3 );
2249  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2250  }
2251
      // 2x1 tile: rows i, i+1 and the single remaining column j.
2252  if( j < N )
2253  {
2254  const size_t kbegin( ( IsUpper<MT4>::value )
2255  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2256  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2257  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2258
2259  IntrinsicType xmm1, xmm2;
2260
2261  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2262  const IntrinsicType b1( B.load(k,j) );
2263  xmm1 = xmm1 + A.load(i ,k) * b1;
2264  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2265  }
2266
2267  (~C)(i ,j) += sum( xmm1 );
2268  (~C)(i+1UL,j) += sum( xmm2 );
2269  }
2270  }
      // Remainder: a single last row i (reached if M is odd).
2271  if( i < M )
2272  {
2273  size_t j( 0UL );
2274
      // 1x4 tile: row i and columns j .. j+3.
2275  for( ; (j+4UL) <= N; j+=4UL )
2276  {
2277  const size_t kbegin( ( IsUpper<MT4>::value )
2278  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2279  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2280  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
2281
2282  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2283
2284  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2285  const IntrinsicType a1( A.load(i,k) );
2286  xmm1 = xmm1 + a1 * B.load(k,j );
2287  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2288  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2289  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2290  }
2291
2292  (~C)(i,j ) += sum( xmm1 );
2293  (~C)(i,j+1UL) += sum( xmm2 );
2294  (~C)(i,j+2UL) += sum( xmm3 );
2295  (~C)(i,j+3UL) += sum( xmm4 );
2296  }
2297
      // 1x2 tile: row i and columns j, j+1.
2298  for( ; (j+2UL) <= N; j+=2UL )
2299  {
2300  const size_t kbegin( ( IsUpper<MT4>::value )
2301  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2302  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2303  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2304
2305  IntrinsicType xmm1, xmm2;
2306
2307  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2308  const IntrinsicType a1( A.load(i,k) );
2309  xmm1 = xmm1 + a1 * B.load(k,j );
2310  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2311  }
2312
2313  (~C)(i,j ) += sum( xmm1 );
2314  (~C)(i,j+1UL) += sum( xmm2 );
2315  }
2316
      // 1x1 tile: the single remaining element (i,j); k runs up to K here.
2317  if( j < N )
2318  {
2319  const size_t kbegin( ( IsUpper<MT4>::value )
2320  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2321  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2322
2323  IntrinsicType xmm1;
2324
2325  for( size_t k=kbegin; k<K; k+=IT::size ) {
2326  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2327  }
2328
2329  (~C)(i,j) += sum( xmm1 );
2330  }
2331  }
2332  }
2334  //**********************************************************************************************
2335 
2336  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized "small matrix" addition-assignment kernel (C += A * B) for a
// column-major target (DenseMatrix<MT3,true>). Enabled only if
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
//   C  Target matrix, accessed element-wise through (~C)(i,j).
//   A  Left-hand side operand, read through A.load(i,k).
//   B  Right-hand side operand, read through B.load(k,j).
//
// C is processed in tiles of 4x2 and 4x1 elements, then 2x2 and 2x1 elements,
// and finally a possibly remaining last row in tiles of 1x2 and 1x1 elements.
// For every tile the range [kbegin,kend) is traversed in steps of IT::size;
// the values returned by A.load() and B.load() are multiplied and accumulated
// in one IntrinsicType accumulator per target element, and sum() of each
// accumulator is added to the corresponding element of C.
//
// kbegin and kend are narrowed via the compile-time traits IsUpper/IsLower of
// the two operand types. kbegin is masked with size_t(-IT::size), which rounds
// it down to a multiple of IT::size provided IT::size is a power of two.
//
// NOTE(review): the accumulators are default-constructed and then read in
// "xmm = xmm + ..."; this presumably relies on IntrinsicType's default
// constructor yielding zero - confirm.
// NOTE(review): k advances by IT::size even if kend-kbegin is not a multiple
// of IT::size, so load() may be called for a range extending past kend;
// presumably this is covered by padding of the operands - confirm.
2351  template< typename MT3 // Type of the left-hand side target matrix
2352  , typename MT4 // Type of the left-hand side matrix operand
2353  , typename MT5 > // Type of the right-hand side matrix operand
2354  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2355  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2356  {
2357  typedef IntrinsicTrait<ElementType> IT;
2358
2359  const size_t M( A.rows() );
2360  const size_t N( B.columns() );
2361  const size_t K( A.columns() );
2362
2363  size_t i( 0UL );
2364
      // Main part: four rows of C per iteration.
2365  for( ; (i+4UL) <= M; i+=4UL )
2366  {
2367  size_t j( 0UL );
2368
      // 4x2 tile: rows i .. i+3 and columns j, j+1.
2369  for( ; (j+2UL) <= N; j+=2UL )
2370  {
2371  const size_t kbegin( ( IsUpper<MT4>::value )
2372  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2373  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2374  const size_t kend( ( IsLower<MT4>::value )
2375  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
2376  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2377
2378  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2379
2380  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2381  const IntrinsicType a1( A.load(i ,k) );
2382  const IntrinsicType a2( A.load(i+1UL,k) );
2383  const IntrinsicType a3( A.load(i+2UL,k) );
2384  const IntrinsicType a4( A.load(i+3UL,k) );
2385  const IntrinsicType b1( B.load(k,j ) );
2386  const IntrinsicType b2( B.load(k,j+1UL) );
2387  xmm1 = xmm1 + a1 * b1;
2388  xmm2 = xmm2 + a1 * b2;
2389  xmm3 = xmm3 + a2 * b1;
2390  xmm4 = xmm4 + a2 * b2;
2391  xmm5 = xmm5 + a3 * b1;
2392  xmm6 = xmm6 + a3 * b2;
2393  xmm7 = xmm7 + a4 * b1;
2394  xmm8 = xmm8 + a4 * b2;
2395  }
2396
2397  (~C)(i ,j ) += sum( xmm1 );
2398  (~C)(i ,j+1UL) += sum( xmm2 );
2399  (~C)(i+1UL,j ) += sum( xmm3 );
2400  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2401  (~C)(i+2UL,j ) += sum( xmm5 );
2402  (~C)(i+2UL,j+1UL) += sum( xmm6 );
2403  (~C)(i+3UL,j ) += sum( xmm7 );
2404  (~C)(i+3UL,j+1UL) += sum( xmm8 );
2405  }
2406
      // 4x1 tile: rows i .. i+3 and the single remaining column j.
2407  if( j < N )
2408  {
2409  const size_t kbegin( ( IsUpper<MT4>::value )
2410  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2411  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2412  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
2413
2414  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2415
2416  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2417  const IntrinsicType b1( B.load(k,j) );
2418  xmm1 = xmm1 + A.load(i ,k) * b1;
2419  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2420  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2421  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2422  }
2423
2424  (~C)(i ,j) += sum( xmm1 );
2425  (~C)(i+1UL,j) += sum( xmm2 );
2426  (~C)(i+2UL,j) += sum( xmm3 );
2427  (~C)(i+3UL,j) += sum( xmm4 );
2428  }
2429  }
2430
      // Two rows of C per iteration (rows left over by the 4-row loop above).
2431  for( ; (i+2UL) <= M; i+=2UL )
2432  {
2433  size_t j( 0UL );
2434
      // 2x2 tile: rows i, i+1 and columns j, j+1.
2435  for( ; (j+2UL) <= N; j+=2UL )
2436  {
2437  const size_t kbegin( ( IsUpper<MT4>::value )
2438  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2439  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2440  const size_t kend( ( IsLower<MT4>::value )
2441  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
2442  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
2443
2444  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2445
2446  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2447  const IntrinsicType a1( A.load(i ,k) );
2448  const IntrinsicType a2( A.load(i+1UL,k) );
2449  const IntrinsicType b1( B.load(k,j ) );
2450  const IntrinsicType b2( B.load(k,j+1UL) );
2451  xmm1 = xmm1 + a1 * b1;
2452  xmm2 = xmm2 + a1 * b2;
2453  xmm3 = xmm3 + a2 * b1;
2454  xmm4 = xmm4 + a2 * b2;
2455  }
2456
2457  (~C)(i ,j ) += sum( xmm1 );
2458  (~C)(i ,j+1UL) += sum( xmm2 );
2459  (~C)(i+1UL,j ) += sum( xmm3 );
2460  (~C)(i+1UL,j+1UL) += sum( xmm4 );
2461  }
2462
      // 2x1 tile: rows i, i+1 and the single remaining column j.
2463  if( j < N )
2464  {
2465  const size_t kbegin( ( IsUpper<MT4>::value )
2466  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2467  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2468  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
2469
2470  IntrinsicType xmm1, xmm2;
2471
2472  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2473  const IntrinsicType b1( B.load(k,j) );
2474  xmm1 = xmm1 + A.load(i ,k) * b1;
2475  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2476  }
2477
2478  (~C)(i ,j) += sum( xmm1 );
2479  (~C)(i+1UL,j) += sum( xmm2 );
2480  }
2481  }
2482
      // Remainder: a single last row i (reached if M is odd).
2483  if( i < M )
2484  {
2485  size_t j( 0UL );
2486
      // 1x2 tile: row i and columns j, j+1.
2487  for( ; (j+2UL) <= N; j+=2UL )
2488  {
2489  const size_t kbegin( ( IsUpper<MT4>::value )
2490  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2491  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2492  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
2493
2494  IntrinsicType xmm1, xmm2;
2495
2496  for( size_t k=kbegin; k<kend; k+=IT::size ) {
2497  const IntrinsicType a1( A.load(i,k) );
2498  xmm1 = xmm1 + a1 * B.load(k,j );
2499  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2500  }
2501
2502  (~C)(i,j ) += sum( xmm1 );
2503  (~C)(i,j+1UL) += sum( xmm2 );
2504  }
2505
      // 1x1 tile: the single remaining element (i,j); k runs up to K here.
2506  if( j < N )
2507  {
2508  const size_t kbegin( ( IsUpper<MT4>::value )
2509  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
2510  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
2511
2512  IntrinsicType xmm1;
2513
2514  for( size_t k=kbegin; k<K; k+=IT::size ) {
2515  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2516  }
2517
2518  (~C)(i,j) += sum( xmm1 );
2519  }
2520  }
2521  }
2523  //**********************************************************************************************
2524 
2525  //**Default addition assignment to dense matrices (large matrices)******************************
// Non-vectorized "large matrix" addition-assignment kernel.
//
// This overload takes part in overload resolution only if
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does NOT hold (DisableIf). It
// forwards C, A and B unchanged to selectDefaultAddAssignKernel().
2539  template< typename MT3 // Type of the left-hand side target matrix
2540  , typename MT4 // Type of the left-hand side matrix operand
2541  , typename MT5 > // Type of the right-hand side matrix operand
2542  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2543  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2544  {
2545  selectDefaultAddAssignKernel( C, A, B );
2546  }
2548  //**********************************************************************************************
2549 
2550  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
// Vectorized "large matrix" addition-assignment kernel for a row-major target
// (DenseMatrix<MT3,false>), enabled if UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds.
//
// There is no dedicated large-matrix implementation yet (see the TODO below);
// the call is forwarded to selectSmallAddAssignKernel() with ~C as target.
// NOTE(review): operator~ presumably converts the DenseMatrix reference to the
// derived matrix type MT3 - its definition is outside this section.
2565  template< typename MT3 // Type of the left-hand side target matrix
2566  , typename MT4 // Type of the left-hand side matrix operand
2567  , typename MT5 > // Type of the right-hand side matrix operand
2568  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2569  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2570  {
2571  // TODO
2572  selectSmallAddAssignKernel( ~C, A, B );
2573  }
2575  //**********************************************************************************************
2576 
2577  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
// Vectorized "large matrix" addition-assignment kernel for a column-major
// target (DenseMatrix<MT3,true>), enabled if
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//
// There is no dedicated large-matrix implementation yet (see the TODO below);
// the call is forwarded to selectSmallAddAssignKernel() with ~C as target.
// NOTE(review): operator~ presumably converts the DenseMatrix reference to the
// derived matrix type MT3 - its definition is outside this section.
2592  template< typename MT3 // Type of the left-hand side target matrix
2593  , typename MT4 // Type of the left-hand side matrix operand
2594  , typename MT5 > // Type of the right-hand side matrix operand
2595  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
2596  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2597  {
2598  // TODO
2599  selectSmallAddAssignKernel( ~C, A, B );
2600  }
2602  //**********************************************************************************************
2603 
2604  //**Default addition assignment to dense matrices***********************************************
// Fallback overload of the BLAS addition-assignment kernel.
//
// Enabled if UseDefaultKernel<MT3,MT4,MT5> holds. Unlike the other
// selectBlasAddAssignKernel() overloads it is not guarded by BLAZE_BLAS_MODE
// and calls no BLAS routine; it forwards C, A and B unchanged to
// selectLargeAddAssignKernel().
2618  template< typename MT3 // Type of the left-hand side target matrix
2619  , typename MT4 // Type of the left-hand side matrix operand
2620  , typename MT5 > // Type of the right-hand side matrix operand
2621  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2622  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2623  {
2624  selectLargeAddAssignKernel( C, A, B );
2625  }
2627  //**********************************************************************************************
2628 
2629  //**BLAS-based addition assignment to dense matrices (single precision)*************************
2630 #if BLAZE_BLAS_MODE
2631 
// BLAS-based addition-assignment kernel, selected if
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Only compiled if
// BLAZE_BLAS_MODE is enabled (see the surrounding #if).
//
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
//
// Three cases are distinguished via the compile-time trait IsTriangular:
//  - A is triangular: B is copied into a temporary of type MT3::ResultType,
//    strmm() is called on it with A on the CblasLeft side, and the temporary
//    is added to C via addAssign().
//  - otherwise, B is triangular: A is copied into the temporary, strmm() is
//    called with B on the CblasRight side, and the temporary is added to C.
//  - otherwise: sgemm() is called directly on C with both scalars 1.0F.
// NOTE(review): the scalar arguments are presumably the BLAS alpha (and, for
// sgemm, beta) factors, so that sgemm accumulates A*B onto C - confirm against
// the wrapper declarations, which are outside this section.
2644  template< typename MT3 // Type of the left-hand side target matrix
2645  , typename MT4 // Type of the left-hand side matrix operand
2646  , typename MT5 > // Type of the right-hand side matrix operand
2647  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2648  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2649  {
2650  if( IsTriangular<MT4>::value ) {
2651  typename MT3::ResultType tmp( B );
2652  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2653  addAssign( C, tmp );
2654  }
2655  else if( IsTriangular<MT5>::value ) {
2656  typename MT3::ResultType tmp( A );
2657  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
2658  addAssign( C, tmp );
2659  }
2660  else {
2661  sgemm( C, A, B, 1.0F, 1.0F );
2662  }
2663  }
2665 #endif
2666  //**********************************************************************************************
2667 
2668  //**BLAS-based addition assignment to dense matrices (double precision)*************************
2669 #if BLAZE_BLAS_MODE
2670 
// BLAS-based addition-assignment kernel, selected if
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Only compiled if
// BLAZE_BLAS_MODE is enabled (see the surrounding #if).
//
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
//
// Three cases are distinguished via the compile-time trait IsTriangular:
//  - A is triangular: B is copied into a temporary of type MT3::ResultType,
//    dtrmm() is called on it with A on the CblasLeft side, and the temporary
//    is added to C via addAssign().
//  - otherwise, B is triangular: A is copied into the temporary, dtrmm() is
//    called with B on the CblasRight side, and the temporary is added to C.
//  - otherwise: dgemm() is called directly on C with both scalars 1.0.
// NOTE(review): the scalar arguments are presumably the BLAS alpha (and, for
// dgemm, beta) factors, so that dgemm accumulates A*B onto C - confirm against
// the wrapper declarations, which are outside this section.
2683  template< typename MT3 // Type of the left-hand side target matrix
2684  , typename MT4 // Type of the left-hand side matrix operand
2685  , typename MT5 > // Type of the right-hand side matrix operand
2686  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2687  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2688  {
2689  if( IsTriangular<MT4>::value ) {
2690  typename MT3::ResultType tmp( B );
2691  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2692  addAssign( C, tmp );
2693  }
2694  else if( IsTriangular<MT5>::value ) {
2695  typename MT3::ResultType tmp( A );
2696  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
2697  addAssign( C, tmp );
2698  }
2699  else {
2700  dgemm( C, A, B, 1.0, 1.0 );
2701  }
2702  }
2704 #endif
2705  //**********************************************************************************************
2706 
2707  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
2708 #if BLAZE_BLAS_MODE
2709 
// BLAS-based addition-assignment kernel, selected if
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Only compiled if
// BLAZE_BLAS_MODE is enabled (see the surrounding #if).
//
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
//
// Three cases are distinguished via the compile-time trait IsTriangular:
//  - A is triangular: B is copied into a temporary of type MT3::ResultType,
//    ctrmm() is called on it with A on the CblasLeft side, and the temporary
//    is added to C via addAssign().
//  - otherwise, B is triangular: A is copied into the temporary, ctrmm() is
//    called with B on the CblasRight side, and the temporary is added to C.
//  - otherwise: cgemm() is called directly on C.
// All scalar arguments are complex<float>( 1.0F, 0.0F ).
// NOTE(review): the scalar arguments are presumably the BLAS alpha (and, for
// cgemm, beta) factors, so that cgemm accumulates A*B onto C - confirm against
// the wrapper declarations, which are outside this section.
2722  template< typename MT3 // Type of the left-hand side target matrix
2723  , typename MT4 // Type of the left-hand side matrix operand
2724  , typename MT5 > // Type of the right-hand side matrix operand
2725  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2726  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2727  {
2728  if( IsTriangular<MT4>::value ) {
2729  typename MT3::ResultType tmp( B );
2730  ctrmm( tmp, A, CblasLeft,
2731  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2732  complex<float>( 1.0F, 0.0F ) );
2733  addAssign( C, tmp );
2734  }
2735  else if( IsTriangular<MT5>::value ) {
2736  typename MT3::ResultType tmp( A );
2737  ctrmm( tmp, B, CblasRight,
2738  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2739  complex<float>( 1.0F, 0.0F ) );
2740  addAssign( C, tmp );
2741  }
2742  else {
2743  cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2744  }
2745  }
2747 #endif
2748  //**********************************************************************************************
2749 
2750  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
2751 #if BLAZE_BLAS_MODE
2752 
// BLAS-based addition-assignment kernel, selected if
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Only compiled if
// BLAZE_BLAS_MODE is enabled (see the surrounding #if).
//
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
//
// Three cases are distinguished via the compile-time trait IsTriangular:
//  - A is triangular: B is copied into a temporary of type MT3::ResultType,
//    ztrmm() is called on it with A on the CblasLeft side, and the temporary
//    is added to C via addAssign().
//  - otherwise, B is triangular: A is copied into the temporary, ztrmm() is
//    called with B on the CblasRight side, and the temporary is added to C.
//  - otherwise: zgemm() is called directly on C.
// All scalar arguments are complex<double>( 1.0, 0.0 ).
// NOTE(review): the scalar arguments are presumably the BLAS alpha (and, for
// zgemm, beta) factors, so that zgemm accumulates A*B onto C - confirm against
// the wrapper declarations, which are outside this section.
2765  template< typename MT3 // Type of the left-hand side target matrix
2766  , typename MT4 // Type of the left-hand side matrix operand
2767  , typename MT5 > // Type of the right-hand side matrix operand
2768  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2769  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
2770  {
2771  if( IsTriangular<MT4>::value ) {
2772  typename MT3::ResultType tmp( B );
2773  ztrmm( tmp, A, CblasLeft,
2774  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
2775  complex<double>( 1.0, 0.0 ) );
2776  addAssign( C, tmp );
2777  }
2778  else if( IsTriangular<MT5>::value ) {
2779  typename MT3::ResultType tmp( A );
2780  ztrmm( tmp, B, CblasRight,
2781  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
2782  complex<double>( 1.0, 0.0 ) );
2783  addAssign( C, tmp );
2784  }
2785  else {
2786  zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2787  }
2788  }
2790 #endif
2791  //**********************************************************************************************
2792 
2793  //**Addition assignment to sparse matrices******************************************************
2794  // No special implementation for the addition assignment to sparse matrices.
2795  //**********************************************************************************************
2796 
2797  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a DMatTDMatMultExpr to a dense matrix.
//
//   lhs  Target dense matrix (of either storage order SO).
//   rhs  Multiplication expression whose value is subtracted from lhs.
//
// The function asserts that the dimensions of lhs and rhs match, returns
// immediately if lhs has no rows or no columns or if the inner dimension
// (rhs.lhs_.columns()) is zero, evaluates both operands of the expression via
// serial() into objects of type LT and RT (typedefs declared outside this
// section), and finally dispatches to selectSubAssignKernel().
2810  template< typename MT // Type of the target dense matrix
2811  , bool SO > // Storage order of the target dense matrix
2812  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2813  {
2815
2816  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2817  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2818
      // Nothing to subtract if the target or the inner dimension is empty.
2819  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2820  return;
2821  }
2822
2823  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
2824  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
2825
2826  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2827  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2828  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2829  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2830  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2831  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2832
2833  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
2834  }
2836  //**********************************************************************************************
2837 
2838  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Selects the kernel used for the subtraction assignment C -= A * B.
//
//   C  Target matrix.
//   A  Left-hand side operand.
//   B  Right-hand side operand.
//
// selectSmallSubAssignKernel() is used if at least one operand type is
// diagonal (IsDiagonal) or if the number of elements of C
// (C.rows() * C.columns()) is below DMATTDMATMULT_THRESHOLD; otherwise
// selectBlasSubAssignKernel() is used.
2849  template< typename MT3 // Type of the left-hand side target matrix
2850  , typename MT4 // Type of the left-hand side matrix operand
2851  , typename MT5 > // Type of the right-hand side matrix operand
2852  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2853  {
2854  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
2855  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
2856  selectSmallSubAssignKernel( C, A, B );
2857  else
2858  selectBlasSubAssignKernel( C, A, B );
2859  }
2861  //**********************************************************************************************
2862 
2863  //**Default subtraction assignment to row-major dense matrices (general/general)****************
// Default (scalar) subtraction-assignment kernel C -= A * B for a row-major
// target (DenseMatrix<MT3,false>); enabled only if neither MT4 nor MT5 is
// diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side operand, accessed through A(i,k).
//   B  Right-hand side operand, accessed through B(k,j).
//
// The kernel iterates rows i (outer loop), columns j and the inner index k.
// All three ranges are narrowed via the compile-time traits
// IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper of the operand types, so
// that only the index ranges selected by those traits are visited. The k loop
// is unrolled by a factor of two with a single-element remainder step.
2877  template< typename MT3 // Type of the left-hand side target matrix
2878  , typename MT4 // Type of the left-hand side matrix operand
2879  , typename MT5 > // Type of the right-hand side matrix operand
2880  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
2881  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
2882  {
2883  const size_t M( A.rows() );
2884  const size_t N( B.columns() );
2885  const size_t K( A.columns() );
2886
      // Row range; the "M > 1UL" guards keep M-2UL from wrapping around for M == 1.
2887  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
2888  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
2889  :( 0UL ) );
2890  const size_t iend( ( IsStrictlyUpper<MT4>::value )
2891  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
2892  :( M ) );
2893  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2894
2895  for( size_t i=ibegin; i<iend; ++i )
2896  {
      // Column range of row i.
2897  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
2898  ?( ( IsStrictlyUpper<MT4>::value )
2899  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
2900  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
2901  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
2902  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
2903  ?( ( IsStrictlyLower<MT4>::value )
2904  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
2905  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
2906  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
2907  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2908
2909  for( size_t j=jbegin; j<jend; ++j )
2910  {
      // Inner index range for the element (i,j).
2911  const size_t kbegin( ( IsUpper<MT4>::value )
2912  ?( ( IsLower<MT5>::value )
2913  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2914  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2915  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2916  :( ( IsLower<MT5>::value )
2917  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
2918  :( 0UL ) ) );
2919  const size_t kend( ( IsLower<MT4>::value )
2920  ?( ( IsUpper<MT5>::value )
2921  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
2922  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
2923  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
2924  :( ( IsUpper<MT5>::value )
2925  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
2926  :( K ) ) );
2927  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
2928
      // kpos: end of the part of [kbegin,kend) with an even number of steps
      // (knum & size_t(-2) clears the lowest bit of knum).
2929  const size_t knum( kend - kbegin );
2930  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
2931
2932  for( size_t k=kbegin; k<kpos; k+=2UL ) {
2933  (~C)(i,j) -= A(i,k ) * B(k ,j);
2934  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
2935  }
      // Remainder step for an odd knum.
2936  if( kpos < kend ) {
2937  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
2938  }
2939  }
2940  }
2941  }
2943  //**********************************************************************************************
2944 
2945  //**Default subtraction assignment to column-major dense matrices (general/general)*************
// Default (scalar) subtraction-assignment kernel C -= A * B for a
// column-major target (DenseMatrix<MT3,true>); enabled only if neither MT4 nor
// MT5 is diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side operand, accessed through A(i,k).
//   B  Right-hand side operand, accessed through B(k,j).
//
// The kernel iterates columns j (outer loop), rows i and the inner index k.
// All three ranges are narrowed via the compile-time traits
// IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper of the operand types, so
// that only the index ranges selected by those traits are visited. The k loop
// is unrolled by a factor of two with a single-element remainder step.
2959  template< typename MT3 // Type of the left-hand side target matrix
2960  , typename MT4 // Type of the left-hand side matrix operand
2961  , typename MT5 > // Type of the right-hand side matrix operand
2962  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
2963  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
2964  {
2965  const size_t M( A.rows() );
2966  const size_t N( B.columns() );
2967  const size_t K( A.columns() );
2968
      // Column range; the "N > 1UL" guards keep N-2UL from wrapping around for N == 1.
2969  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
2970  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
2971  :( 0UL ) );
2972  const size_t jend( ( IsStrictlyLower<MT5>::value )
2973  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
2974  :( N ) );
2975  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
2976
2977  for( size_t j=jbegin; j<jend; ++j )
2978  {
      // Row range of column j.
2979  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
2980  ?( ( IsStrictlyLower<MT4>::value )
2981  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
2982  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
2983  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
2984  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
2985  ?( ( IsStrictlyUpper<MT4>::value )
2986  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
2987  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
2988  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
2989  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
2990
2991  for( size_t i=ibegin; i<iend; ++i )
2992  {
      // Inner index range for the element (i,j).
2993  const size_t kbegin( ( IsUpper<MT4>::value )
2994  ?( ( IsLower<MT5>::value )
2995  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
2996  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
2997  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
2998  :( ( IsLower<MT5>::value )
2999  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3000  :( 0UL ) ) );
3001  const size_t kend( ( IsLower<MT4>::value )
3002  ?( ( IsUpper<MT5>::value )
3003  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
3004  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
3005  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
3006  :( ( IsUpper<MT5>::value )
3007  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3008  :( K ) ) );
3009  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
3010
      // kpos: end of the part of [kbegin,kend) with an even number of steps
      // (knum & size_t(-2) clears the lowest bit of knum).
3011  const size_t knum( kend - kbegin );
3012  const size_t kpos( kbegin + ( knum & size_t(-2) ) );
3013
3014  for( size_t k=kbegin; k<kpos; k+=2UL ) {
3015  (~C)(i,j) -= A(i,k ) * B(k ,j);
3016  (~C)(i,j) -= A(i,k+1UL) * B(k+1UL,j);
3017  }
      // Remainder step for an odd knum.
3018  if( kpos < kend ) {
3019  (~C)(i,j) -= A(i,kpos) * B(kpos,j);
3020  }
3021  }
3022  }
3023  }
3025  //**********************************************************************************************
3026 
3027  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
// Default subtraction-assignment kernel C -= A * B for a row-major target
// (DenseMatrix<MT3,false>) where A is not diagonal and B is diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side operand, accessed through A(i,j).
//   B  Right-hand side (diagonal) operand; only B(j,j) is read.
//
// Since only the diagonal of B is used, every element reduces to
// C(i,j) -= A(i,j) * B(j,j). For each row i the column range is narrowed via
// IsUpper/IsLower/IsStrictlyUpper/IsStrictlyLower of MT4, and the j loop is
// unrolled by a factor of two with a single-element remainder step.
3041  template< typename MT3 // Type of the left-hand side target matrix
3042  , typename MT4 // Type of the left-hand side matrix operand
3043  , typename MT5 > // Type of the right-hand side matrix operand
3044  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
3045  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3046  {
3047  const size_t M( A.rows() );
3048  const size_t N( B.columns() );
3049
3050  for( size_t i=0UL; i<M; ++i )
3051  {
3052  const size_t jbegin( ( IsUpper<MT4>::value )
3053  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
3054  :( 0UL ) );
3055  const size_t jend( ( IsLower<MT4>::value )
3056  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
3057  :( N ) );
3058  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
3059
      // jpos: end of the part of [jbegin,jend) with an even number of steps.
3060  const size_t jnum( jend - jbegin );
3061  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
3062
3063  for( size_t j=jbegin; j<jpos; j+=2UL ) {
3064  (~C)(i,j ) -= A(i,j ) * B(j ,j );
3065  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL);
3066  }
      // Remainder step for an odd jnum.
3067  if( jpos < jend ) {
3068  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos);
3069  }
3070  }
3071  }
3073  //**********************************************************************************************
3074 
3075  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
// Default subtraction-assignment kernel C -= A * B for a column-major target
// (DenseMatrix<MT3,true>) where A is not diagonal and B is diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side operand, accessed through A(i,j).
//   B  Right-hand side (diagonal) operand; only B(j,j) is read.
//
// Every element reduces to C(i,j) -= A(i,j) * B(j,j). The index space is
// traversed in blocks of 16 x 16 elements (jj/ii are the block origins); within
// a block the row range of column j is additionally clipped according to
// IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper of MT4.
// NOTE(review): the blocking is presumably a cache optimization for the
// differing access patterns of C and A - not stated in this section.
3089  template< typename MT3 // Type of the left-hand side target matrix
3090  , typename MT4 // Type of the left-hand side matrix operand
3091  , typename MT5 > // Type of the right-hand side matrix operand
3092  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
3093  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3094  {
3095  const size_t M( A.rows() );
3096  const size_t N( B.columns() );
3097
3098  const size_t block( 16UL );
3099
3100  for( size_t jj=0UL; jj<N; jj+=block ) {
3101  const size_t jend( min( N, jj+block ) );
3102  for( size_t ii=0UL; ii<M; ii+=block ) {
3103  const size_t iend( min( M, ii+block ) );
3104  for( size_t j=jj; j<jend; ++j )
3105  {
      // Row range of column j, clipped to the current block [ii,iend).
3106  const size_t ibegin( ( IsLower<MT4>::value )
3107  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
3108  :( ii ) );
3109  const size_t ipos( ( IsUpper<MT4>::value )
3110  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
3111  :( iend ) );
3112
3113  for( size_t i=ibegin; i<ipos; ++i ) {
3114  (~C)(i,j) -= A(i,j) * B(j,j);
3115  }
3116  }
3117  }
3118  }
3119  }
3121  //**********************************************************************************************
3122 
3123  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
// Default subtraction-assignment kernel C -= A * B for a row-major target
// (DenseMatrix<MT3,false>) where A is diagonal and B is not diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side (diagonal) operand; only A(i,i) is read.
//   B  Right-hand side operand, accessed through B(i,j).
//
// Every element reduces to C(i,j) -= A(i,i) * B(i,j). The index space is
// traversed in blocks of 16 x 16 elements (ii/jj are the block origins); within
// a block the column range of row i is additionally clipped according to
// IsUpper/IsLower/IsStrictlyUpper/IsStrictlyLower of MT5.
// NOTE(review): the blocking is presumably a cache optimization for the
// differing access patterns of C and B - not stated in this section.
3137  template< typename MT3 // Type of the left-hand side target matrix
3138  , typename MT4 // Type of the left-hand side matrix operand
3139  , typename MT5 > // Type of the right-hand side matrix operand
3140  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
3141  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3142  {
3143  const size_t M( A.rows() );
3144  const size_t N( B.columns() );
3145
3146  const size_t block( 16UL );
3147
3148  for( size_t ii=0UL; ii<M; ii+=block ) {
3149  const size_t iend( min( M, ii+block ) );
3150  for( size_t jj=0UL; jj<N; jj+=block ) {
3151  const size_t jend( min( N, jj+block ) );
3152  for( size_t i=ii; i<iend; ++i )
3153  {
      // Column range of row i, clipped to the current block [jj,jend).
3154  const size_t jbegin( ( IsUpper<MT5>::value )
3155  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
3156  :( jj ) );
3157  const size_t jpos( ( IsLower<MT5>::value )
3158  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
3159  :( jend ) );
3160
3161  for( size_t j=jbegin; j<jpos; ++j ) {
3162  (~C)(i,j) -= A(i,i) * B(i,j);
3163  }
3164  }
3165  }
3166  }
3167  }
3169  //**********************************************************************************************
3170 
3171  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
// Default subtraction-assignment kernel C -= A * B for a column-major target
// (DenseMatrix<MT3,true>) where A is diagonal and B is not diagonal.
//
//   C  Target matrix, accessed through (~C)(i,j).
//   A  Left-hand side (diagonal) operand; only A(i,i) is read.
//   B  Right-hand side operand, accessed through B(i,j).
//
// Every element reduces to C(i,j) -= A(i,i) * B(i,j). For each column j the
// row range is narrowed via IsLower/IsUpper/IsStrictlyLower/IsStrictlyUpper of
// MT5, and the i loop is unrolled by a factor of two with a single-element
// remainder step.
3185  template< typename MT3 // Type of the left-hand side target matrix
3186  , typename MT4 // Type of the left-hand side matrix operand
3187  , typename MT5 > // Type of the right-hand side matrix operand
3188  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
3189  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3190  {
3191  const size_t M( A.rows() );
3192  const size_t N( B.columns() );
3193
3194  for( size_t j=0UL; j<N; ++j )
3195  {
3196  const size_t ibegin( ( IsLower<MT5>::value )
3197  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
3198  :( 0UL ) );
3199  const size_t iend( ( IsUpper<MT5>::value )
3200  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
3201  :( M ) );
3202  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
3203
      // ipos: end of the part of [ibegin,iend) with an even number of steps.
3204  const size_t inum( iend - ibegin );
3205  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
3206
3207  for( size_t i=ibegin; i<ipos; i+=2UL ) {
3208  (~C)(i ,j) -= A(i ,i ) * B(i ,j);
3209  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j);
3210  }
      // Remainder step for an odd inum.
3211  if( ipos < iend ) {
3212  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j);
3213  }
3214  }
3215  }
3217  //**********************************************************************************************
3218 
3219  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
3233  template< typename MT3 // Type of the left-hand side target matrix
3234  , typename MT4 // Type of the left-hand side matrix operand
3235  , typename MT5 > // Type of the right-hand side matrix operand
3236  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
3237  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3238  {
3239  for( size_t i=0UL; i<A.rows(); ++i ) {
3240  C(i,i) -= A(i,i) * B(i,i);
3241  }
3242  }
3244  //**********************************************************************************************
3245 
3246  //**Default subtraction assignment to dense matrices (small matrices)***************************
3260  template< typename MT3 // Type of the left-hand side target matrix
3261  , typename MT4 // Type of the left-hand side matrix operand
3262  , typename MT5 > // Type of the right-hand side matrix operand
3263  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3264  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3265  {
3266  selectDefaultSubAssignKernel( ~C, A, B );
3267  }
3269  //**********************************************************************************************
3270 
3271  //**Default subtraction assignment to row-major dense matrices (small matrices)*****************
/*!\brief Vectorized small-matrix kernel for the subtraction assignment to a row-major dense
//        matrix (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is satisfied. The
// target is updated tile by tile: pairs of rows are combined with groups of four, two and
// finally one column; a single remaining row is treated the same way. For every tile the
// products of A.load(i,k) and B.load(k,j) are accumulated along k in IntrinsicType values
// (IT::size elements per step) and the sum() of each accumulator is subtracted from the
// matching element of C.
//
// NOTE(review): the accumulators are default-constructed and immediately read in
// 'xmmN + ...'; this presumably relies on IntrinsicType initializing itself to zero -- confirm.
// NOTE(review): the k loops advance in steps of IT::size without a scalar remainder loop, so
// load() can reach past kend/K; presumably covered by padding of the operands -- confirm.
*/
3286  template< typename MT3 // Type of the left-hand side target matrix
3287  , typename MT4 // Type of the left-hand side matrix operand
3288  , typename MT5 > // Type of the right-hand side matrix operand
3289  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3290  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3291  {
3292  typedef IntrinsicTrait<ElementType> IT;
3293 
3294  const size_t M( A.rows() );
3295  const size_t N( B.columns() );
3296  const size_t K( A.columns() );
3297 
3298  size_t i( 0UL );
3299 
// Rows are processed in pairs (i, i+1)
3300  for( ; (i+2UL) <= M; i+=2UL )
3301  {
3302  size_t j( 0UL );
3303 
// 2x4 tile: rows i..i+1, columns j..j+3
3304  for( ; (j+4UL) <= N; j+=4UL )
3305  {
// Start of the k range: narrowed for an upper A and/or a lower B. The mask
// size_t(-IT::size) rounds the index down to a multiple of IT::size (assuming IT::size
// is a power of two).
3306  const size_t kbegin( ( IsUpper<MT4>::value )
3307  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3308  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// End of the k range: narrowed for a lower A and/or an upper B to the extent of the tile
3309  const size_t kend( ( IsLower<MT4>::value )
3310  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
3311  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
3312 
// One accumulator per target element of the tile
3313  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3314 
3315  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3316  const IntrinsicType a1( A.load(i ,k) );
3317  const IntrinsicType a2( A.load(i+1UL,k) );
3318  const IntrinsicType b1( B.load(k,j ) );
3319  const IntrinsicType b2( B.load(k,j+1UL) );
3320  const IntrinsicType b3( B.load(k,j+2UL) );
3321  const IntrinsicType b4( B.load(k,j+3UL) );
3322  xmm1 = xmm1 + a1 * b1;
3323  xmm2 = xmm2 + a1 * b2;
3324  xmm3 = xmm3 + a1 * b3;
3325  xmm4 = xmm4 + a1 * b4;
3326  xmm5 = xmm5 + a2 * b1;
3327  xmm6 = xmm6 + a2 * b2;
3328  xmm7 = xmm7 + a2 * b3;
3329  xmm8 = xmm8 + a2 * b4;
3330  }
3331 
// Reduce each accumulator with sum() and subtract it from the target element
3332  (~C)(i ,j ) -= sum( xmm1 );
3333  (~C)(i ,j+1UL) -= sum( xmm2 );
3334  (~C)(i ,j+2UL) -= sum( xmm3 );
3335  (~C)(i ,j+3UL) -= sum( xmm4 );
3336  (~C)(i+1UL,j ) -= sum( xmm5 );
3337  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
3338  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
3339  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
3340  }
3341 
// 2x2 tile: rows i..i+1, columns j..j+1
3342  for( ; (j+2UL) <= N; j+=2UL )
3343  {
3344  const size_t kbegin( ( IsUpper<MT4>::value )
3345  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3346  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3347  const size_t kend( ( IsLower<MT4>::value )
3348  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3349  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3350 
3351  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3352 
3353  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3354  const IntrinsicType a1( A.load(i ,k) );
3355  const IntrinsicType a2( A.load(i+1UL,k) );
3356  const IntrinsicType b1( B.load(k,j ) );
3357  const IntrinsicType b2( B.load(k,j+1UL) );
3358  xmm1 = xmm1 + a1 * b1;
3359  xmm2 = xmm2 + a1 * b2;
3360  xmm3 = xmm3 + a2 * b1;
3361  xmm4 = xmm4 + a2 * b2;
3362  }
3363 
3364  (~C)(i ,j ) -= sum( xmm1 );
3365  (~C)(i ,j+1UL) -= sum( xmm2 );
3366  (~C)(i+1UL,j ) -= sum( xmm3 );
3367  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3368  }
3369 
// 2x1 tile: rows i..i+1, last remaining column j
3370  if( j < N )
3371  {
3372  const size_t kbegin( ( IsUpper<MT4>::value )
3373  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3374  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Here only a lower A restricts the end of the k range; otherwise it runs up to K
3375  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3376 
3377  IntrinsicType xmm1, xmm2;
3378 
3379  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3380  const IntrinsicType b1( B.load(k,j) );
3381  xmm1 = xmm1 + A.load(i ,k) * b1;
3382  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3383  }
3384 
3385  (~C)(i ,j) -= sum( xmm1 );
3386  (~C)(i+1UL,j) -= sum( xmm2 );
3387  }
3388  }
3389 
// Single remaining row i (odd number of rows)
3390  if( i < M )
3391  {
3392  size_t j( 0UL );
3393 
// 1x4 tile: row i, columns j..j+3
3394  for( ; (j+4UL) <= N; j+=4UL )
3395  {
3396  const size_t kbegin( ( IsUpper<MT4>::value )
3397  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3398  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Here only an upper B restricts the end of the k range; otherwise it runs up to K
3399  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
3400 
3401  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3402 
3403  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3404  const IntrinsicType a1( A.load(i,k) );
3405  xmm1 = xmm1 + a1 * B.load(k,j );
3406  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3407  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3408  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3409  }
3410 
3411  (~C)(i,j ) -= sum( xmm1 );
3412  (~C)(i,j+1UL) -= sum( xmm2 );
3413  (~C)(i,j+2UL) -= sum( xmm3 );
3414  (~C)(i,j+3UL) -= sum( xmm4 );
3415  }
3416 
// 1x2 tile: row i, columns j..j+1
3417  for( ; (j+2UL) <= N; j+=2UL )
3418  {
3419  const size_t kbegin( ( IsUpper<MT4>::value )
3420  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3421  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3422  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3423 
3424  IntrinsicType xmm1, xmm2;
3425 
3426  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3427  const IntrinsicType a1( A.load(i,k) );
3428  xmm1 = xmm1 + a1 * B.load(k,j );
3429  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3430  }
3431 
3432  (~C)(i,j ) -= sum( xmm1 );
3433  (~C)(i,j+1UL) -= sum( xmm2 );
3434  }
3435 
// 1x1 tile: last remaining element (i,j); the k range always runs up to K
3436  if( j < N )
3437  {
3438  const size_t kbegin( ( IsUpper<MT4>::value )
3439  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3440  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3441 
3442  IntrinsicType xmm1;
3443 
3444  for( size_t k=kbegin; k<K; k+=IT::size ) {
3445  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3446  }
3447 
3448  (~C)(i,j) -= sum( xmm1 );
3449  }
3450  }
3451  }
3453  //**********************************************************************************************
3454 
3455  //**Default subtraction assignment to column-major dense matrices (small matrices)**************
/*!\brief Vectorized small-matrix kernel for the subtraction assignment to a column-major dense
//        matrix (C -= A * B).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
//
// This overload is selected when UseVectorizedDefaultKernel<MT3,MT4,MT5> is satisfied. The
// target is updated tile by tile: groups of four rows are combined with pairs of columns and
// a single remaining column, then pairs of rows and finally a single remaining row are
// treated the same way. For every tile the products of A.load(i,k) and B.load(k,j) are
// accumulated along k in IntrinsicType values (IT::size elements per step) and the sum() of
// each accumulator is subtracted from the matching element of C.
//
// NOTE(review): the accumulators are default-constructed and immediately read in
// 'xmmN + ...'; this presumably relies on IntrinsicType initializing itself to zero -- confirm.
// NOTE(review): the k loops advance in steps of IT::size without a scalar remainder loop, so
// load() can reach past kend/K; presumably covered by padding of the operands -- confirm.
*/
3470  template< typename MT3 // Type of the left-hand side target matrix
3471  , typename MT4 // Type of the left-hand side matrix operand
3472  , typename MT5 > // Type of the right-hand side matrix operand
3473  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3474  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3475  {
3476  typedef IntrinsicTrait<ElementType> IT;
3477 
3478  const size_t M( A.rows() );
3479  const size_t N( B.columns() );
3480  const size_t K( A.columns() );
3481 
3482  size_t i( 0UL );
3483 
// Rows are processed in groups of four (i..i+3)
3484  for( ; (i+4UL) <= M; i+=4UL )
3485  {
3486  size_t j( 0UL );
3487 
// 4x2 tile: rows i..i+3, columns j..j+1
3488  for( ; (j+2UL) <= N; j+=2UL )
3489  {
// Start of the k range: narrowed for an upper A and/or a lower B. The mask
// size_t(-IT::size) rounds the index down to a multiple of IT::size (assuming IT::size
// is a power of two).
3490  const size_t kbegin( ( IsUpper<MT4>::value )
3491  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3492  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// End of the k range: narrowed for a lower A and/or an upper B to the extent of the tile
3493  const size_t kend( ( IsLower<MT4>::value )
3494  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
3495  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3496 
// One accumulator per target element of the tile
3497  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3498 
3499  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3500  const IntrinsicType a1( A.load(i ,k) );
3501  const IntrinsicType a2( A.load(i+1UL,k) );
3502  const IntrinsicType a3( A.load(i+2UL,k) );
3503  const IntrinsicType a4( A.load(i+3UL,k) );
3504  const IntrinsicType b1( B.load(k,j ) );
3505  const IntrinsicType b2( B.load(k,j+1UL) );
3506  xmm1 = xmm1 + a1 * b1;
3507  xmm2 = xmm2 + a1 * b2;
3508  xmm3 = xmm3 + a2 * b1;
3509  xmm4 = xmm4 + a2 * b2;
3510  xmm5 = xmm5 + a3 * b1;
3511  xmm6 = xmm6 + a3 * b2;
3512  xmm7 = xmm7 + a4 * b1;
3513  xmm8 = xmm8 + a4 * b2;
3514  }
3515 
// Reduce each accumulator with sum() and subtract it from the target element
3516  (~C)(i ,j ) -= sum( xmm1 );
3517  (~C)(i ,j+1UL) -= sum( xmm2 );
3518  (~C)(i+1UL,j ) -= sum( xmm3 );
3519  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3520  (~C)(i+2UL,j ) -= sum( xmm5 );
3521  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
3522  (~C)(i+3UL,j ) -= sum( xmm7 );
3523  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
3524  }
3525 
// 4x1 tile: rows i..i+3, last remaining column j
3526  if( j < N )
3527  {
3528  const size_t kbegin( ( IsUpper<MT4>::value )
3529  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3530  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Here only a lower A restricts the end of the k range; otherwise it runs up to K
3531  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
3532 
3533  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3534 
3535  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3536  const IntrinsicType b1( B.load(k,j) );
3537  xmm1 = xmm1 + A.load(i ,k) * b1;
3538  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3539  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3540  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3541  }
3542 
3543  (~C)(i ,j) -= sum( xmm1 );
3544  (~C)(i+1UL,j) -= sum( xmm2 );
3545  (~C)(i+2UL,j) -= sum( xmm3 );
3546  (~C)(i+3UL,j) -= sum( xmm4 );
3547  }
3548  }
3549 
// Remaining rows are processed in pairs (i, i+1)
3550  for( ; (i+2UL) <= M; i+=2UL )
3551  {
3552  size_t j( 0UL );
3553 
// 2x2 tile: rows i..i+1, columns j..j+1
3554  for( ; (j+2UL) <= N; j+=2UL )
3555  {
3556  const size_t kbegin( ( IsUpper<MT4>::value )
3557  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3558  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3559  const size_t kend( ( IsLower<MT4>::value )
3560  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
3561  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
3562 
3563  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3564 
3565  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3566  const IntrinsicType a1( A.load(i ,k) );
3567  const IntrinsicType a2( A.load(i+1UL,k) );
3568  const IntrinsicType b1( B.load(k,j ) );
3569  const IntrinsicType b2( B.load(k,j+1UL) );
3570  xmm1 = xmm1 + a1 * b1;
3571  xmm2 = xmm2 + a1 * b2;
3572  xmm3 = xmm3 + a2 * b1;
3573  xmm4 = xmm4 + a2 * b2;
3574  }
3575 
3576  (~C)(i ,j ) -= sum( xmm1 );
3577  (~C)(i ,j+1UL) -= sum( xmm2 );
3578  (~C)(i+1UL,j ) -= sum( xmm3 );
3579  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
3580  }
3581 
// 2x1 tile: rows i..i+1, last remaining column j
3582  if( j < N )
3583  {
3584  const size_t kbegin( ( IsUpper<MT4>::value )
3585  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3586  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3587  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
3588 
3589  IntrinsicType xmm1, xmm2;
3590 
3591  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3592  const IntrinsicType b1( B.load(k,j) );
3593  xmm1 = xmm1 + A.load(i ,k) * b1;
3594  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3595  }
3596 
3597  (~C)(i ,j) -= sum( xmm1 );
3598  (~C)(i+1UL,j) -= sum( xmm2 );
3599  }
3600  }
// Single remaining row i (odd number of rows)
3601  if( i < M )
3602  {
3603  size_t j( 0UL );
3604 
// 1x2 tile: row i, columns j..j+1
3605  for( ; (j+2UL) <= N; j+=2UL )
3606  {
3607  const size_t kbegin( ( IsUpper<MT4>::value )
3608  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3609  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// Here only an upper B restricts the end of the k range; otherwise it runs up to K
3610  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
3611 
3612  IntrinsicType xmm1, xmm2;
3613 
3614  for( size_t k=kbegin; k<kend; k+=IT::size ) {
3615  const IntrinsicType a1( A.load(i,k) );
3616  xmm1 = xmm1 + a1 * B.load(k,j );
3617  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3618  }
3619 
3620  (~C)(i,j ) -= sum( xmm1 );
3621  (~C)(i,j+1UL) -= sum( xmm2 );
3622  }
3623 
// 1x1 tile: last remaining element (i,j); the k range always runs up to K
3624  if( j < N )
3625  {
3626  const size_t kbegin( ( IsUpper<MT4>::value )
3627  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
3628  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
3629 
3630  IntrinsicType xmm1;
3631 
3632  for( size_t k=kbegin; k<K; k+=IT::size ) {
3633  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3634  }
3635 
3636  (~C)(i,j) -= sum( xmm1 );
3637  }
3638  }
3639  }
3641  //**********************************************************************************************
3642 
3643  //**Default subtraction assignment to dense matrices (large matrices)***************************
3657  template< typename MT3 // Type of the left-hand side target matrix
3658  , typename MT4 // Type of the left-hand side matrix operand
3659  , typename MT5 > // Type of the right-hand side matrix operand
3660  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3661  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3662  {
3663  selectDefaultSubAssignKernel( ~C, A, B );
3664  }
3666  //**********************************************************************************************
3667 
3668  //**Default subtraction assignment to row-major dense matrices (large matrices)*****************
3683  template< typename MT3 // Type of the left-hand side target matrix
3684  , typename MT4 // Type of the left-hand side matrix operand
3685  , typename MT5 > // Type of the right-hand side matrix operand
3686  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3687  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
3688  {
3689  // TODO
3690  selectSmallSubAssignKernel( ~C, A, B );
3691  }
3693  //**********************************************************************************************
3694 
3695  //**Default subtraction assignment to column-major dense matrices (large matrices)**************
3710  template< typename MT3 // Type of the left-hand side target matrix
3711  , typename MT4 // Type of the left-hand side matrix operand
3712  , typename MT5 > // Type of the right-hand side matrix operand
3713  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
3714  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
3715  {
3716  // TODO
3717  selectSmallSubAssignKernel( ~C, A, B );
3718  }
3720  //**********************************************************************************************
3721 
3722  //**Default subtraction assignment to dense matrices********************************************
3736  template< typename MT3 // Type of the left-hand side target matrix
3737  , typename MT4 // Type of the left-hand side matrix operand
3738  , typename MT5 > // Type of the right-hand side matrix operand
3739  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
3740  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3741  {
3742  selectLargeSubAssignKernel( C, A, B );
3743  }
3745  //**********************************************************************************************
3746 
3747  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
3748 #if BLAZE_BLAS_MODE
3749 
3762  template< typename MT3 // Type of the left-hand side target matrix
3763  , typename MT4 // Type of the left-hand side matrix operand
3764  , typename MT5 > // Type of the right-hand side matrix operand
3765  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
3766  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3767  {
3768  if( IsTriangular<MT4>::value ) {
3769  typename MT3::ResultType tmp( B );
3770  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3771  subAssign( C, tmp );
3772  }
3773  else if( IsTriangular<MT5>::value ) {
3774  typename MT3::ResultType tmp( A );
3775  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0F );
3776  subAssign( C, tmp );
3777  }
3778  else {
3779  sgemm( C, A, B, -1.0F, 1.0F );
3780  }
3781  }
3783 #endif
3784  //**********************************************************************************************
3785 
3786  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
3787 #if BLAZE_BLAS_MODE
3788 
3801  template< typename MT3 // Type of the left-hand side target matrix
3802  , typename MT4 // Type of the left-hand side matrix operand
3803  , typename MT5 > // Type of the right-hand side matrix operand
3804  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
3805  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3806  {
3807  if( IsTriangular<MT4>::value ) {
3808  typename MT3::ResultType tmp( B );
3809  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3810  subAssign( C, tmp );
3811  }
3812  else if( IsTriangular<MT5>::value ) {
3813  typename MT3::ResultType tmp( A );
3814  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), 1.0 );
3815  subAssign( C, tmp );
3816  }
3817  else {
3818  dgemm( C, A, B, -1.0, 1.0 );
3819  }
3820  }
3822 #endif
3823  //**********************************************************************************************
3824 
3825  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
3826 #if BLAZE_BLAS_MODE
3827 
3840  template< typename MT3 // Type of the left-hand side target matrix
3841  , typename MT4 // Type of the left-hand side matrix operand
3842  , typename MT5 > // Type of the right-hand side matrix operand
3843  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3844  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3845  {
3846  if( IsTriangular<MT4>::value ) {
3847  typename MT3::ResultType tmp( B );
3848  ctrmm( tmp, A, CblasLeft,
3849  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3850  complex<float>( 1.0F, 0.0F ) );
3851  subAssign( C, tmp );
3852  }
3853  else if( IsTriangular<MT5>::value ) {
3854  typename MT3::ResultType tmp( A );
3855  ctrmm( tmp, B, CblasRight,
3856  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3857  complex<float>( 1.0F, 0.0F ) );
3858  subAssign( C, tmp );
3859  }
3860  else {
3861  cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
3862  }
3863  }
3865 #endif
3866  //**********************************************************************************************
3867 
3868  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
3869 #if BLAZE_BLAS_MODE
3870 
3883  template< typename MT3 // Type of the left-hand side target matrix
3884  , typename MT4 // Type of the left-hand side matrix operand
3885  , typename MT5 > // Type of the right-hand side matrix operand
3886  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3887  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
3888  {
3889  if( IsTriangular<MT4>::value ) {
3890  typename MT3::ResultType tmp( B );
3891  ztrmm( tmp, A, CblasLeft,
3892  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
3893  complex<float>( 1.0, 0.0 ) );
3894  subAssign( C, tmp );
3895  }
3896  else if( IsTriangular<MT5>::value ) {
3897  typename MT3::ResultType tmp( A );
3898  ztrmm( tmp, B, CblasRight,
3899  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
3900  complex<float>( 1.0, 0.0 ) );
3901  subAssign( C, tmp );
3902  }
3903  else {
3904  zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
3905  }
3906  }
3908 #endif
3909  //**********************************************************************************************
3910 
3911  //**Subtraction assignment to sparse matrices***************************************************
3912  // No special implementation for the subtraction assignment to sparse matrices.
3913  //**********************************************************************************************
3914 
3915  //**Multiplication assignment to dense matrices*************************************************
3916  // No special implementation for the multiplication assignment to dense matrices.
3917  //**********************************************************************************************
3918 
3919  //**Multiplication assignment to sparse matrices************************************************
3920  // No special implementation for the multiplication assignment to sparse matrices.
3921  //**********************************************************************************************
3922 
3923  //**SMP assignment to dense matrices************************************************************
3938  template< typename MT // Type of the target dense matrix
3939  , bool SO > // Storage order of the target dense matrix
3940  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
3941  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
3942  {
3944 
3945  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3946  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3947 
3948  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
3949  return;
3950  }
3951  else if( rhs.lhs_.columns() == 0UL ) {
3952  reset( ~lhs );
3953  return;
3954  }
3955 
3956  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
3957  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
3958 
3959  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
3960  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
3961  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
3962  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
3963  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3964  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
3965 
3966  smpAssign( ~lhs, A * B );
3967  }
3969  //**********************************************************************************************
3970 
3971  //**SMP assignment to sparse matrices***********************************************************
3986  template< typename MT // Type of the target sparse matrix
3987  , bool SO > // Storage order of the target sparse matrix
3988  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
3989  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
3990  {
3992 
3993  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3994 
4001 
4002  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4003  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4004 
4005  const TmpType tmp( rhs );
4006  smpAssign( ~lhs, tmp );
4007  }
4009  //**********************************************************************************************
4010 
4011  //**SMP addition assignment to dense matrices***************************************************
4027  template< typename MT // Type of the target dense matrix
4028  , bool SO > // Storage order of the target dense matrix
4029  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4030  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4031  {
4033 
4034  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4035  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4036 
4037  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4038  return;
4039  }
4040 
4041  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4042  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4043 
4044  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4045  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4046  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4047  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4048  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4049  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4050 
4051  smpAddAssign( ~lhs, A * B );
4052  }
4054  //**********************************************************************************************
4055 
4056  //**SMP addition assignment to sparse matrices**************************************************
4057  // No special implementation for the SMP addition assignment to sparse matrices.
4058  //**********************************************************************************************
4059 
4060  //**SMP subtraction assignment to dense matrices************************************************
4076  template< typename MT // Type of the target dense matrix
4077  , bool SO > // Storage order of the target dense matrix
4078  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4079  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
4080  {
4082 
4083  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4084  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4085 
4086  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
4087  return;
4088  }
4089 
4090  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
4091  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
4092 
4093  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
4094  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
4095  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
4096  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
4097  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4098  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
4099 
4100  smpSubAssign( ~lhs, A * B );
4101  }
4103  //**********************************************************************************************
4104 
4105  //**SMP subtraction assignment to sparse matrices***********************************************
4106  // No special implementation for the SMP subtraction assignment to sparse matrices.
4107  //**********************************************************************************************
4108 
4109  //**SMP multiplication assignment to dense matrices*********************************************
4110  // No special implementation for the SMP multiplication assignment to dense matrices.
4111  //**********************************************************************************************
4112 
4113  //**SMP multiplication assignment to sparse matrices********************************************
4114  // No special implementation for the SMP multiplication assignment to sparse matrices.
4115  //**********************************************************************************************
4116 
4117  //**Compile time checks*************************************************************************
4125  //**********************************************************************************************
4126 };
4127 //*************************************************************************************************
4128 
4129 
4130 
4131 
4132 //=================================================================================================
4133 //
4134 // DMATSCALARMULTEXPR SPECIALIZATION
4135 //
4136 //=================================================================================================
4137 
4138 //*************************************************************************************************
4146 template< typename MT1 // Type of the left-hand side dense matrix
4147  , typename MT2 // Type of the right-hand side dense matrix
4148  , typename ST > // Type of the right-hand side scalar value
4149 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
4150  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
4151  , private MatScalarMultExpr
4152  , private Computation
4153 {
4154  private:
4155  //**Type definitions****************************************************************************
4156  typedef DMatTDMatMultExpr<MT1,MT2> MMM;
4157  typedef typename MMM::ResultType RES;
4158  typedef typename MT1::ResultType RT1;
4159  typedef typename MT2::ResultType RT2;
4160  typedef typename RT1::ElementType ET1;
4161  typedef typename RT2::ElementType ET2;
4162  typedef typename MT1::CompositeType CT1;
4163  typedef typename MT2::CompositeType CT2;
4164  //**********************************************************************************************
4165 
4166  //**********************************************************************************************
4168  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
4169  //**********************************************************************************************
4170 
4171  //**********************************************************************************************
4173  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
4174  //**********************************************************************************************
4175 
4176  //**********************************************************************************************
4178 
4181  template< typename T1, typename T2, typename T3 >
4182  struct IsEvaluationRequired {
4183  enum { value = ( evaluateLeft || evaluateRight ) };
4184  };
4185  //**********************************************************************************************
4186 
4187  //**********************************************************************************************
4189 
4192  template< typename T1, typename T2, typename T3, typename T4 >
4193  struct UseSinglePrecisionKernel {
4194  enum { value = BLAZE_BLAS_MODE &&
4195  HasMutableDataAccess<T1>::value &&
4196  HasConstDataAccess<T2>::value &&
4197  HasConstDataAccess<T3>::value &&
4198  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4199  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4200  IsFloat<typename T1::ElementType>::value &&
4201  IsFloat<typename T2::ElementType>::value &&
4202  IsFloat<typename T3::ElementType>::value &&
4203  !IsComplex<T4>::value };
4204  };
4205  //**********************************************************************************************
4206 
4207  //**********************************************************************************************
4209 
//! Helper structure for the explicit application of the SFINAE principle.
/*! The nested \a value is non-zero only if all of the following compile-time conditions
    hold: BLAZE_BLAS_MODE is set, T1 passes HasMutableDataAccess, T2 and T3 pass
    HasConstDataAccess, neither T2 nor T3 is classified by IsDiagonal, all three types are
    flagged \a vectorizable, the element types of T1, T2 and T3 all satisfy IsDouble, and
    T4 does not satisfy IsComplex. T4 is presumably the scalar type of the scaled
    multiplication -- confirm against the callers outside this excerpt. */
4212  template< typename T1, typename T2, typename T3, typename T4 >
4213  struct UseDoublePrecisionKernel {
4214  enum { value = BLAZE_BLAS_MODE &&
4215  HasMutableDataAccess<T1>::value &&
4216  HasConstDataAccess<T2>::value &&
4217  HasConstDataAccess<T3>::value &&
4218  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4219  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4220  IsDouble<typename T1::ElementType>::value &&
4221  IsDouble<typename T2::ElementType>::value &&
4222  IsDouble<typename T3::ElementType>::value &&
4223  !IsComplex<T4>::value };
4224  };
4225  //**********************************************************************************************
4226 
4227  //**********************************************************************************************
4229 
//! Helper structure for the explicit application of the SFINAE principle.
/*! The nested \a value is non-zero only if all of the following compile-time conditions
    hold: BLAZE_BLAS_MODE is set, T1 passes HasMutableDataAccess, T2 and T3 pass
    HasConstDataAccess, neither T2 nor T3 is classified by IsDiagonal, all three types are
    flagged \a vectorizable, and the element types of T1, T2 and T3 are all exactly
    complex<float> (checked via IsSame against the nested \a Type). */
4232  template< typename T1, typename T2, typename T3 >
4233  struct UseSinglePrecisionComplexKernel {
4234  typedef complex<float> Type; // Element type all three matrices have to share
4235  enum { value = BLAZE_BLAS_MODE &&
4236  HasMutableDataAccess<T1>::value &&
4237  HasConstDataAccess<T2>::value &&
4238  HasConstDataAccess<T3>::value &&
4239  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4240  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4241  IsSame<typename T1::ElementType,Type>::value &&
4242  IsSame<typename T2::ElementType,Type>::value &&
4243  IsSame<typename T3::ElementType,Type>::value };
4244  };
4245  //**********************************************************************************************
4246 
4247  //**********************************************************************************************
4249 
//! Helper structure for the explicit application of the SFINAE principle.
/*! The nested \a value is non-zero only if all of the following compile-time conditions
    hold: BLAZE_BLAS_MODE is set, T1 passes HasMutableDataAccess, T2 and T3 pass
    HasConstDataAccess, neither T2 nor T3 is classified by IsDiagonal, all three types are
    flagged \a vectorizable, and the element types of T1, T2 and T3 are all exactly
    complex<double> (checked via IsSame against the nested \a Type). */
4252  template< typename T1, typename T2, typename T3 >
4253  struct UseDoublePrecisionComplexKernel {
4254  typedef complex<double> Type; // Element type all three matrices have to share
4255  enum { value = BLAZE_BLAS_MODE &&
4256  HasMutableDataAccess<T1>::value &&
4257  HasConstDataAccess<T2>::value &&
4258  HasConstDataAccess<T3>::value &&
4259  !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4260  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4261  IsSame<typename T1::ElementType,Type>::value &&
4262  IsSame<typename T2::ElementType,Type>::value &&
4263  IsSame<typename T3::ElementType,Type>::value };
4264  };
4265  //**********************************************************************************************
4266 
4267  //**********************************************************************************************
4269 
4271  template< typename T1, typename T2, typename T3, typename T4 >
4272  struct UseDefaultKernel {
4273  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
4274  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
4275  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
4276  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
4277  };
4278  //**********************************************************************************************
4279 
4280  //**********************************************************************************************
4282 
//! Helper structure for the explicit application of the SFINAE principle.
/*! The nested \a value is non-zero only if neither T2 nor T3 is classified by IsDiagonal,
    all three matrix types are flagged \a vectorizable, the element types of T1, T2 and T3
    as well as the type T4 are all identical, and IntrinsicTrait reports both \a addition
    and \a multiplication for that element type. Within this excerpt it selects between
    the vectorized and the non-vectorized selectSmallAssignKernel() overloads, where T4 is
    the scalar type. */
4284  template< typename T1, typename T2, typename T3, typename T4 >
4285  struct UseVectorizedDefaultKernel {
4286  enum { value = !IsDiagonal<T2>::value && !IsDiagonal<T3>::value &&
4287  T1::vectorizable && T2::vectorizable && T3::vectorizable &&
4288  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
4289  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
4290  IsSame<typename T1::ElementType,T4>::value &&
4291  IntrinsicTrait<typename T1::ElementType>::addition &&
4292  IntrinsicTrait<typename T1::ElementType>::multiplication };
4293  };
4294  //**********************************************************************************************
4295 
4296  public:
4297  //**Type definitions****************************************************************************
// Public type aliases of the scaled matrix multiplication expression. MMM, RES and ST are
// declared above this excerpt (presumably the multiplication expression type, its result
// type and the scalar type -- confirm).
4298  typedef DMatScalarMultExpr<MMM,ST,false> This; //!< Type of this DMatScalarMultExpr instance.
4299  typedef typename MultTrait<RES,ST>::Type ResultType; //!< Result type as computed by MultTrait<RES,ST>.
4300  typedef typename ResultType::OppositeType OppositeType; //!< OppositeType of the result type.
4301  typedef typename ResultType::TransposeType TransposeType; //!< TransposeType of the result type.
4302  typedef typename ResultType::ElementType ElementType; //!< Element type of the result type.
4303  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; //!< Intrinsic type belonging to ElementType.
4304  typedef const ElementType ReturnType; //!< Return type of the element access operator (by value).
4305  typedef const ResultType CompositeType; //!< Type used when this expression is an operand of another expression.
4306 
//! Type of the left-hand side operand: the (unscaled) matrix multiplication expression.
4308  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand;
4309 
//! Type of the right-hand side operand: the scalar.
4311  typedef ST RightOperand;
4312 
//! Type for the assignment of the left-hand side matrix operand (RT1 if evaluateLeft is set, CT1 otherwise).
4314  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
4315 
//! Type for the assignment of the right-hand side matrix operand (RT2 if evaluateRight is set, CT2 otherwise).
4317  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
4318  //**********************************************************************************************
4319 
4320  //**Compilation flags***************************************************************************
//! Compilation switch for the expression template evaluation strategy.
/*! Non-zero only if neither matrix operand type is classified by IsDiagonal, both operand
    types are flagged \a vectorizable, the element types ET1 and ET2 and the scalar type ST
    are identical, and IntrinsicTrait reports \a addition and \a multiplication for ET1. */
4322  enum { vectorizable = !IsDiagonal<MT1>::value && !IsDiagonal<MT2>::value &&
4323  MT1::vectorizable && MT2::vectorizable &&
4324  IsSame<ET1,ET2>::value &&
4325  IsSame<ET1,ST>::value &&
4326  IntrinsicTrait<ET1>::addition &&
4327  IntrinsicTrait<ET1>::multiplication };
4328 
//! Compilation switch for the expression template assignment strategy.
/*! Non-zero only if neither operand has to be evaluated (evaluateLeft and evaluateRight
    are both unset) and both operand types are flagged \a smpAssignable. */
4330  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
4331  !evaluateRight && MT2::smpAssignable };
4332  //**********************************************************************************************
4333 
4334  //**Constructor*********************************************************************************
/*!\brief Constructor for the DMatScalarMultExpr class.
//
// \param matrix The left-hand side dense matrix of the multiplication expression.
// \param scalar The right-hand side scalar of the multiplication expression.
//
// Both arguments are stored in the members \a matrix_ and \a scalar_; no computation is
// performed at construction time.
*/
4340  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
4341  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
4342  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
4343  {}
4344  //**********************************************************************************************
4345 
4346  //**Access operator*****************************************************************************
/*!\brief 2D-access to the matrix elements.
//
// \param i Access index for the row. Has to be smaller than the number of rows.
// \param j Access index for the column. Has to be smaller than the number of columns.
// \return The element (i,j) of the wrapped matrix expression multiplied by the scalar.
//
// The index bounds are only verified via internal assertions.
*/
4353  inline ReturnType operator()( size_t i, size_t j ) const {
4354  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
4355  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
4356  return matrix_(i,j) * scalar_;
4357  }
4358  //**********************************************************************************************
4359 
4360  //**Rows function*******************************************************************************
/*!\brief Returns the current number of rows of the matrix.
//
// \return The number of rows reported by the wrapped matrix expression.
*/
4365  inline size_t rows() const {
4366  return matrix_.rows();
4367  }
4368  //**********************************************************************************************
4369 
4370  //**Columns function****************************************************************************
/*!\brief Returns the current number of columns of the matrix.
//
// \return The number of columns reported by the wrapped matrix expression.
*/
4375  inline size_t columns() const {
4376  return matrix_.columns();
4377  }
4378  //**********************************************************************************************
4379 
4380  //**Left operand access*************************************************************************
/*!\brief Returns the left-hand side dense matrix operand.
//
// \return The stored matrix multiplication expression (returned by value as LeftOperand).
*/
4385  inline LeftOperand leftOperand() const {
4386  return matrix_;
4387  }
4388  //**********************************************************************************************
4389 
4390  //**Right operand access************************************************************************
/*!\brief Returns the right-hand side scalar operand.
//
// \return The stored scalar (returned by value as RightOperand).
*/
4395  inline RightOperand rightOperand() const {
4396  return scalar_;
4397  }
4398  //**********************************************************************************************
4399 
4400  //**********************************************************************************************
/*!\brief Returns whether the expression can alias with the given address \a alias.
//
// \param alias The alias to be checked.
// \return The result of forwarding the query to the wrapped matrix expression.
*/
4406  template< typename T >
4407  inline bool canAlias( const T* alias ) const {
4408  return matrix_.canAlias( alias );
4409  }
4410  //**********************************************************************************************
4411 
4412  //**********************************************************************************************
/*!\brief Returns whether the expression is aliased with the given address \a alias.
//
// \param alias The alias to be checked.
// \return The result of forwarding the query to the wrapped matrix expression.
*/
4418  template< typename T >
4419  inline bool isAliased( const T* alias ) const {
4420  return matrix_.isAliased( alias );
4421  }
4422  //**********************************************************************************************
4423 
4424  //**********************************************************************************************
/*!\brief Returns whether the operands of the expression are properly aligned in memory.
//
// \return The result of forwarding the query to the wrapped matrix expression.
*/
4429  inline bool isAligned() const {
4430  return matrix_.isAligned();
4431  }
4432  //**********************************************************************************************
4433 
4434  //**********************************************************************************************
4439  inline bool canSMPAssign() const {
4440  typename MMM::LeftOperand A( matrix_.leftOperand() );
4441  return ( !BLAZE_BLAS_IS_PARALLEL ||
4442  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
4443  ( A.rows() > SMP_DMATTDMATMULT_THRESHOLD );
4444  }
4445  //**********************************************************************************************
4446 
4447  private:
4448  //**Member variables****************************************************************************
4449  LeftOperand matrix_; //!< Left-hand side dense matrix of the multiplication expression.
4450  RightOperand scalar_; //!< Right-hand side scalar of the multiplication expression.
4451  //**********************************************************************************************
4452 
4453  //**Assignment to dense matrices****************************************************************
/*!\brief Assignment of a scaled dense matrix-transpose dense matrix multiplication to a
//        dense matrix (\f$ C=s*A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
//
// The function extracts both matrix operands of the wrapped multiplication expression,
// handles the degenerate sizes, evaluates the operands (types LT and RT) and forwards the
// actual computation to selectAssignKernel(). The dimensions of \a lhs are only verified
// via internal assertions.
*/
4465  template< typename MT // Type of the target dense matrix
4466  , bool SO > // Storage order of the target dense matrix
4467  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4468  {
4470 
4471  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4472  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4473 
4474  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4475  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4476 
// Empty target: nothing to compute. Empty inner dimension: every product is an empty sum,
// so the target is merely reset. This also guarantees a non-empty inner dimension for the
// kernels, which initialize each element with the first product.
4477  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4478  return;
4479  }
4480  else if( left.columns() == 0UL ) {
4481  reset( ~lhs );
4482  return;
4483  }
4484 
4485  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
4486  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
4487 
4488  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4489  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4490  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4491  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4492  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4493  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4494 
4495  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
4496  }
4497  //**********************************************************************************************
4498 
4499  //**Assignment to dense matrices (kernel selection)*********************************************
4510  template< typename MT3 // Type of the left-hand side target matrix
4511  , typename MT4 // Type of the left-hand side matrix operand
4512  , typename MT5 // Type of the right-hand side matrix operand
4513  , typename ST2 > // Type of the scalar value
4514  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4515  {
4516  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
4517  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
4518  selectSmallAssignKernel( C, A, B, scalar );
4519  else
4520  selectBlasAssignKernel( C, A, B, scalar );
4521  }
4522  //**********************************************************************************************
4523 
4524  //**Default assignment to row-major dense matrices (general/general)****************************
/*!\brief Default assignment of a scaled dense matrix-transpose dense matrix multiplication
//        to a row-major dense matrix (\f$ C=s*A*B \f$), neither operand being diagonal.
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Every element of \a C is written exactly once: it is either computed as the scaled dot
// product of a row of \a A and a column of \a B, or it is reset. The compile-time traits
// IsLower / IsUpper / IsStrictlyLower / IsStrictlyUpper of the operand types narrow the
// row range [ibegin,iend), the column range [jbegin,jend) and the summation range
// [kbegin,kend); all elements outside the row and column ranges are reset.
*/
4538  template< typename MT3 // Type of the left-hand side target matrix
4539  , typename MT4 // Type of the left-hand side matrix operand
4540  , typename MT5 // Type of the right-hand side matrix operand
4541  , typename ST2 > // Type of the scalar value
4542  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
4543  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4544  {
4545  const size_t M( A.rows() );
4546  const size_t N( B.columns() );
4547  const size_t K( A.columns() );
4548 
// Rows of C outside [ibegin,iend) are reset as a whole (see the loops before and after the
// main loop). The M > 1UL guards keep the bounds consistent for a single-row target.
4549  const size_t ibegin( ( IsStrictlyLower<MT4>::value )
4550  ?( ( IsStrictlyLower<MT5>::value && M > 1UL ) ? 2UL : 1UL )
4551  :( 0UL ) );
4552  const size_t iend( ( IsStrictlyUpper<MT4>::value )
4553  ?( ( IsStrictlyUpper<MT5>::value && M > 1UL ) ? M-2UL : M-1UL )
4554  :( M ) );
4555  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4556 
4557  for( size_t i=0UL; i<ibegin; ++i ) {
4558  for( size_t j=0UL; j<N; ++j ) {
4559  reset( (~C)(i,j) );
4560  }
4561  }
4562  for( size_t i=ibegin; i<iend; ++i )
4563  {
// Columns of row i outside [jbegin,jend) are reset
4564  const size_t jbegin( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4565  ?( ( IsStrictlyUpper<MT4>::value )
4566  ?( IsStrictlyUpper<MT5>::value ? i+2UL : i+1UL )
4567  :( IsStrictlyUpper<MT5>::value ? i+1UL : i ) )
4568  :( IsStrictlyUpper<MT5>::value ? 1UL : 0UL ) );
4569  const size_t jend( ( IsLower<MT4>::value && IsLower<MT5>::value )
4570  ?( ( IsStrictlyLower<MT4>::value )
4571  ?( IsStrictlyLower<MT5>::value ? i-1UL : i )
4572  :( IsStrictlyLower<MT5>::value ? i : i+1UL ) )
4573  :( IsStrictlyLower<MT5>::value ? N-1UL : N ) );
4574  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4575 
4576  for( size_t j=0UL; j<jbegin; ++j ) {
4577  reset( (~C)(i,j) );
4578  }
4579  for( size_t j=jbegin; j<jend; ++j )
4580  {
// Summation range for element (i,j)
4581  const size_t kbegin( ( IsUpper<MT4>::value )
4582  ?( ( IsLower<MT5>::value )
4583  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4584  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4585  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4586  :( ( IsLower<MT5>::value )
4587  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4588  :( 0UL ) ) );
4589  const size_t kend( ( IsLower<MT4>::value )
4590  ?( ( IsUpper<MT5>::value )
4591  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4592  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4593  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4594  :( ( IsUpper<MT5>::value )
4595  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4596  :( K ) ) );
// The range must not be empty: the element is initialized with the product at kbegin
4597  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4598 
4599  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4600  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4601  (~C)(i,j) += A(i,k) * B(k,j);
4602  }
// The scaling is applied once per element after the summation
4603  (~C)(i,j) *= scalar;
4604  }
4605  for( size_t j=jend; j<N; ++j ) {
4606  reset( (~C)(i,j) );
4607  }
4608  }
4609  for( size_t i=iend; i<M; ++i ) {
4610  for( size_t j=0UL; j<N; ++j ) {
4611  reset( (~C)(i,j) );
4612  }
4613  }
4614  }
4615  //**********************************************************************************************
4616 
4617  //**Default assignment to column-major dense matrices (general/general)*************************
/*!\brief Default assignment of a scaled dense matrix-transpose dense matrix multiplication
//        to a column-major dense matrix (\f$ C=s*A*B \f$), neither operand being diagonal.
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Column-wise counterpart of the row-major kernel: the outer loop runs over the columns
// of \a C and the inner loop over the rows. Every element of \a C is written exactly
// once: it is either computed as the scaled dot product of a row of \a A and a column of
// \a B, or it is reset. The compile-time traits IsLower / IsUpper / IsStrictlyLower /
// IsStrictlyUpper of the operand types narrow the column range [jbegin,jend), the row
// range [ibegin,iend) and the summation range [kbegin,kend).
*/
4631  template< typename MT3 // Type of the left-hand side target matrix
4632  , typename MT4 // Type of the left-hand side matrix operand
4633  , typename MT5 // Type of the right-hand side matrix operand
4634  , typename ST2 > // Type of the scalar value
4635  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
4636  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4637  {
4638  const size_t M( A.rows() );
4639  const size_t N( B.columns() );
4640  const size_t K( A.columns() );
4641 
// Columns of C outside [jbegin,jend) are reset as a whole (see the loops before and after
// the main loop). The N > 1UL guards keep the bounds consistent for a single-column target.
4642  const size_t jbegin( ( IsStrictlyUpper<MT5>::value )
4643  ?( ( IsStrictlyUpper<MT4>::value && N > 1UL ) ? 2UL : 1UL )
4644  :( 0UL ) );
4645  const size_t jend( ( IsStrictlyLower<MT5>::value )
4646  ?( ( IsStrictlyLower<MT4>::value && N > 1UL ) ? N-2UL : N-1UL )
4647  :( N ) );
4648  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4649 
4650  for( size_t j=0UL; j<jbegin; ++j ) {
4651  for( size_t i=0UL; i<M; ++i ) {
4652  reset( (~C)(i,j) );
4653  }
4654  }
4655  for( size_t j=jbegin; j<jend; ++j )
4656  {
// Rows of column j outside [ibegin,iend) are reset
4657  const size_t ibegin( ( IsLower<MT4>::value && IsLower<MT5>::value )
4658  ?( ( IsStrictlyLower<MT4>::value )
4659  ?( IsStrictlyLower<MT5>::value ? j+2UL : j+1UL )
4660  :( IsStrictlyLower<MT5>::value ? j+1UL : j ) )
4661  :( IsStrictlyLower<MT4>::value ? 1UL : 0UL ) );
4662  const size_t iend( ( IsUpper<MT4>::value && IsUpper<MT5>::value )
4663  ?( ( IsStrictlyUpper<MT4>::value )
4664  ?( ( IsStrictlyUpper<MT5>::value )?( j-1UL ):( j ) )
4665  :( ( IsStrictlyUpper<MT5>::value )?( j ):( j+1UL ) ) )
4666  :( IsStrictlyUpper<MT4>::value ? M-1UL : M ) );
4667  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4668 
4669  for( size_t i=0UL; i<ibegin; ++i ) {
4670  reset( (~C)(i,j) );
4671  }
4672  for( size_t i=ibegin; i<iend; ++i )
4673  {
// Summation range for element (i,j)
4674  const size_t kbegin( ( IsUpper<MT4>::value )
4675  ?( ( IsLower<MT5>::value )
4676  ?( max( ( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4677  , ( IsStrictlyLower<MT5>::value ? j+1UL : j ) ) )
4678  :( IsStrictlyUpper<MT4>::value ? i+1UL : i ) )
4679  :( ( IsLower<MT5>::value )
4680  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4681  :( 0UL ) ) );
4682  const size_t kend( ( IsLower<MT4>::value )
4683  ?( ( IsUpper<MT5>::value )
4684  ?( min( ( IsStrictlyLower<MT4>::value ? i : i+1UL )
4685  , ( IsStrictlyUpper<MT5>::value ? j : j+1UL ) ) )
4686  :( IsStrictlyLower<MT4>::value ? i : i+1UL ) )
4687  :( ( IsUpper<MT5>::value )
4688  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4689  :( K ) ) );
// The range must not be empty: the element is initialized with the product at kbegin
4690  BLAZE_INTERNAL_ASSERT( kbegin < kend, "Invalid loop indices detected" );
4691 
4692  (~C)(i,j) = A(i,kbegin) * B(kbegin,j);
4693  for( size_t k=kbegin+1UL; k<kend; ++k ) {
4694  (~C)(i,j) += A(i,k) * B(k,j);
4695  }
// The scaling is applied once per element after the summation
4696  (~C)(i,j) *= scalar;
4697  }
4698  for( size_t i=iend; i<M; ++i ) {
4699  reset( (~C)(i,j) );
4700  }
4701  }
4702  for( size_t j=jend; j<N; ++j ) {
4703  for( size_t i=0UL; i<M; ++i ) {
4704  reset( (~C)(i,j) );
4705  }
4706  }
4707  }
4708  //**********************************************************************************************
4709 
4710  //**Default assignment to row-major dense matrices (general/diagonal)***************************
4724  template< typename MT3 // Type of the left-hand side target matrix
4725  , typename MT4 // Type of the left-hand side matrix operand
4726  , typename MT5 // Type of the right-hand side matrix operand
4727  , typename ST2 > // Type of the scalar value
4728  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
4729  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4730  {
4731  const size_t M( A.rows() );
4732  const size_t N( B.columns() );
4733 
4734  for( size_t i=0UL; i<M; ++i )
4735  {
4736  const size_t jbegin( ( IsUpper<MT4>::value )
4737  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
4738  :( 0UL ) );
4739  const size_t jend( ( IsLower<MT4>::value )
4740  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
4741  :( N ) );
4742  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
4743 
4744  if( IsUpper<MT4>::value ) {
4745  for( size_t j=0UL; j<jbegin; ++j ) {
4746  reset( (~C)(i,j) );
4747  }
4748  }
4749  for( size_t j=jbegin; j<jend; ++j ) {
4750  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4751  }
4752  if( IsLower<MT4>::value ) {
4753  for( size_t j=jend; j<N; ++j ) {
4754  reset( (~C)(i,j) );
4755  }
4756  }
4757  }
4758  }
4759  //**********************************************************************************************
4760 
4761  //**Default assignment to column-major dense matrices (general/diagonal)************************
/*!\brief Default assignment of a scaled multiplication of a general dense matrix with a
//        diagonal dense matrix to a column-major dense matrix (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side (non-diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Since \a B is diagonal, element (i,j) of the result reduces to A(i,j) * B(j,j) * scalar.
// The target is traversed in tiles of at most 16x16 elements. Within a tile, the rows of
// each column that lie outside the lower/upper structure of \a A are reset.
*/
4775  template< typename MT3 // Type of the left-hand side target matrix
4776  , typename MT4 // Type of the left-hand side matrix operand
4777  , typename MT5 // Type of the right-hand side matrix operand
4778  , typename ST2 > // Type of the scalar value
4779  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
4780  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4781  {
4782  const size_t M( A.rows() );
4783  const size_t N( B.columns() );
4784 
// Edge length of the square tiles
4785  const size_t block( 16UL );
4786 
4787  for( size_t jj=0UL; jj<N; jj+=block ) {
4788  const size_t jend( min( N, jj+block ) );
4789  for( size_t ii=0UL; ii<M; ii+=block ) {
4790  const size_t iend( min( M, ii+block ) );
4791  for( size_t j=jj; j<jend; ++j )
4792  {
// Row range of column j inside the current tile that is covered by the structure of A.
// NOTE(review): unlike the other kernels there is no assertion on ibegin <= ipos; if
// ibegin exceeds ipos the middle loop is simply empty.
4793  const size_t ibegin( ( IsLower<MT4>::value )
4794  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
4795  :( ii ) );
4796  const size_t ipos( ( IsUpper<MT4>::value )
4797  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
4798  :( iend ) );
4799 
4800  if( IsLower<MT4>::value ) {
4801  for( size_t i=ii; i<ibegin; ++i ) {
4802  reset( (~C)(i,j) );
4803  }
4804  }
4805  for( size_t i=ibegin; i<ipos; ++i ) {
4806  (~C)(i,j) = A(i,j) * B(j,j) * scalar;
4807  }
4808  if( IsUpper<MT4>::value ) {
4809  for( size_t i=ipos; i<iend; ++i ) {
4810  reset( (~C)(i,j) );
4811  }
4812  }
4813  }
4814  }
4815  }
4816  }
4817  //**********************************************************************************************
4818 
4819  //**Default assignment to row-major dense matrices (diagonal/general)***************************
/*!\brief Default assignment of a scaled multiplication of a diagonal dense matrix with a
//        general dense matrix to a row-major dense matrix (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side (non-diagonal) multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// Since \a A is diagonal, element (i,j) of the result reduces to A(i,i) * B(i,j) * scalar.
// The target is traversed in tiles of at most 16x16 elements. Within a tile, the columns
// of each row that lie outside the lower/upper structure of \a B are reset.
*/
4833  template< typename MT3 // Type of the left-hand side target matrix
4834  , typename MT4 // Type of the left-hand side matrix operand
4835  , typename MT5 // Type of the right-hand side matrix operand
4836  , typename ST2 > // Type of the scalar value
4837  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
4838  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
4839  {
4840  const size_t M( A.rows() );
4841  const size_t N( B.columns() );
4842 
// Edge length of the square tiles
4843  const size_t block( 16UL );
4844 
4845  for( size_t ii=0UL; ii<M; ii+=block ) {
4846  const size_t iend( min( M, ii+block ) );
4847  for( size_t jj=0UL; jj<N; jj+=block ) {
4848  const size_t jend( min( N, jj+block ) );
4849  for( size_t i=ii; i<iend; ++i )
4850  {
// Column range of row i inside the current tile that is covered by the structure of B.
// NOTE(review): unlike the other kernels there is no assertion on jbegin <= jpos; if
// jbegin exceeds jpos the middle loop is simply empty.
4851  const size_t jbegin( ( IsUpper<MT5>::value )
4852  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
4853  :( jj ) );
4854  const size_t jpos( ( IsLower<MT5>::value )
4855  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
4856  :( jend ) );
4857 
4858  if( IsUpper<MT5>::value ) {
4859  for( size_t j=jj; j<jbegin; ++j ) {
4860  reset( (~C)(i,j) );
4861  }
4862  }
4863  for( size_t j=jbegin; j<jpos; ++j ) {
4864  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
4865  }
4866  if( IsLower<MT5>::value ) {
4867  for( size_t j=jpos; j<jend; ++j ) {
4868  reset( (~C)(i,j) );
4869  }
4870  }
4871  }
4872  }
4873  }
4874  }
4875  //**********************************************************************************************
4876 
4877  //**Default assignment to column-major dense matrices (diagonal/general)************************
4891  template< typename MT3 // Type of the left-hand side target matrix
4892  , typename MT4 // Type of the left-hand side matrix operand
4893  , typename MT5 // Type of the right-hand side matrix operand
4894  , typename ST2 > // Type of the scalar value
4895  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
4896  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4897  {
4898  const size_t M( A.rows() );
4899  const size_t N( B.columns() );
4900 
4901  for( size_t j=0UL; j<N; ++j )
4902  {
4903  const size_t ibegin( ( IsLower<MT5>::value )
4904  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
4905  :( 0UL ) );
4906  const size_t iend( ( IsUpper<MT5>::value )
4907  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
4908  :( M ) );
4909  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
4910 
4911  if( IsLower<MT5>::value ) {
4912  for( size_t i=0UL; i<ibegin; ++i ) {
4913  reset( (~C)(i,j) );
4914  }
4915  }
4916  for( size_t i=ibegin; i<iend; ++i ) {
4917  (~C)(i,j) = A(i,i) * B(i,j) * scalar;
4918  }
4919  if( IsUpper<MT5>::value ) {
4920  for( size_t i=iend; i<M; ++i ) {
4921  reset( (~C)(i,j) );
4922  }
4923  }
4924  }
4925  }
4926  //**********************************************************************************************
4927 
4928  //**Default assignment to dense matrices (diagonal/diagonal)************************************
4942  template< typename MT3 // Type of the left-hand side target matrix
4943  , typename MT4 // Type of the left-hand side matrix operand
4944  , typename MT5 // Type of the right-hand side matrix operand
4945  , typename ST2 > // Type of the scalar value
4946  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
4947  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4948  {
4949  reset( C );
4950 
4951  for( size_t i=0UL; i<A.rows(); ++i ) {
4952  C(i,i) = A(i,i) * B(i,i) * scalar;
4953  }
4954  }
4955  //**********************************************************************************************
4956 
4957  //**Default assignment to dense matrices (small matrices)***************************************
/*!\brief Default assignment of a small scaled dense matrix-transpose dense matrix
//        multiplication (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected whenever UseVectorizedDefaultKernel does not apply to the
// given types. It relays the call to the matching selectDefaultAssignKernel() overload.
*/
4971  template< typename MT3 // Type of the left-hand side target matrix
4972  , typename MT4 // Type of the left-hand side matrix operand
4973  , typename MT5 // Type of the right-hand side matrix operand
4974  , typename ST2 > // Type of the scalar value
4975  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4976  selectSmallAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4977  {
4978  selectDefaultAssignKernel( C, A, B, scalar );
4979  }
4980  //**********************************************************************************************
4981 
4982  //**Vectorized default assignment to row-major dense matrices (small matrices)******************
/*!\brief Vectorized default assignment of a small scaled dense matrix-transpose dense
//        matrix multiplication to a row-major dense matrix (\f$ C=s*A*B \f$).
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
//
// This overload is selected if UseVectorizedDefaultKernel applies to the given types.
// The target is processed in tiles: pairs of rows are combined with groups of four, two
// and finally one column; a remaining single row is handled the same way. For every
// element of a tile an intrinsic accumulator collects the products of A.load(i,k) and
// B.load(k,j) along \a k in steps of IT::size; the element is then assigned the
// horizontal sum of its accumulator multiplied by \a scalar.
//
// NOTE(review): the accumulators are default-constructed and immediately accumulated
// into, so the kernel relies on the default constructor of IntrinsicType producing a
// zero vector -- confirm.
// NOTE(review): kbegin is rounded down via "& size_t(-IT::size)", which assumes IT::size
// is a power of two, while kend is not rounded up. The last load of a \a k loop may
// therefore extend beyond kend; presumably the operands are padded accordingly -- confirm.
*/
4997  template< typename MT3 // Type of the left-hand side target matrix
4998  , typename MT4 // Type of the left-hand side matrix operand
4999  , typename MT5 // Type of the right-hand side matrix operand
5000  , typename ST2 > // Type of the scalar value
5001  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5002  selectSmallAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5003  {
5004  typedef IntrinsicTrait<ElementType> IT;
5005 
5006  const size_t M( A.rows() );
5007  const size_t N( B.columns() );
5008  const size_t K( A.columns() );
5009 
5010  size_t i( 0UL );
5011 
// Pairs of rows
5012  for( ; (i+2UL) <= M; i+=2UL )
5013  {
5014  size_t j( 0UL );
5015 
// 2x4 tiles
5016  for( ; (j+4UL) <= N; j+=4UL )
5017  {
// Summation range restricted by the upper/lower structure of the operands
5018  const size_t kbegin( ( IsUpper<MT4>::value )
5019  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5020  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5021  const size_t kend( ( IsLower<MT4>::value )
5022  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
5023  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
5024 
5025  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5026 
5027  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5028  const IntrinsicType a1( A.load(i ,k) );
5029  const IntrinsicType a2( A.load(i+1UL,k) );
5030  const IntrinsicType b1( B.load(k,j ) );
5031  const IntrinsicType b2( B.load(k,j+1UL) );
5032  const IntrinsicType b3( B.load(k,j+2UL) );
5033  const IntrinsicType b4( B.load(k,j+3UL) );
5034  xmm1 = xmm1 + a1 * b1;
5035  xmm2 = xmm2 + a1 * b2;
5036  xmm3 = xmm3 + a1 * b3;
5037  xmm4 = xmm4 + a1 * b4;
5038  xmm5 = xmm5 + a2 * b1;
5039  xmm6 = xmm6 + a2 * b2;
5040  xmm7 = xmm7 + a2 * b3;
5041  xmm8 = xmm8 + a2 * b4;
5042  }
5043 
5044  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5045  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5046  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
5047  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
5048  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
5049  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
5050  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
5051  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
5052  }
5053 
// 2x2 tiles
5054  for( ; (j+2UL) <= N; j+=2UL )
5055  {
5056  const size_t kbegin( ( IsUpper<MT4>::value )
5057  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5058  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5059  const size_t kend( ( IsLower<MT4>::value )
5060  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5061  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5062 
5063  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5064 
5065  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5066  const IntrinsicType a1( A.load(i ,k) );
5067  const IntrinsicType a2( A.load(i+1UL,k) );
5068  const IntrinsicType b1( B.load(k,j ) );
5069  const IntrinsicType b2( B.load(k,j+1UL) );
5070  xmm1 = xmm1 + a1 * b1;
5071  xmm2 = xmm2 + a1 * b2;
5072  xmm3 = xmm3 + a2 * b1;
5073  xmm4 = xmm4 + a2 * b2;
5074  }
5075 
5076  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5077  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5078  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5079  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5080  }
5081 
// Remaining single column (2x1 tile)
5082  if( j < N )
5083  {
5084  const size_t kbegin( ( IsUpper<MT4>::value )
5085  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5086  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5087  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5088 
5089  IntrinsicType xmm1, xmm2;
5090 
5091  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5092  const IntrinsicType b1( B.load(k,j) );
5093  xmm1 = xmm1 + A.load(i ,k) * b1;
5094  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5095  }
5096 
5097  (~C)(i ,j) = sum( xmm1 ) * scalar;
5098  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5099  }
5100  }
5101 
// Remaining single row
5102  if( i < M )
5103  {
5104  size_t j( 0UL );
5105 
// 1x4 tiles
5106  for( ; (j+4UL) <= N; j+=4UL )
5107  {
5108  const size_t kbegin( ( IsUpper<MT4>::value )
5109  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5110  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5111  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
5112 
5113  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5114 
5115  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5116  const IntrinsicType a1( A.load(i,k) );
5117  xmm1 = xmm1 + a1 * B.load(k,j );
5118  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5119  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
5120  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
5121  }
5122 
5123  (~C)(i,j ) = sum( xmm1 ) * scalar;
5124  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5125  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
5126  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
5127  }
5128 
// 1x2 tiles
5129  for( ; (j+2UL) <= N; j+=2UL )
5130  {
5131  const size_t kbegin( ( IsUpper<MT4>::value )
5132  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5133  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5134  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5135 
5136  IntrinsicType xmm1, xmm2;
5137 
5138  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5139  const IntrinsicType a1( A.load(i,k) );
5140  xmm1 = xmm1 + a1 * B.load(k,j );
5141  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5142  }
5143 
5144  (~C)(i,j ) = sum( xmm1 ) * scalar;
5145  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5146  }
5147 
// Remaining single element; the summation always runs up to K here
5148  if( j < N )
5149  {
5150  const size_t kbegin( ( IsUpper<MT4>::value )
5151  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5152  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5153 
5154  IntrinsicType xmm1;
5155 
5156  for( size_t k=kbegin; k<K; k+=IT::size ) {
5157  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
5158  }
5159 
5160  (~C)(i,j) = sum( xmm1 ) * scalar;
5161  }
5162  }
5163  }
5164  //**********************************************************************************************
5165 
5166  //**Vectorized default assignment to column-major dense matrices (small matrices)***************
// Vectorized kernel computing C = ( A * B ) * scalar for a column-major target matrix C
// (small matrices).
//
// This overload is selected through EnableIf only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. Every element C(i,j) is overwritten
// (plain assignment, no accumulation into C) with the sum over k of A(i,k) * B(k,j), built up
// IT::size elements per step in IntrinsicType accumulators, reduced with sum() and multiplied
// by scalar. C is covered tile by tile: 4x2 and 4x1 tiles for row groups of four, 2x2 and 2x1
// tiles for row groups of two, and 1x2 tiles plus a single element for a final row.
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor applied to every reduced sum.
// \return void
//
// NOTE(review): the accumulators are default-constructed and then added to; this assumes the
// default constructor of IntrinsicType yields an all-zero vector -- confirm.
// NOTE(review): A.load(i,k) and B.load(k,j) presumably read IT::size consecutive elements along
// k, and the k-loops may step past kend by up to IT::size-1 elements; this presumably relies on
// padded storage -- confirm.
5181  template< typename MT3 // Type of the left-hand side target matrix
5182  , typename MT4 // Type of the left-hand side matrix operand
5183  , typename MT5 // Type of the right-hand side matrix operand
5184  , typename ST2 > // Type of the scalar value
5185  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5186  selectSmallAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5187  {
5188  typedef IntrinsicTrait<ElementType> IT;
5189 
5190  const size_t M( A.rows() );
5191  const size_t N( B.columns() );
5192  const size_t K( A.columns() );
5193 
5194  size_t i( 0UL );
5195 
      // Rows in groups of four: 4x2 tiles of C, then one 4x1 tile if a single column is left.
5196  for( ; (i+4UL) <= M; i+=4UL )
5197  {
5198  size_t j( 0UL );
5199 
5200  for( ; (j+2UL) <= N; j+=2UL )
5201  {
      // k-range of the dot products for this tile. kbegin starts at i if IsUpper<MT4>::value,
      // at j if IsLower<MT5>::value (max of both if both hold), else at 0, and is rounded down
      // to a multiple of IT::size by the mask (assuming IT::size is a power of two).
      // kend stops at i+4 if IsLower<MT4>::value, at j+2 if IsUpper<MT5>::value (min of both
      // if both hold), else at K.
5202  const size_t kbegin( ( IsUpper<MT4>::value )
5203  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5204  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5205  const size_t kend( ( IsLower<MT4>::value )
5206  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
5207  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5208 
5209  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
5210 
      // One accumulator per element of the 4x2 tile; each A row and B column is loaded once per k.
5211  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5212  const IntrinsicType a1( A.load(i ,k) );
5213  const IntrinsicType a2( A.load(i+1UL,k) );
5214  const IntrinsicType a3( A.load(i+2UL,k) );
5215  const IntrinsicType a4( A.load(i+3UL,k) );
5216  const IntrinsicType b1( B.load(k,j ) );
5217  const IntrinsicType b2( B.load(k,j+1UL) );
5218  xmm1 = xmm1 + a1 * b1;
5219  xmm2 = xmm2 + a1 * b2;
5220  xmm3 = xmm3 + a2 * b1;
5221  xmm4 = xmm4 + a2 * b2;
5222  xmm5 = xmm5 + a3 * b1;
5223  xmm6 = xmm6 + a3 * b2;
5224  xmm7 = xmm7 + a4 * b1;
5225  xmm8 = xmm8 + a4 * b2;
5226  }
5227 
5228  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5229  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5230  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5231  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5232  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
5233  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
5234  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
5235  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
5236  }
5237 
      // Leftover column (4x1 tile): only the IsLower<MT4> bound is applied to kend here.
5238  if( j < N )
5239  {
5240  const size_t kbegin( ( IsUpper<MT4>::value )
5241  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5242  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5243  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
5244 
5245  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5246 
5247  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5248  const IntrinsicType b1( B.load(k,j) );
5249  xmm1 = xmm1 + A.load(i ,k) * b1;
5250  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5251  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
5252  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
5253  }
5254 
5255  (~C)(i ,j) = sum( xmm1 ) * scalar;
5256  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5257  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
5258  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
5259  }
5260  }
5261 
      // Remaining rows in groups of two: 2x2 tiles, then one 2x1 tile if a single column is left.
5262  for( ; (i+2UL) <= M; i+=2UL )
5263  {
5264  size_t j( 0UL );
5265 
5266  for( ; (j+2UL) <= N; j+=2UL )
5267  {
      // Same k-range rule as for the 4x2 tile, with the row bound i+2 instead of i+4.
5268  const size_t kbegin( ( IsUpper<MT4>::value )
5269  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5270  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5271  const size_t kend( ( IsLower<MT4>::value )
5272  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
5273  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
5274 
5275  IntrinsicType xmm1, xmm2, xmm3, xmm4;
5276 
5277  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5278  const IntrinsicType a1( A.load(i ,k) );
5279  const IntrinsicType a2( A.load(i+1UL,k) );
5280  const IntrinsicType b1( B.load(k,j ) );
5281  const IntrinsicType b2( B.load(k,j+1UL) );
5282  xmm1 = xmm1 + a1 * b1;
5283  xmm2 = xmm2 + a1 * b2;
5284  xmm3 = xmm3 + a2 * b1;
5285  xmm4 = xmm4 + a2 * b2;
5286  }
5287 
5288  (~C)(i ,j ) = sum( xmm1 ) * scalar;
5289  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
5290  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
5291  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
5292  }
5293 
      // Leftover column (2x1 tile): only the IsLower<MT4> bound is applied to kend here.
5294  if( j < N )
5295  {
5296  const size_t kbegin( ( IsUpper<MT4>::value )
5297  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5298  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5299  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
5300 
5301  IntrinsicType xmm1, xmm2;
5302 
5303  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5304  const IntrinsicType b1( B.load(k,j) );
5305  xmm1 = xmm1 + A.load(i ,k) * b1;
5306  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
5307  }
5308 
5309  (~C)(i ,j) = sum( xmm1 ) * scalar;
5310  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
5311  }
5312  }
5313 
      // Final single row: 1x2 tiles, then one single element if a column is left.
5314  if( i < M )
5315  {
5316  size_t j( 0UL );
5317 
5318  for( ; (j+2UL) <= N; j+=2UL )
5319  {
      // For a single row only the IsUpper<MT5> bound (j+2) is applied to kend.
5320  const size_t kbegin( ( IsUpper<MT4>::value )
5321  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5322  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5323  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
5324 
5325  IntrinsicType xmm1, xmm2;
5326 
5327  for( size_t k=kbegin; k<kend; k+=IT::size ) {
5328  const IntrinsicType a1( A.load(i,k) );
5329  xmm1 = xmm1 + a1 * B.load(k,j );
5330  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
5331  }
5332 
5333  (~C)(i,j ) = sum( xmm1 ) * scalar;
5334  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
5335  }
5336 
      // Last element: the k-loop always runs up to K (no upper narrowing).
5337  if( j < N )
5338  {
5339  const size_t kbegin( ( IsUpper<MT4>::value )
5340  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
5341  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
5342 
5343  IntrinsicType xmm1;
5344 
5345  for( size_t k=kbegin; k<K; k+=IT::size ) {
5346  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
5347  }
5348 
5349  (~C)(i,j) = sum( xmm1 ) * scalar;
5350  }
5351  }
5352  }
5353  //**********************************************************************************************
5354 
5355  //**Default assignment to dense matrices (large matrices)***************************************
// Fallback large-matrix kernel for the assignment C = ( A * B ) * scalar.
//
// This overload is selected through DisableIf, i.e. only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold. It performs no work of its own and
// forwards all four arguments unchanged to selectDefaultAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5369  template< typename MT3 // Type of the left-hand side target matrix
5370  , typename MT4 // Type of the left-hand side matrix operand
5371  , typename MT5 // Type of the right-hand side matrix operand
5372  , typename ST2 > // Type of the scalar value
5373  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5374  selectLargeAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5375  {
5376  selectDefaultAssignKernel( C, A, B, scalar );
5377  }
5378  //**********************************************************************************************
5379 
5380  //**Vectorized default assignment to row-major dense matrices (large matrices)******************
// Vectorized large-matrix kernel for C = ( A * B ) * scalar with a row-major target matrix C.
//
// This overload is selected through EnableIf when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds. There is no dedicated large-matrix implementation yet (see the TODO below): the call
// is forwarded to selectSmallAssignKernel() with ~C (presumably the matrix of type MT3 behind
// the DenseMatrix reference -- confirm) and the unchanged operands and scalar.
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5395  template< typename MT3 // Type of the left-hand side target matrix
5396  , typename MT4 // Type of the left-hand side matrix operand
5397  , typename MT5 // Type of the right-hand side matrix operand
5398  , typename ST2 > // Type of the scalar value
5399  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5400  selectLargeAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5401  {
5402  // TODO
5403  selectSmallAssignKernel( ~C, A, B, scalar );
5404  }
5405  //**********************************************************************************************
5406 
5407  //**Vectorized default assignment to column-major dense matrices (large matrices)***************
// Vectorized large-matrix kernel for C = ( A * B ) * scalar with a column-major target matrix C.
//
// This overload is selected through EnableIf when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2>
// holds. There is no dedicated large-matrix implementation yet (see the TODO below): the call
// is forwarded to selectSmallAssignKernel() with ~C (presumably the matrix of type MT3 behind
// the DenseMatrix reference -- confirm) and the unchanged operands and scalar.
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5422  template< typename MT3 // Type of the left-hand side target matrix
5423  , typename MT4 // Type of the left-hand side matrix operand
5424  , typename MT5 // Type of the right-hand side matrix operand
5425  , typename ST2 > // Type of the scalar value
5426  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5427  selectLargeAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5428  {
5429  // TODO
5430  selectSmallAssignKernel( ~C, A, B, scalar );
5431  }
5432  //**********************************************************************************************
5433 
5434  //**BLAS-based assignment to dense matrices (default)*******************************************
// Default variant of the BLAS-based assignment kernel for C = ( A * B ) * scalar.
//
// This overload is selected through EnableIf when UseDefaultKernel<MT3,MT4,MT5,ST2> holds.
// It calls no BLAS routine itself; it forwards all arguments unchanged to
// selectLargeAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5448  template< typename MT3 // Type of the left-hand side target matrix
5449  , typename MT4 // Type of the left-hand side matrix operand
5450  , typename MT5 // Type of the right-hand side matrix operand
5451  , typename ST2 > // Type of the scalar value
5452  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5453  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5454  {
5455  selectLargeAssignKernel( C, A, B, scalar );
5456  }
5457  //**********************************************************************************************
5458 
5459  //**BLAS-based assignment to dense matrices (single precision)**********************************
// BLAS-based assignment kernel for C = ( A * B ) * scalar (single precision).
//
// Only compiled when BLAZE_BLAS_MODE is non-zero and selected through EnableIf when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. Three cases are distinguished:
//  - IsTriangular<MT4>::value: B is copied into C, then strmm() is called on C with A,
//    CblasLeft, and CblasLower/CblasUpper chosen by IsLower<MT4>::value.
//  - otherwise IsTriangular<MT5>::value: A is copied into C, then strmm() is called on C with B,
//    CblasRight, and CblasLower/CblasUpper chosen by IsLower<MT5>::value.
//  - otherwise: sgemm( C, A, B, scalar, 0.0F ).
//
// NOTE(review): strmm() is presumably the in-place triangular multiply (C = scalar*A*C for
// CblasLeft, C = scalar*C*B for CblasRight) and the trailing 0.0F of sgemm() presumably the
// factor applied to the previous content of C -- confirm against the BLAS wrapper headers.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5460 #if BLAZE_BLAS_MODE
5461 
5474  template< typename MT3 // Type of the left-hand side target matrix
5475  , typename MT4 // Type of the left-hand side matrix operand
5476  , typename MT5 // Type of the right-hand side matrix operand
5477  , typename ST2 > // Type of the scalar value
5478  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5479  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5480  {
5481  if( IsTriangular<MT4>::value ) {
5482  assign( C, B );
5483  strmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5484  }
5485  else if( IsTriangular<MT5>::value ) {
5486  assign( C, A );
5487  strmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5488  }
5489  else {
5490  sgemm( C, A, B, scalar, 0.0F );
5491  }
5492  }
5493 #endif
5494  //**********************************************************************************************
5495 
5496  //**BLAS-based assignment to dense matrices (double precision)**********************************
// BLAS-based assignment kernel for C = ( A * B ) * scalar (double precision).
//
// Only compiled when BLAZE_BLAS_MODE is non-zero and selected through EnableIf when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. Three cases are distinguished:
//  - IsTriangular<MT4>::value: B is copied into C, then dtrmm() is called on C with A,
//    CblasLeft, and CblasLower/CblasUpper chosen by IsLower<MT4>::value.
//  - otherwise IsTriangular<MT5>::value: A is copied into C, then dtrmm() is called on C with B,
//    CblasRight, and CblasLower/CblasUpper chosen by IsLower<MT5>::value.
//  - otherwise: dgemm( C, A, B, scalar, 0.0 ).
//
// NOTE(review): dtrmm() is presumably the in-place triangular multiply (C = scalar*A*C for
// CblasLeft, C = scalar*C*B for CblasRight) and the trailing 0.0 of dgemm() presumably the
// factor applied to the previous content of C -- confirm against the BLAS wrapper headers.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5497 #if BLAZE_BLAS_MODE
5498 
5511  template< typename MT3 // Type of the left-hand side target matrix
5512  , typename MT4 // Type of the left-hand side matrix operand
5513  , typename MT5 // Type of the right-hand side matrix operand
5514  , typename ST2 > // Type of the scalar value
5515  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
5516  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5517  {
5518  if( IsTriangular<MT4>::value ) {
5519  assign( C, B );
5520  dtrmm( C, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
5521  }
5522  else if( IsTriangular<MT5>::value ) {
5523  assign( C, A );
5524  dtrmm( C, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
5525  }
5526  else {
5527  dgemm( C, A, B, scalar, 0.0 );
5528  }
5529  }
5530 #endif
5531  //**********************************************************************************************
5532 
5533  //**BLAS-based assignment to dense matrices (single precision complex)**************************
// BLAS-based assignment kernel for C = ( A * B ) * scalar (single precision complex).
//
// Only compiled when BLAZE_BLAS_MODE is non-zero and selected through EnableIf when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition). The scalar is passed on as complex<float>( scalar, 0.0F ), i.e. with a zero
// imaginary part. Three cases are distinguished:
//  - IsTriangular<MT4>::value: B is copied into C, then ctrmm() is called on C with A,
//    CblasLeft, and CblasLower/CblasUpper chosen by IsLower<MT4>::value.
//  - otherwise IsTriangular<MT5>::value: A is copied into C, then ctrmm() is called on C with B,
//    CblasRight, and CblasLower/CblasUpper chosen by IsLower<MT5>::value.
//  - otherwise: cgemm() with the complex scalar and complex<float>( 0.0F, 0.0F ).
//
// NOTE(review): ctrmm() is presumably the in-place triangular multiply and the last cgemm()
// argument presumably the factor applied to the previous content of C -- confirm against the
// BLAS wrapper headers.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor (used as the real part of the complex factor).
// \return void
5534 #if BLAZE_BLAS_MODE
5535 
5548  template< typename MT3 // Type of the left-hand side target matrix
5549  , typename MT4 // Type of the left-hand side matrix operand
5550  , typename MT5 // Type of the right-hand side matrix operand
5551  , typename ST2 > // Type of the scalar value
5552  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5553  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5554  {
5555  if( IsTriangular<MT4>::value ) {
5556  assign( C, B );
5557  ctrmm( C, A, CblasLeft,
5558  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5559  complex<float>( scalar, 0.0F ) );
5560  }
5561  else if( IsTriangular<MT5>::value ) {
5562  assign( C, A );
5563  ctrmm( C, B, CblasRight,
5564  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5565  complex<float>( scalar, 0.0F ) );
5566  }
5567  else {
5568  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
5569  }
5570  }
5571 #endif
5572  //**********************************************************************************************
5573 
5574  //**BLAS-based assignment to dense matrices (double precision complex)**************************
// BLAS-based assignment kernel for C = ( A * B ) * scalar (double precision complex).
//
// Only compiled when BLAZE_BLAS_MODE is non-zero and selected through EnableIf when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 is not part of the
// condition). The scalar is passed on as complex<double>( scalar, 0.0 ), i.e. with a zero
// imaginary part. Three cases are distinguished:
//  - IsTriangular<MT4>::value: B is copied into C, then ztrmm() is called on C with A,
//    CblasLeft, and CblasLower/CblasUpper chosen by IsLower<MT4>::value.
//  - otherwise IsTriangular<MT5>::value: A is copied into C, then ztrmm() is called on C with B,
//    CblasRight, and CblasLower/CblasUpper chosen by IsLower<MT5>::value.
//  - otherwise: zgemm() with the complex scalar and complex<double>( 0.0, 0.0 ).
//
// NOTE(review): ztrmm() is presumably the in-place triangular multiply and the last zgemm()
// argument presumably the factor applied to the previous content of C -- confirm against the
// BLAS wrapper headers.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor (used as the real part of the complex factor).
// \return void
5575 #if BLAZE_BLAS_MODE
5576 
5589  template< typename MT3 // Type of the left-hand side target matrix
5590  , typename MT4 // Type of the left-hand side matrix operand
5591  , typename MT5 // Type of the right-hand side matrix operand
5592  , typename ST2 > // Type of the scalar value
5593  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
5594  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5595  {
5596  if( IsTriangular<MT4>::value ) {
5597  assign( C, B );
5598  ztrmm( C, A, CblasLeft,
5599  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
5600  complex<double>( scalar, 0.0 ) );
5601  }
5602  else if( IsTriangular<MT5>::value ) {
5603  assign( C, A );
5604  ztrmm( C, B, CblasRight,
5605  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
5606  complex<double>( scalar, 0.0 ) );
5607  }
5608  else {
5609  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
5610  }
5611  }
5612 #endif
5613  //**********************************************************************************************
5614 
5615  //**Assignment to sparse matrices***************************************************************
// Assignment of a scaled dense matrix/transpose dense matrix product to a sparse matrix.
//
// The expression rhs is first evaluated serially into a temporary matrix of type TmpType,
// which is then assigned to the sparse target. TmpType is chosen by SelectType from
// OppositeType and ResultType depending on the storage order flag SO of the target
// (presumably OppositeType when SO is true, ResultType otherwise -- confirm).
// The dimensions of lhs and rhs are only checked by internal assertions.
//
// NOTE(review): the embedded listing numbers skip 5631 and 5635-5640 inside this body, so
// statements of the original source may be missing from this view.
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
5627  template< typename MT // Type of the target sparse matrix
5628  , bool SO > // Storage order of the target sparse matrix
5629  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5630  {
5632 
5633  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
5634 
5641 
5642  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5643  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5644 
      // Evaluate the whole expression into a dense temporary, then hand it to the sparse assign.
5645  const TmpType tmp( serial( rhs ) );
5646  assign( ~lhs, tmp );
5647  }
5648  //**********************************************************************************************
5649 
5650  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of a scaled dense matrix/transpose dense matrix product to a dense
// matrix ( lhs += ( A * B ) * scalar ).
//
// Steps performed:
//  1. The operands of the wrapped product are fetched via rhs.matrix_.leftOperand() and
//     rhs.matrix_.rightOperand().
//  2. If lhs has no rows or no columns, or the left operand has no columns, the function
//     returns without touching lhs.
//  3. Both operands are evaluated serially into objects of type LT and RT.
//  4. The work is dispatched to selectAddAssignKernel() together with rhs.scalar_.
// All size relations between lhs, rhs and the operands are only checked by internal assertions.
//
// NOTE(review): the embedded listing number 5666 is skipped inside this body, so a statement
// of the original source may be missing from this view.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be added.
// \return void
5662  template< typename MT // Type of the target dense matrix
5663  , bool SO > // Storage order of the target dense matrix
5664  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
5665  {
5667 
5668  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
5669  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
5670 
5671  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
5672  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
5673 
      // Nothing to add if the result is empty or the inner dimension is zero.
5674  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
5675  return;
5676  }
5677 
5678  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
5679  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
5680 
5681  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
5682  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
5683  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
5684  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
5685  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
5686  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
5687 
5688  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
5689  }
5690  //**********************************************************************************************
5691 
5692  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment C += ( A * B ) * scalar.
//
// The small-matrix kernel selectSmallAddAssignKernel() is used if at least one operand type is
// diagonal (IsDiagonal<MT4> or IsDiagonal<MT5>) or if the number of elements of C
// ( C.rows() * C.columns() ) is below DMATTDMATMULT_THRESHOLD. In all other cases the
// BLAS-based kernel selectBlasAddAssignKernel() is used.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5703  template< typename MT3 // Type of the left-hand side target matrix
5704  , typename MT4 // Type of the left-hand side matrix operand
5705  , typename MT5 // Type of the right-hand side matrix operand
5706  , typename ST2 > // Type of the scalar value
5707  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5708  {
5709  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
5710  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
5711  selectSmallAddAssignKernel( C, A, B, scalar );
5712  else
5713  selectBlasAddAssignKernel( C, A, B, scalar );
5714  }
5715  //**********************************************************************************************
5716 
5717  //**Default addition assignment to dense matrices (general/general)*****************************
// Default addition assignment kernel C += ( A * B ) * scalar for two non-diagonal operands.
//
// This overload is selected through EnableIf when neither MT4 nor MT5 is diagonal
// (Not<IsDiagonal<MT4>> and Not<IsDiagonal<MT5>>). The scaled product A * B * scalar is
// evaluated serially into a temporary of type ResultType, which is then added to C via
// addAssign().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5731  template< typename MT3 // Type of the left-hand side target matrix
5732  , typename MT4 // Type of the left-hand side matrix operand
5733  , typename MT5 // Type of the right-hand side matrix operand
5734  , typename ST2 > // Type of the scalar value
5735  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
5736  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5737  {
5738  const ResultType tmp( serial( A * B * scalar ) );
5739  addAssign( C, tmp );
5740  }
5741  //**********************************************************************************************
5742 
5743  //**Default addition assignment to row-major dense matrices (general/diagonal)******************
// Default addition assignment kernel C += ( A * B ) * scalar for a non-diagonal A, a diagonal B
// and a row-major target matrix C.
//
// This overload is selected through EnableIf when MT4 is not diagonal and MT5 is diagonal.
// Only the diagonal element B(j,j) of B is read, so each update has the form
// C(i,j) += A(i,j) * B(j,j) * scalar. C is traversed row by row; the column range
// [jbegin,jend) of a row is narrowed according to the structure traits of MT4:
//  - IsUpper<MT4>: start at column i (i+1 if IsStrictlyUpper<MT4>), else at 0.
//  - IsLower<MT4>: stop after column i (before column i if IsStrictlyLower<MT4>), else at N.
// The columns are processed in pairs, followed by one single column if the count is odd.
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \param scalar The scaling factor.
// \return void
5757  template< typename MT3 // Type of the left-hand side target matrix
5758  , typename MT4 // Type of the left-hand side matrix operand
5759  , typename MT5 // Type of the right-hand side matrix operand
5760  , typename ST2 > // Type of the scalar value
5761  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
5762  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5763  {
5764  const size_t M( A.rows() );
5765  const size_t N( B.columns() );
5766 
5767  for( size_t i=0UL; i<M; ++i )
5768  {
5769  const size_t jbegin( ( IsUpper<MT4>::value )
5770  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
5771  :( 0UL ) );
5772  const size_t jend( ( IsLower<MT4>::value )
5773  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
5774  :( N ) );
5775  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
5776 
      // jnum & size_t(-2) clears the lowest bit: jpos marks the end of the two-column steps.
5777  const size_t jnum( jend - jbegin );
5778  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
5779 
5780  for( size_t j=jbegin; j<jpos; j+=2UL ) {
5781  (~C)(i,j ) += A(i,j ) * B(j ,j ) * scalar;
5782  (~C)(i,j+1UL) += A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
5783  }
      // Odd number of columns: handle the last one separately.
5784  if( jpos < jend ) {
5785  (~C)(i,jpos) += A(i,jpos) * B(jpos,jpos) * scalar;
5786  }
5787  }
5788  }
5789  //**********************************************************************************************
5790 
5791  //**Default addition assignment to column-major dense matrices (general/diagonal)***************
// Default addition assignment kernel C += ( A * B ) * scalar for a non-diagonal A, a diagonal B
// and a column-major target matrix C.
//
// This overload is selected through EnableIf when MT4 is not diagonal and MT5 is diagonal.
// Only the diagonal element B(j,j) of B is read, so each update has the form
// C(i,j) += A(i,j) * B(j,j) * scalar. The index space is traversed in blocks of at most
// 16 x 16 elements (block = 16UL), column blocks in the outer loop and row blocks inside.
// Within a block the row range [ibegin,ipos) of column j is narrowed according to MT4:
//  - IsLower<MT4>: start at row j (j+1 if IsStrictlyLower<MT4>), but not before the block start.
//  - IsUpper<MT4>: stop after row j (before row j if IsStrictlyUpper<MT4>), but not past the
//    block end.
// If ibegin is not smaller than ipos the inner loop simply does not execute.
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \param scalar The scaling factor.
// \return void
5805  template< typename MT3 // Type of the left-hand side target matrix
5806  , typename MT4 // Type of the left-hand side matrix operand
5807  , typename MT5 // Type of the right-hand side matrix operand
5808  , typename ST2 > // Type of the scalar value
5809  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
5810  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5811  {
5812  const size_t M( A.rows() );
5813  const size_t N( B.columns() );
5814 
5815  const size_t block( 16UL );
5816 
5817  for( size_t jj=0UL; jj<N; jj+=block ) {
5818  const size_t jend( min( N, jj+block ) );
5819  for( size_t ii=0UL; ii<M; ii+=block ) {
5820  const size_t iend( min( M, ii+block ) );
5821  for( size_t j=jj; j<jend; ++j )
5822  {
5823  const size_t ibegin( ( IsLower<MT4>::value )
5824  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
5825  :( ii ) );
5826  const size_t ipos( ( IsUpper<MT4>::value )
5827  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
5828  :( iend ) );
5829 
5830  for( size_t i=ibegin; i<ipos; ++i ) {
5831  (~C)(i,j) += A(i,j) * B(j,j) * scalar;
5832  }
5833  }
5834  }
5835  }
5836  }
5837  //**********************************************************************************************
5838 
5839  //**Default addition assignment to row-major dense matrices (diagonal/general)******************
// Default addition assignment kernel C += ( A * B ) * scalar for a diagonal A, a non-diagonal B
// and a row-major target matrix C.
//
// This overload is selected through EnableIf when MT4 is diagonal and MT5 is not diagonal.
// Only the diagonal element A(i,i) of A is read, so each update has the form
// C(i,j) += A(i,i) * B(i,j) * scalar. The index space is traversed in blocks of at most
// 16 x 16 elements (block = 16UL), row blocks in the outer loop and column blocks inside.
// Within a block the column range [jbegin,jpos) of row i is narrowed according to MT5:
//  - IsUpper<MT5>: start at column i (i+1 if IsStrictlyUpper<MT5>), but not before the block
//    start.
//  - IsLower<MT5>: stop after column i (before column i if IsStrictlyLower<MT5>), but not past
//    the block end.
// If jbegin is not smaller than jpos the inner loop simply does not execute.
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5853  template< typename MT3 // Type of the left-hand side target matrix
5854  , typename MT4 // Type of the left-hand side matrix operand
5855  , typename MT5 // Type of the right-hand side matrix operand
5856  , typename ST2 > // Type of the scalar value
5857  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
5858  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
5859  {
5860  const size_t M( A.rows() );
5861  const size_t N( B.columns() );
5862 
5863  const size_t block( 16UL );
5864 
5865  for( size_t ii=0UL; ii<M; ii+=block ) {
5866  const size_t iend( min( M, ii+block ) );
5867  for( size_t jj=0UL; jj<N; jj+=block ) {
5868  const size_t jend( min( N, jj+block ) );
5869  for( size_t i=ii; i<iend; ++i )
5870  {
5871  const size_t jbegin( ( IsUpper<MT5>::value )
5872  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
5873  :( jj ) );
5874  const size_t jpos( ( IsLower<MT5>::value )
5875  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
5876  :( jend ) );
5877 
5878  for( size_t j=jbegin; j<jpos; ++j ) {
5879  (~C)(i,j) += A(i,i) * B(i,j) * scalar;
5880  }
5881  }
5882  }
5883  }
5884  }
5885  //**********************************************************************************************
5886 
5887  //**Default addition assignment to column-major dense matrices (diagonal/general)***************
// Default addition assignment kernel C += ( A * B ) * scalar for a diagonal A, a non-diagonal B
// and a column-major target matrix C.
//
// This overload is selected through EnableIf when MT4 is diagonal and MT5 is not diagonal.
// Only the diagonal element A(i,i) of A is read, so each update has the form
// C(i,j) += A(i,i) * B(i,j) * scalar. C is traversed column by column; the row range
// [ibegin,iend) of a column is narrowed according to the structure traits of MT5:
//  - IsLower<MT5>: start at row j (j+1 if IsStrictlyLower<MT5>), else at 0.
//  - IsUpper<MT5>: stop after row j (before row j if IsStrictlyUpper<MT5>), else at M.
// The rows are processed in pairs, followed by one single row if the count is odd.
//
// \param C The target left-hand side dense matrix (column-major).
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5901  template< typename MT3 // Type of the left-hand side target matrix
5902  , typename MT4 // Type of the left-hand side matrix operand
5903  , typename MT5 // Type of the right-hand side matrix operand
5904  , typename ST2 > // Type of the scalar value
5905  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
5906  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
5907  {
5908  const size_t M( A.rows() );
5909  const size_t N( B.columns() );
5910 
5911  for( size_t j=0UL; j<N; ++j )
5912  {
5913  const size_t ibegin( ( IsLower<MT5>::value )
5914  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
5915  :( 0UL ) );
5916  const size_t iend( ( IsUpper<MT5>::value )
5917  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
5918  :( M ) );
5919  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
5920 
      // inum & size_t(-2) clears the lowest bit: ipos marks the end of the two-row steps.
5921  const size_t inum( iend - ibegin );
5922  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
5923 
5924  for( size_t i=ibegin; i<ipos; i+=2UL ) {
5925  (~C)(i ,j) += A(i ,i ) * B(i ,j) * scalar;
5926  (~C)(i+1UL,j) += A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
5927  }
      // Odd number of rows: handle the last one separately.
5928  if( ipos < iend ) {
5929  (~C)(ipos,j) += A(ipos,ipos) * B(ipos,j) * scalar;
5930  }
5931  }
5932  }
5933  //**********************************************************************************************
5934 
5935  //**Default addition assignment to dense matrices (diagonal/diagonal)***************************
// Default addition assignment kernel C += ( A * B ) * scalar for two diagonal operands.
//
// This overload is selected through EnableIf when both MT4 and MT5 are diagonal. Only the
// diagonal of C is updated: C(i,i) += A(i,i) * B(i,i) * scalar for every i below A.rows().
// All off-diagonal elements of C are left untouched.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side (diagonal) multiplication operand.
// \param B The right-hand side (diagonal) multiplication operand.
// \param scalar The scaling factor.
// \return void
5949  template< typename MT3 // Type of the left-hand side target matrix
5950  , typename MT4 // Type of the left-hand side matrix operand
5951  , typename MT5 // Type of the right-hand side matrix operand
5952  , typename ST2 > // Type of the scalar value
5953  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
5954  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5955  {
5956  for( size_t i=0UL; i<A.rows(); ++i ) {
5957  C(i,i) += A(i,i) * B(i,i) * scalar;
5958  }
5959  }
5960  //**********************************************************************************************
5961 
5962  //**Default addition assignment to dense matrices (small matrices)******************************
// Fallback small-matrix kernel for the addition assignment C += ( A * B ) * scalar.
//
// This overload is selected through DisableIf, i.e. only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold. It performs no work of its own and
// forwards all four arguments unchanged to selectDefaultAddAssignKernel().
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor.
// \return void
5976  template< typename MT3 // Type of the left-hand side target matrix
5977  , typename MT4 // Type of the left-hand side matrix operand
5978  , typename MT5 // Type of the right-hand side matrix operand
5979  , typename ST2 > // Type of the scalar value
5980  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
5981  selectSmallAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
5982  {
5983  selectDefaultAddAssignKernel( C, A, B, scalar );
5984  }
5985  //**********************************************************************************************
5986 
5987  //**Vectorized default addition assignment to row-major dense matrices (small matrices)*********
// Vectorized kernel computing C += ( A * B ) * scalar for a row-major target matrix C
// (small matrices).
//
// This overload is selected through EnableIf only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds. For every element C(i,j) the sum over k of
// A(i,k) * B(k,j) is built up IT::size elements per step in IntrinsicType accumulators, reduced
// with sum(), multiplied by scalar and added to the existing value of C(i,j). C is covered tile
// by tile: 2x4, 2x2 and 2x1 tiles for row groups of two, and 1x4 and 1x2 tiles plus a single
// element for a final row.
//
// \param C The target left-hand side dense matrix (row-major).
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \param scalar The scaling factor applied to every reduced sum.
// \return void
//
// NOTE(review): the accumulators are default-constructed and then added to; this assumes the
// default constructor of IntrinsicType yields an all-zero vector -- confirm.
// NOTE(review): A.load(i,k) and B.load(k,j) presumably read IT::size consecutive elements along
// k, and the k-loops may step past kend by up to IT::size-1 elements; this presumably relies on
// padded storage -- confirm.
6002  template< typename MT3 // Type of the left-hand side target matrix
6003  , typename MT4 // Type of the left-hand side matrix operand
6004  , typename MT5 // Type of the right-hand side matrix operand
6005  , typename ST2 > // Type of the scalar value
6006  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6007  selectSmallAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6008  {
6009  typedef IntrinsicTrait<ElementType> IT;
6010 
6011  const size_t M( A.rows() );
6012  const size_t N( B.columns() );
6013  const size_t K( A.columns() );
6014 
6015  size_t i( 0UL );
6016 
      // Rows in groups of two: 2x4 tiles of C, then 2x2 tiles, then one 2x1 tile if a single
      // column is left.
6017  for( ; (i+2UL) <= M; i+=2UL )
6018  {
6019  size_t j( 0UL );
6020 
6021  for( ; (j+4UL) <= N; j+=4UL )
6022  {
      // k-range of the dot products for this tile. kbegin starts at i if IsUpper<MT4>::value,
      // at j if IsLower<MT5>::value (max of both if both hold), else at 0, and is rounded down
      // to a multiple of IT::size by the mask (assuming IT::size is a power of two).
      // kend stops at i+2 if IsLower<MT4>::value, at j+4 if IsUpper<MT5>::value (min of both
      // if both hold), else at K.
6023  const size_t kbegin( ( IsUpper<MT4>::value )
6024  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6025  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6026  const size_t kend( ( IsLower<MT4>::value )
6027  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
6028  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
6029 
6030  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6031 
      // One accumulator per element of the 2x4 tile; each A row and B column is loaded once per k.
6032  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6033  const IntrinsicType a1( A.load(i ,k) );
6034  const IntrinsicType a2( A.load(i+1UL,k) );
6035  const IntrinsicType b1( B.load(k,j ) );
6036  const IntrinsicType b2( B.load(k,j+1UL) );
6037  const IntrinsicType b3( B.load(k,j+2UL) );
6038  const IntrinsicType b4( B.load(k,j+3UL) );
6039  xmm1 = xmm1 + a1 * b1;
6040  xmm2 = xmm2 + a1 * b2;
6041  xmm3 = xmm3 + a1 * b3;
6042  xmm4 = xmm4 + a1 * b4;
6043  xmm5 = xmm5 + a2 * b1;
6044  xmm6 = xmm6 + a2 * b2;
6045  xmm7 = xmm7 + a2 * b3;
6046  xmm8 = xmm8 + a2 * b4;
6047  }
6048 
6049  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6050  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6051  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
6052  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
6053  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
6054  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
6055  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
6056  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
6057  }
6058 
6059  for( ; (j+2UL) <= N; j+=2UL )
6060  {
      // Same k-range rule as for the 2x4 tile, with the column bound j+2 instead of j+4.
6061  const size_t kbegin( ( IsUpper<MT4>::value )
6062  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6063  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6064  const size_t kend( ( IsLower<MT4>::value )
6065  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6066  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6067 
6068  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6069 
6070  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6071  const IntrinsicType a1( A.load(i ,k) );
6072  const IntrinsicType a2( A.load(i+1UL,k) );
6073  const IntrinsicType b1( B.load(k,j ) );
6074  const IntrinsicType b2( B.load(k,j+1UL) );
6075  xmm1 = xmm1 + a1 * b1;
6076  xmm2 = xmm2 + a1 * b2;
6077  xmm3 = xmm3 + a2 * b1;
6078  xmm4 = xmm4 + a2 * b2;
6079  }
6080 
6081  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6082  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6083  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6084  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6085  }
6086 
      // Leftover column (2x1 tile): only the IsLower<MT4> bound is applied to kend here.
6087  if( j < N )
6088  {
6089  const size_t kbegin( ( IsUpper<MT4>::value )
6090  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6091  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6092  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6093 
6094  IntrinsicType xmm1, xmm2;
6095 
6096  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6097  const IntrinsicType b1( B.load(k,j) );
6098  xmm1 = xmm1 + A.load(i ,k) * b1;
6099  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6100  }
6101 
6102  (~C)(i ,j) += sum( xmm1 ) * scalar;
6103  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6104  }
6105  }
6106 
      // Final single row: 1x4 tiles, then 1x2 tiles, then one single element if a column is left.
6107  if( i < M )
6108  {
6109  size_t j( 0UL );
6110 
6111  for( ; (j+4UL) <= N; j+=4UL )
6112  {
      // For a single row only the IsUpper<MT5> bound (j+4) is applied to kend.
6113  const size_t kbegin( ( IsUpper<MT4>::value )
6114  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6115  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6116  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
6117 
6118  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6119 
6120  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6121  const IntrinsicType a1( A.load(i,k) );
6122  xmm1 = xmm1 + a1 * B.load(k,j );
6123  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6124  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
6125  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
6126  }
6127 
6128  (~C)(i,j ) += sum( xmm1 ) * scalar;
6129  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6130  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
6131  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
6132  }
6133 
6134  for( ; (j+2UL) <= N; j+=2UL )
6135  {
6136  const size_t kbegin( ( IsUpper<MT4>::value )
6137  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6138  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6139  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6140 
6141  IntrinsicType xmm1, xmm2;
6142 
6143  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6144  const IntrinsicType a1( A.load(i,k) );
6145  xmm1 = xmm1 + a1 * B.load(k,j );
6146  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6147  }
6148 
6149  (~C)(i,j ) += sum( xmm1 ) * scalar;
6150  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6151  }
6152 
      // Last element: the k-loop always runs up to K (no upper narrowing).
6153  if( j < N )
6154  {
6155  const size_t kbegin( ( IsUpper<MT4>::value )
6156  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6157  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6158 
6159  IntrinsicType xmm1;
6160 
6161  for( size_t k=kbegin; k<K; k+=IT::size ) {
6162  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
6163  }
6164 
6165  (~C)(i,j) += sum( xmm1 ) * scalar;
6166  }
6167  }
6168  }
6169  //**********************************************************************************************
6170 
6171  //**Vectorized default addition assignment to column-major dense matrices (small matrices)******
// Vectorized small-matrix kernel for the scaled addition assignment C += scalar * ( A * B ).
// This overload is enabled by UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> and takes the target
// as DenseMatrix<MT3,true>.
//
//   C      : target matrix, updated in place through (~C)(i,j).
//   A      : left-hand side operand, read in chunks along k via A.load(i,k).
//   B      : right-hand side operand, read in chunks along k via B.load(k,j).
//   scalar : factor applied to each reduced dot product before it is added to C.
//
// Rows of C are processed in tiles of four, then two, then one leftover row; columns in tiles
// of two plus one leftover column. Every tile keeps one accumulator per target element.
// NOTE(review): the k loops advance in whole IT::size steps and may step past kend/K; this
//               presumably relies on padded operand storage - confirm against load().
// NOTE(review): the accumulators are declared without an initializer; presumably IntrinsicType
//               default-constructs to zero - confirm.
6186  template< typename MT3 // Type of the left-hand side target matrix
6187  , typename MT4 // Type of the left-hand side matrix operand
6188  , typename MT5 // Type of the right-hand side matrix operand
6189  , typename ST2 > // Type of the scalar value
6190  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6191  selectSmallAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6192  {
6193  typedef IntrinsicTrait<ElementType> IT;
6194 
6195  const size_t M( A.rows() );
6196  const size_t N( B.columns() );
6197  const size_t K( A.columns() );
6198 
6199  size_t i( 0UL );
6200 
// Row tiles of four (rows i .. i+3).
6201  for( ; (i+4UL) <= M; i+=4UL )
6202  {
6203  size_t j( 0UL );
6204 
// 4x2 tile: eight accumulators, xmm(2r+1)/xmm(2r+2) belong to row i+r, columns j/j+1.
6205  for( ; (j+2UL) <= N; j+=2UL )
6206  {
// kbegin skips the leading k range for an upper A and/or a lower B; the mask size_t(-IT::size)
// rounds the start down to a chunk boundary (assuming IT::size is a power of two).
6207  const size_t kbegin( ( IsUpper<MT4>::value )
6208  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6209  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// kend cuts the trailing k range for a lower A (last tile row + 1) and/or an upper B (last
// tile column + 1); otherwise the full inner dimension K is traversed.
6210  const size_t kend( ( IsLower<MT4>::value )
6211  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
6212  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6213 
6214  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
6215 
6216  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6217  const IntrinsicType a1( A.load(i ,k) );
6218  const IntrinsicType a2( A.load(i+1UL,k) );
6219  const IntrinsicType a3( A.load(i+2UL,k) );
6220  const IntrinsicType a4( A.load(i+3UL,k) );
6221  const IntrinsicType b1( B.load(k,j ) );
6222  const IntrinsicType b2( B.load(k,j+1UL) );
6223  xmm1 = xmm1 + a1 * b1;
6224  xmm2 = xmm2 + a1 * b2;
6225  xmm3 = xmm3 + a2 * b1;
6226  xmm4 = xmm4 + a2 * b2;
6227  xmm5 = xmm5 + a3 * b1;
6228  xmm6 = xmm6 + a3 * b2;
6229  xmm7 = xmm7 + a4 * b1;
6230  xmm8 = xmm8 + a4 * b2;
6231  }
6232 
// Reduce each accumulator, scale it, and add it to its target element.
6233  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6234  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6235  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6236  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6237  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
6238  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
6239  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
6240  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
6241  }
6242 
// Leftover single column for the current four rows (4x1 tile).
6243  if( j < N )
6244  {
6245  const size_t kbegin( ( IsUpper<MT4>::value )
6246  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6247  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6248  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
6249 
6250  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6251 
6252  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6253  const IntrinsicType b1( B.load(k,j) );
6254  xmm1 = xmm1 + A.load(i ,k) * b1;
6255  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6256  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
6257  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
6258  }
6259 
6260  (~C)(i ,j) += sum( xmm1 ) * scalar;
6261  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6262  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
6263  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
6264  }
6265  }
6266 
// Row tiles of two (rows i, i+1); runs at most once after the four-row loop.
6267  for( ; (i+2UL) <= M; i+=2UL )
6268  {
6269  size_t j( 0UL );
6270 
// 2x2 tile.
6271  for( ; (j+2UL) <= N; j+=2UL )
6272  {
6273  const size_t kbegin( ( IsUpper<MT4>::value )
6274  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6275  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6276  const size_t kend( ( IsLower<MT4>::value )
6277  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
6278  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
6279 
6280  IntrinsicType xmm1, xmm2, xmm3, xmm4;
6281 
6282  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6283  const IntrinsicType a1( A.load(i ,k) );
6284  const IntrinsicType a2( A.load(i+1UL,k) );
6285  const IntrinsicType b1( B.load(k,j ) );
6286  const IntrinsicType b2( B.load(k,j+1UL) );
6287  xmm1 = xmm1 + a1 * b1;
6288  xmm2 = xmm2 + a1 * b2;
6289  xmm3 = xmm3 + a2 * b1;
6290  xmm4 = xmm4 + a2 * b2;
6291  }
6292 
6293  (~C)(i ,j ) += sum( xmm1 ) * scalar;
6294  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
6295  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
6296  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
6297  }
6298 
// Leftover single column for the current two rows (2x1 tile).
6299  if( j < N )
6300  {
6301  const size_t kbegin( ( IsUpper<MT4>::value )
6302  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6303  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6304  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
6305 
6306  IntrinsicType xmm1, xmm2;
6307 
6308  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6309  const IntrinsicType b1( B.load(k,j) );
6310  xmm1 = xmm1 + A.load(i ,k) * b1;
6311  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
6312  }
6313 
6314  (~C)(i ,j) += sum( xmm1 ) * scalar;
6315  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
6316  }
6317  }
6318 
// Leftover single row (M odd).
6319  if( i < M )
6320  {
6321  size_t j( 0UL );
6322 
// 1x2 tile.
6323  for( ; (j+2UL) <= N; j+=2UL )
6324  {
6325  const size_t kbegin( ( IsUpper<MT4>::value )
6326  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6327  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6328  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
6329 
6330  IntrinsicType xmm1, xmm2;
6331 
6332  for( size_t k=kbegin; k<kend; k+=IT::size ) {
6333  const IntrinsicType a1( A.load(i,k) );
6334  xmm1 = xmm1 + a1 * B.load(k,j );
6335  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
6336  }
6337 
6338  (~C)(i,j ) += sum( xmm1 ) * scalar;
6339  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
6340  }
6341 
// Final single element; only kbegin is restricted here, the loop always runs up to K.
6342  if( j < N )
6343  {
6344  const size_t kbegin( ( IsUpper<MT4>::value )
6345  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
6346  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
6347 
6348  IntrinsicType xmm1;
6349 
6350  for( size_t k=kbegin; k<K; k+=IT::size ) {
6351  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
6352  }
6353 
6354  (~C)(i,j) += sum( xmm1 ) * scalar;
6355  }
6356  }
6357  }
6358  //**********************************************************************************************
6359 
6360  //**Default addition assignment to dense matrices (large matrices)******************************
6374  template< typename MT3 // Type of the left-hand side target matrix
6375  , typename MT4 // Type of the left-hand side matrix operand
6376  , typename MT5 // Type of the right-hand side matrix operand
6377  , typename ST2 > // Type of the scalar value
6378  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6379  selectLargeAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6380  {
6381  selectDefaultAddAssignKernel( C, A, B, scalar );
6382  }
6383  //**********************************************************************************************
6384 
6385  //**Vectorized default addition assignment to row-major dense matrices (large matrices)*********
6400  template< typename MT3 // Type of the left-hand side target matrix
6401  , typename MT4 // Type of the left-hand side matrix operand
6402  , typename MT5 // Type of the right-hand side matrix operand
6403  , typename ST2 > // Type of the scalar value
6404  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6405  selectLargeAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6406  {
6407  // TODO
6408  selectSmallAddAssignKernel( ~C, A, B, scalar );
6409  }
6410  //**********************************************************************************************
6411 
6412  //**Vectorized default addition assignment to column-major dense matrices (large matrices)******
6427  template< typename MT3 // Type of the left-hand side target matrix
6428  , typename MT4 // Type of the left-hand side matrix operand
6429  , typename MT5 // Type of the right-hand side matrix operand
6430  , typename ST2 > // Type of the scalar value
6431  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6432  selectLargeAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6433  {
6434  // TODO
6435  selectSmallAddAssignKernel( ~C, A, B, scalar );
6436  }
6437  //**********************************************************************************************
6438 
6439  //**BLAS-based addition assignment to dense matrices (default)**********************************
6453  template< typename MT3 // Type of the left-hand side target matrix
6454  , typename MT4 // Type of the left-hand side matrix operand
6455  , typename MT5 // Type of the right-hand side matrix operand
6456  , typename ST2 > // Type of the scalar value
6457  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6458  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6459  {
6460  selectLargeAddAssignKernel( C, A, B, scalar );
6461  }
6462  //**********************************************************************************************
6463 
6464  //**BLAS-based addition assignment to dense matrices (single precision)*************************
6465 #if BLAZE_BLAS_MODE
6466 
6479  template< typename MT3 // Type of the left-hand side target matrix
6480  , typename MT4 // Type of the left-hand side matrix operand
6481  , typename MT5 // Type of the right-hand side matrix operand
6482  , typename ST2 > // Type of the scalar value
6483  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6484  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6485  {
6486  if( IsTriangular<MT4>::value ) {
6487  typename MT3::ResultType tmp( B );
6488  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
6489  addAssign( C, tmp );
6490  }
6491  else if( IsTriangular<MT5>::value ) {
6492  typename MT3::ResultType tmp( A );
6493  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
6494  addAssign( C, tmp );
6495  }
6496  else {
6497  sgemm( C, A, B, scalar, 1.0F );
6498  }
6499  }
6500 #endif
6501  //**********************************************************************************************
6502 
6503  //**BLAS-based addition assignment to dense matrices (double precision)*************************
6504 #if BLAZE_BLAS_MODE
6505 
6518  template< typename MT3 // Type of the left-hand side target matrix
6519  , typename MT4 // Type of the left-hand side matrix operand
6520  , typename MT5 // Type of the right-hand side matrix operand
6521  , typename ST2 > // Type of the scalar value
6522  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
6523  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6524  {
6525  if( IsTriangular<MT4>::value ) {
6526  typename MT3::ResultType tmp( B );
6527  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
6528  addAssign( C, tmp );
6529  }
6530  else if( IsTriangular<MT5>::value ) {
6531  typename MT3::ResultType tmp( A );
6532  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
6533  addAssign( C, tmp );
6534  }
6535  else {
6536  dgemm( C, A, B, scalar, 1.0 );
6537  }
6538  }
6539 #endif
6540  //**********************************************************************************************
6541 
6542  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
6543 #if BLAZE_BLAS_MODE
6544 
6557  template< typename MT3 // Type of the left-hand side target matrix
6558  , typename MT4 // Type of the left-hand side matrix operand
6559  , typename MT5 // Type of the right-hand side matrix operand
6560  , typename ST2 > // Type of the scalar value
6561  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6562  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6563  {
6564  if( IsTriangular<MT4>::value ) {
6565  typename MT3::ResultType tmp( B );
6566  ctrmm( tmp, A, CblasLeft,
6567  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6568  complex<float>( scalar, 0.0F ) );
6569  addAssign( C, tmp );
6570  }
6571  else if( IsTriangular<MT5>::value ) {
6572  typename MT3::ResultType tmp( A );
6573  ctrmm( tmp, B, CblasRight,
6574  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6575  complex<float>( scalar, 0.0F ) );
6576  addAssign( C, tmp );
6577  }
6578  else {
6579  cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
6580  }
6581  }
6582 #endif
6583  //**********************************************************************************************
6584 
6585  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
6586 #if BLAZE_BLAS_MODE
6587 
6600  template< typename MT3 // Type of the left-hand side target matrix
6601  , typename MT4 // Type of the left-hand side matrix operand
6602  , typename MT5 // Type of the right-hand side matrix operand
6603  , typename ST2 > // Type of the scalar value
6604  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
6605  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6606  {
6607  if( IsTriangular<MT4>::value ) {
6608  typename MT3::ResultType tmp( B );
6609  ztrmm( tmp, A, CblasLeft,
6610  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
6611  complex<double>( scalar, 0.0 ) );
6612  addAssign( C, tmp );
6613  }
6614  else if( IsTriangular<MT5>::value ) {
6615  typename MT3::ResultType tmp( A );
6616  ztrmm( tmp, B, CblasRight,
6617  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
6618  complex<double>( scalar, 0.0 ) );
6619  addAssign( C, tmp );
6620  }
6621  else {
6622  zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
6623  }
6624  }
6625 #endif
6626  //**********************************************************************************************
6627 
6628  //**Addition assignment to sparse matrices******************************************************
6629  // No special implementation for the addition assignment to sparse matrices.
6630  //**********************************************************************************************
6631 
6632  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled matrix product to a dense matrix of either storage order.
//
//   lhs : target dense matrix; it is unwrapped with ~lhs and handed to the kernel selection.
//   rhs : the scaled product expression; its operands are taken from rhs.matrix_ and its
//         scaling factor from rhs.scalar_.
//
// The function returns early when the target has no rows or columns or when the inner
// dimension (left.columns()) is zero. Otherwise both operands are evaluated through serial()
// into LT/RT objects and the work is dispatched to selectSubAssignKernel().
// NOTE(review): the listing numbers jump from 6647 to 6649 below, so one source line appears
//               to be missing from this copy - confirm against the upstream header.
6644  template< typename MT // Type of the target dense matrix
6645  , bool SO > // Storage order of the target dense matrix
6646  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
6647  {
6649 
6650  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
6651  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
6652 
6653  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
6654  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
6655 
// Nothing to subtract for an empty target or an empty inner dimension.
6656  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
6657  return;
6658  }
6659 
6660  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
6661  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
6662 
// The evaluated operands must keep the operand dimensions and match the target.
6663  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
6664  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
6665  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
6666  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
6667  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
6668  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
6669 
6670  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
6671  }
6672  //**********************************************************************************************
6673 
6674  //**Subtraction assignment to dense matrices (kernel selection)*********************************
6685  template< typename MT3 // Type of the left-hand side target matrix
6686  , typename MT4 // Type of the left-hand side matrix operand
6687  , typename MT5 // Type of the right-hand side matrix operand
6688  , typename ST2 > // Type of the scalar value
6689  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6690  {
6691  if( ( IsDiagonal<MT4>::value || IsDiagonal<MT5>::value ) ||
6692  ( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD ) )
6693  selectSmallSubAssignKernel( C, A, B, scalar );
6694  else
6695  selectBlasSubAssignKernel( C, A, B, scalar );
6696  }
6697  //**********************************************************************************************
6698 
6699  //**Default subtraction assignment to dense matrices (general/general)**************************
6713  template< typename MT3 // Type of the left-hand side target matrix
6714  , typename MT4 // Type of the left-hand side matrix operand
6715  , typename MT5 // Type of the right-hand side matrix operand
6716  , typename ST2 > // Type of the scalar value
6717  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, Not< IsDiagonal<MT5> > > >::Type
6718  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6719  {
6720  const ResultType tmp( serial( A * B * scalar ) );
6721  subAssign( C, tmp );
6722  }
6723  //**********************************************************************************************
6724 
6725  //**Default subtraction assignment to row-major dense matrices (general/diagonal)***************
6739  template< typename MT3 // Type of the left-hand side target matrix
6740  , typename MT4 // Type of the left-hand side matrix operand
6741  , typename MT5 // Type of the right-hand side matrix operand
6742  , typename ST2 > // Type of the scalar value
6743  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
6744  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6745  {
6746  const size_t M( A.rows() );
6747  const size_t N( B.columns() );
6748 
6749  for( size_t i=0UL; i<M; ++i )
6750  {
6751  const size_t jbegin( ( IsUpper<MT4>::value )
6752  ?( IsStrictlyUpper<MT4>::value ? i+1UL : i )
6753  :( 0UL ) );
6754  const size_t jend( ( IsLower<MT4>::value )
6755  ?( IsStrictlyLower<MT4>::value ? i : i+1UL )
6756  :( N ) );
6757  BLAZE_INTERNAL_ASSERT( jbegin <= jend, "Invalid loop indices detected" );
6758 
6759  const size_t jnum( jend - jbegin );
6760  const size_t jpos( jbegin + ( jnum & size_t(-2) ) );
6761 
6762  for( size_t j=jbegin; j<jpos; j+=2UL ) {
6763  (~C)(i,j ) -= A(i,j ) * B(j ,j ) * scalar;
6764  (~C)(i,j+1UL) -= A(i,j+1UL) * B(j+1UL,j+1UL) * scalar;
6765  }
6766  if( jpos < jend ) {
6767  (~C)(i,jpos) -= A(i,jpos) * B(jpos,jpos) * scalar;
6768  }
6769  }
6770  }
6771  //**********************************************************************************************
6772 
6773  //**Default subtraction assignment to column-major dense matrices (general/diagonal)************
6787  template< typename MT3 // Type of the left-hand side target matrix
6788  , typename MT4 // Type of the left-hand side matrix operand
6789  , typename MT5 // Type of the right-hand side matrix operand
6790  , typename ST2 > // Type of the scalar value
6791  static inline typename EnableIf< And< Not< IsDiagonal<MT4> >, IsDiagonal<MT5> > >::Type
6792  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6793  {
6794  const size_t M( A.rows() );
6795  const size_t N( B.columns() );
6796 
6797  const size_t block( 16UL );
6798 
6799  for( size_t jj=0UL; jj<N; jj+=block ) {
6800  const size_t jend( min( N, jj+block ) );
6801  for( size_t ii=0UL; ii<M; ii+=block ) {
6802  const size_t iend( min( M, ii+block ) );
6803  for( size_t j=jj; j<jend; ++j )
6804  {
6805  const size_t ibegin( ( IsLower<MT4>::value )
6806  ?( max( ( IsStrictlyLower<MT4>::value ? j+1UL : j ), ii ) )
6807  :( ii ) );
6808  const size_t ipos( ( IsUpper<MT4>::value )
6809  ?( min( ( IsStrictlyUpper<MT4>::value ? j : j+1UL ), iend ) )
6810  :( iend ) );
6811 
6812  for( size_t i=ibegin; i<ipos; ++i ) {
6813  (~C)(i,j) -= A(i,j) * B(j,j) * scalar;
6814  }
6815  }
6816  }
6817  }
6818  }
6819  //**********************************************************************************************
6820 
6821  //**Default subtraction assignment to row-major dense matrices (diagonal/general)***************
6836  template< typename MT3 // Type of the left-hand side target matrix
6837  , typename MT4 // Type of the left-hand side matrix operand
6838  , typename MT5 // Type of the right-hand side matrix operand
6839  , typename ST2 > // Type of the scalar value
6840  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
6841  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6842  {
6843  const size_t M( A.rows() );
6844  const size_t N( B.columns() );
6845 
6846  const size_t block( 16UL );
6847 
6848  for( size_t ii=0UL; ii<M; ii+=block ) {
6849  const size_t iend( min( M, ii+block ) );
6850  for( size_t jj=0UL; jj<N; jj+=block ) {
6851  const size_t jend( min( N, jj+block ) );
6852  for( size_t i=ii; i<iend; ++i )
6853  {
6854  const size_t jbegin( ( IsUpper<MT5>::value )
6855  ?( max( ( IsStrictlyUpper<MT5>::value ? i+1UL : i ), jj ) )
6856  :( jj ) );
6857  const size_t jpos( ( IsLower<MT5>::value )
6858  ?( min( ( IsStrictlyLower<MT5>::value ? i : i+1UL ), jend ) )
6859  :( jend ) );
6860 
6861  for( size_t j=jbegin; j<jpos; ++j ) {
6862  (~C)(i,j) -= A(i,i) * B(i,j) * scalar;
6863  }
6864  }
6865  }
6866  }
6867  }
6868  //**********************************************************************************************
6869 
6870  //**Default subtraction assignment to column-major dense matrices (diagonal/general)************
6885  template< typename MT3 // Type of the left-hand side target matrix
6886  , typename MT4 // Type of the left-hand side matrix operand
6887  , typename MT5 // Type of the right-hand side matrix operand
6888  , typename ST2 > // Type of the scalar value
6889  static inline typename EnableIf< And< IsDiagonal<MT4>, Not< IsDiagonal<MT5> > > >::Type
6890  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
6891  {
6892  const size_t M( A.rows() );
6893  const size_t N( B.columns() );
6894 
6895  for( size_t j=0UL; j<N; ++j )
6896  {
6897  const size_t ibegin( ( IsLower<MT5>::value )
6898  ?( IsStrictlyLower<MT5>::value ? j+1UL : j )
6899  :( 0UL ) );
6900  const size_t iend( ( IsUpper<MT5>::value )
6901  ?( IsStrictlyUpper<MT5>::value ? j : j+1UL )
6902  :( M ) );
6903  BLAZE_INTERNAL_ASSERT( ibegin <= iend, "Invalid loop indices detected" );
6904 
6905  const size_t inum( iend - ibegin );
6906  const size_t ipos( ibegin + ( inum & size_t(-2) ) );
6907 
6908  for( size_t i=ibegin; i<ipos; i+=2UL ) {
6909  (~C)(i ,j) -= A(i ,i ) * B(i ,j) * scalar;
6910  (~C)(i+1UL,j) -= A(i+1UL,i+1UL) * B(i+1UL,j) * scalar;
6911  }
6912  if( ipos < iend ) {
6913  (~C)(ipos,j) -= A(ipos,ipos) * B(ipos,j) * scalar;
6914  }
6915  }
6916  }
6917  //**********************************************************************************************
6918 
6919  //**Default subtraction assignment to dense matrices (diagonal/diagonal)************************
6933  template< typename MT3 // Type of the left-hand side target matrix
6934  , typename MT4 // Type of the left-hand side matrix operand
6935  , typename MT5 // Type of the right-hand side matrix operand
6936  , typename ST2 > // Type of the scalar value
6937  static inline typename EnableIf< And< IsDiagonal<MT4>, IsDiagonal<MT5> > >::Type
6938  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6939  {
6940  for( size_t i=0UL; i<A.rows(); ++i ) {
6941  C(i,i) -= A(i,i) * B(i,i) * scalar;
6942  }
6943  }
6944  //**********************************************************************************************
6945 
6946  //**Default subtraction assignment to dense matrices (small matrices)***************************
6960  template< typename MT3 // Type of the left-hand side target matrix
6961  , typename MT4 // Type of the left-hand side matrix operand
6962  , typename MT5 // Type of the right-hand side matrix operand
6963  , typename ST2 > // Type of the scalar value
6964  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6965  selectSmallSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
6966  {
6967  selectDefaultSubAssignKernel( C, A, B, scalar );
6968  }
6969  //**********************************************************************************************
6970 
6971  //**Vectorized default subtraction assignment to row-major dense matrices (small matrices)******
// Vectorized small-matrix kernel for the scaled subtraction assignment C -= scalar * ( A * B ).
// This overload is enabled by UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> and takes the target
// as DenseMatrix<MT3,false>.
//
//   C      : target matrix, updated in place through (~C)(i,j).
//   A      : left-hand side operand, read in chunks along k via A.load(i,k).
//   B      : right-hand side operand, read in chunks along k via B.load(k,j).
//   scalar : factor applied to each reduced dot product before it is subtracted from C.
//
// Rows of C are processed in tiles of two plus one leftover row; columns in tiles of four,
// then two, then one leftover column. Every tile keeps one accumulator per target element.
// NOTE(review): the k loops advance in whole IT::size steps and may step past kend/K; this
//               presumably relies on padded operand storage - confirm against load().
// NOTE(review): the accumulators are declared without an initializer; presumably IntrinsicType
//               default-constructs to zero - confirm.
6986  template< typename MT3 // Type of the left-hand side target matrix
6987  , typename MT4 // Type of the left-hand side matrix operand
6988  , typename MT5 // Type of the right-hand side matrix operand
6989  , typename ST2 > // Type of the scalar value
6990  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
6991  selectSmallSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
6992  {
6993  typedef IntrinsicTrait<ElementType> IT;
6994 
6995  const size_t M( A.rows() );
6996  const size_t N( B.columns() );
6997  const size_t K( A.columns() );
6998 
6999  size_t i( 0UL );
7000 
// Row tiles of two (rows i, i+1).
7001  for( ; (i+2UL) <= M; i+=2UL )
7002  {
7003  size_t j( 0UL );
7004 
// 2x4 tile: xmm1..xmm4 belong to row i, xmm5..xmm8 to row i+1, columns j .. j+3.
7005  for( ; (j+4UL) <= N; j+=4UL )
7006  {
// kbegin skips the leading k range for an upper A and/or a lower B; the mask size_t(-IT::size)
// rounds the start down to a chunk boundary (assuming IT::size is a power of two).
7007  const size_t kbegin( ( IsUpper<MT4>::value )
7008  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7009  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
// kend cuts the trailing k range for a lower A (last tile row + 1) and/or an upper B (last
// tile column + 1); otherwise the full inner dimension K is traversed.
7010  const size_t kend( ( IsLower<MT4>::value )
7011  ?( IsUpper<MT5>::value ? min( i+2UL, j+4UL ) : ( i+2UL ) )
7012  :( IsUpper<MT5>::value ? ( j+4UL ) : K ) );
7013 
7014  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7015 
7016  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7017  const IntrinsicType a1( A.load(i ,k) );
7018  const IntrinsicType a2( A.load(i+1UL,k) );
7019  const IntrinsicType b1( B.load(k,j ) );
7020  const IntrinsicType b2( B.load(k,j+1UL) );
7021  const IntrinsicType b3( B.load(k,j+2UL) );
7022  const IntrinsicType b4( B.load(k,j+3UL) );
7023  xmm1 = xmm1 + a1 * b1;
7024  xmm2 = xmm2 + a1 * b2;
7025  xmm3 = xmm3 + a1 * b3;
7026  xmm4 = xmm4 + a1 * b4;
7027  xmm5 = xmm5 + a2 * b1;
7028  xmm6 = xmm6 + a2 * b2;
7029  xmm7 = xmm7 + a2 * b3;
7030  xmm8 = xmm8 + a2 * b4;
7031  }
7032 
// Reduce each accumulator, scale it, and subtract it from its target element.
7033  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7034  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7035  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
7036  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
7037  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
7038  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
7039  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
7040  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
7041  }
7042 
// 2x2 tile; runs at most once after the four-column loop.
7043  for( ; (j+2UL) <= N; j+=2UL )
7044  {
7045  const size_t kbegin( ( IsUpper<MT4>::value )
7046  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7047  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7048  const size_t kend( ( IsLower<MT4>::value )
7049  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7050  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7051 
7052  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7053 
7054  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7055  const IntrinsicType a1( A.load(i ,k) );
7056  const IntrinsicType a2( A.load(i+1UL,k) );
7057  const IntrinsicType b1( B.load(k,j ) );
7058  const IntrinsicType b2( B.load(k,j+1UL) );
7059  xmm1 = xmm1 + a1 * b1;
7060  xmm2 = xmm2 + a1 * b2;
7061  xmm3 = xmm3 + a2 * b1;
7062  xmm4 = xmm4 + a2 * b2;
7063  }
7064 
7065  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7066  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7067  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7068  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7069  }
7070 
// Leftover single column for the current two rows (2x1 tile).
7071  if( j < N )
7072  {
7073  const size_t kbegin( ( IsUpper<MT4>::value )
7074  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7075  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7076  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7077 
7078  IntrinsicType xmm1, xmm2;
7079 
7080  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7081  const IntrinsicType b1( B.load(k,j) );
7082  xmm1 = xmm1 + A.load(i ,k) * b1;
7083  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7084  }
7085 
7086  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7087  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7088  }
7089  }
7090 
// Leftover single row (M odd).
7091  if( i < M )
7092  {
7093  size_t j( 0UL );
7094 
// 1x4 tile.
7095  for( ; (j+4UL) <= N; j+=4UL )
7096  {
7097  const size_t kbegin( ( IsUpper<MT4>::value )
7098  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7099  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7100  const size_t kend( ( IsUpper<MT5>::value )?( j+4UL ):( K ) );
7101 
7102  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7103 
7104  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7105  const IntrinsicType a1( A.load(i,k) );
7106  xmm1 = xmm1 + a1 * B.load(k,j );
7107  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7108  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
7109  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
7110  }
7111 
7112  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7113  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7114  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
7115  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
7116  }
7117 
// 1x2 tile.
7118  for( ; (j+2UL) <= N; j+=2UL )
7119  {
7120  const size_t kbegin( ( IsUpper<MT4>::value )
7121  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7122  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7123  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7124 
7125  IntrinsicType xmm1, xmm2;
7126 
7127  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7128  const IntrinsicType a1( A.load(i,k) );
7129  xmm1 = xmm1 + a1 * B.load(k,j );
7130  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7131  }
7132 
7133  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7134  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7135  }
7136 
// Final single element; only kbegin is restricted here, the loop always runs up to K.
7137  if( j < N )
7138  {
7139  const size_t kbegin( ( IsUpper<MT4>::value )
7140  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7141  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7142 
7143  IntrinsicType xmm1;
7144 
7145  for( size_t k=kbegin; k<K; k+=IT::size ) {
7146  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
7147  }
7148 
7149  (~C)(i,j) -= sum( xmm1 ) * scalar;
7150  }
7151  }
7152  }
7153  //**********************************************************************************************
7154 
7155  //**Vectorized default subtraction assignment to column-major dense matrices (small matrices)***
7170  template< typename MT3 // Type of the left-hand side target matrix
7171  , typename MT4 // Type of the left-hand side matrix operand
7172  , typename MT5 // Type of the right-hand side matrix operand
7173  , typename ST2 > // Type of the scalar value
7174  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7175  selectSmallSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7176  {
7177  typedef IntrinsicTrait<ElementType> IT;
7178 
7179  const size_t M( A.rows() );
7180  const size_t N( B.columns() );
7181  const size_t K( A.columns() );
7182 
7183  size_t i( 0UL );
7184 
7185  for( ; (i+4UL) <= M; i+=4UL )
7186  {
7187  size_t j( 0UL );
7188 
7189  for( ; (j+2UL) <= N; j+=2UL )
7190  {
7191  const size_t kbegin( ( IsUpper<MT4>::value )
7192  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7193  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7194  const size_t kend( ( IsLower<MT4>::value )
7195  ?( IsUpper<MT5>::value ? min( i+4UL, j+2UL ) : ( i+4UL ) )
7196  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7197 
7198  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
7199 
7200  for( size_t k=kbegin; k<kend; k+=IT::size )
7201  {
7202  const IntrinsicType a1( A.load(i ,k) );
7203  const IntrinsicType a2( A.load(i+1UL,k) );
7204  const IntrinsicType a3( A.load(i+2UL,k) );
7205  const IntrinsicType a4( A.load(i+3UL,k) );
7206  const IntrinsicType b1( B.load(k,j ) );
7207  const IntrinsicType b2( B.load(k,j+1UL) );
7208  xmm1 = xmm1 + a1 * b1;
7209  xmm2 = xmm2 + a1 * b2;
7210  xmm3 = xmm3 + a2 * b1;
7211  xmm4 = xmm4 + a2 * b2;
7212  xmm5 = xmm5 + a3 * b1;
7213  xmm6 = xmm6 + a3 * b2;
7214  xmm7 = xmm7 + a4 * b1;
7215  xmm8 = xmm8 + a4 * b2;
7216  }
7217 
7218  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7219  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7220  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7221  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7222  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
7223  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
7224  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
7225  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
7226  }
7227 
7228  if( j < N )
7229  {
7230  const size_t kbegin( ( IsUpper<MT4>::value )
7231  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7232  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7233  const size_t kend( ( IsLower<MT4>::value )?( i+4UL ):( K ) );
7234 
7235  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7236 
7237  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7238  const IntrinsicType b1( B.load(k,j) );
7239  xmm1 = xmm1 + A.load(i ,k) * b1;
7240  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7241  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
7242  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
7243  }
7244 
7245  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7246  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7247  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
7248  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
7249  }
7250  }
7251 
7252  for( ; (i+2UL) <= M; i+=2UL )
7253  {
7254  size_t j( 0UL );
7255 
7256  for( ; (j+2UL) <= N; j+=2UL )
7257  {
7258  const size_t kbegin( ( IsUpper<MT4>::value )
7259  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7260  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7261  const size_t kend( ( IsLower<MT4>::value )
7262  ?( IsUpper<MT5>::value ? min( i+2UL, j+2UL ) : ( i+2UL ) )
7263  :( IsUpper<MT5>::value ? ( j+2UL ) : K ) );
7264 
7265  IntrinsicType xmm1, xmm2, xmm3, xmm4;
7266 
7267  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7268  const IntrinsicType a1( A.load(i ,k) );
7269  const IntrinsicType a2( A.load(i+1UL,k) );
7270  const IntrinsicType b1( B.load(k,j ) );
7271  const IntrinsicType b2( B.load(k,j+1UL) );
7272  xmm1 = xmm1 + a1 * b1;
7273  xmm2 = xmm2 + a1 * b2;
7274  xmm3 = xmm3 + a2 * b1;
7275  xmm4 = xmm4 + a2 * b2;
7276  }
7277 
7278  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
7279  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
7280  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
7281  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
7282  }
7283 
7284  if( j < N )
7285  {
7286  const size_t kbegin( ( IsUpper<MT4>::value )
7287  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7288  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7289  const size_t kend( ( IsLower<MT4>::value )?( i+2UL ):( K ) );
7290 
7291  IntrinsicType xmm1, xmm2;
7292 
7293  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7294  const IntrinsicType b1( B.load(k,j) );
7295  xmm1 = xmm1 + A.load(i ,k) * b1;
7296  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
7297  }
7298 
7299  (~C)(i ,j) -= sum( xmm1 ) * scalar;
7300  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
7301  }
7302  }
7303 
7304  if( i < M )
7305  {
7306  size_t j( 0UL );
7307 
7308  for( ; (j+2UL) <= N; j+=2UL )
7309  {
7310  const size_t kbegin( ( IsUpper<MT4>::value )
7311  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7312  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7313  const size_t kend( ( IsUpper<MT5>::value )?( j+2UL ):( K ) );
7314 
7315  IntrinsicType xmm1, xmm2;
7316 
7317  for( size_t k=kbegin; k<kend; k+=IT::size ) {
7318  const IntrinsicType a1( A.load(i,k) );
7319  xmm1 = xmm1 + a1 * B.load(k,j );
7320  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
7321  }
7322 
7323  (~C)(i,j ) -= sum( xmm1 ) * scalar;
7324  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
7325  }
7326 
7327  if( j < N )
7328  {
7329  const size_t kbegin( ( IsUpper<MT4>::value )
7330  ?( ( IsLower<MT5>::value ? max( i, j ) : i ) & size_t(-IT::size) )
7331  :( IsLower<MT5>::value ? ( j & size_t(-IT::size) ) : 0UL ) );
7332 
7333  IntrinsicType xmm1;
7334 
7335  for( size_t k=kbegin; k<K; k+=IT::size ) {
7336  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
7337  }
7338 
7339  (~C)(i,j) -= sum( xmm1 ) * scalar;
7340  }
7341  }
7342  }
7343  //**********************************************************************************************
7344 
7345  //**Default subtraction assignment to dense matrices (large matrices)***************************
7359  template< typename MT3 // Type of the left-hand side target matrix
7360  , typename MT4 // Type of the left-hand side matrix operand
7361  , typename MT5 // Type of the right-hand side matrix operand
7362  , typename ST2 > // Type of the scalar value
7363  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7364  selectLargeSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7365  {
7366  selectDefaultSubAssignKernel( C, A, B, scalar );
7367  }
7368  //**********************************************************************************************
7369 
7370  //**Vectorized default subtraction assignment to row-major dense matrices (large matrices)******
7385  template< typename MT3 // Type of the left-hand side target matrix
7386  , typename MT4 // Type of the left-hand side matrix operand
7387  , typename MT5 // Type of the right-hand side matrix operand
7388  , typename ST2 > // Type of the scalar value
7389  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7390  selectLargeSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
7391  {
7392  // TODO
7393  selectSmallSubAssignKernel( ~C, A, B, scalar );
7394  }
7395  //**********************************************************************************************
7396 
7397  //**Vectorized default subtraction assignment to column-major dense matrices (large matrices)***
7412  template< typename MT3 // Type of the left-hand side target matrix
7413  , typename MT4 // Type of the left-hand side matrix operand
7414  , typename MT5 // Type of the right-hand side matrix operand
7415  , typename ST2 > // Type of the scalar value
7416  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7417  selectLargeSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
7418  {
7419  // TODO
7420  selectSmallSubAssignKernel( ~C, A, B, scalar );
7421  }
7422  //**********************************************************************************************
7423 
7424  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
7438  template< typename MT3 // Type of the left-hand side target matrix
7439  , typename MT4 // Type of the left-hand side matrix operand
7440  , typename MT5 // Type of the right-hand side matrix operand
7441  , typename ST2 > // Type of the scalar value
7442  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
7443  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7444  {
7445  selectLargeSubAssignKernel( C, A, B, scalar );
7446  }
7447  //**********************************************************************************************
7448 
7449  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
7450 #if BLAZE_BLAS_MODE
7451 
7464  template< typename MT3 // Type of the left-hand side target matrix
7465  , typename MT4 // Type of the left-hand side matrix operand
7466  , typename MT5 // Type of the right-hand side matrix operand
7467  , typename ST2 > // Type of the scalar value
7468  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7469  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7470  {
7471  if( IsTriangular<MT4>::value ) {
7472  typename MT3::ResultType tmp( B );
7473  strmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7474  subAssign( C, tmp );
7475  }
7476  else if( IsTriangular<MT5>::value ) {
7477  typename MT3::ResultType tmp( A );
7478  strmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7479  subAssign( C, tmp );
7480  }
7481  else {
7482  sgemm( C, A, B, -scalar, 1.0F );
7483  }
7484  }
7485 #endif
7486  //**********************************************************************************************
7487 
7488  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
7489 #if BLAZE_BLAS_MODE
7490 
7503  template< typename MT3 // Type of the left-hand side target matrix
7504  , typename MT4 // Type of the left-hand side matrix operand
7505  , typename MT5 // Type of the right-hand side matrix operand
7506  , typename ST2 > // Type of the scalar value
7507  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
7508  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7509  {
7510  if( IsTriangular<MT4>::value ) {
7511  typename MT3::ResultType tmp( B );
7512  dtrmm( tmp, A, CblasLeft, ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ), scalar );
7513  subAssign( C, tmp );
7514  }
7515  else if( IsTriangular<MT5>::value ) {
7516  typename MT3::ResultType tmp( A );
7517  dtrmm( tmp, B, CblasRight, ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ), scalar );
7518  subAssign( C, tmp );
7519  }
7520  else {
7521  dgemm( C, A, B, -scalar, 1.0 );
7522  }
7523  }
7524 #endif
7525  //**********************************************************************************************
7526 
7527  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
7528 #if BLAZE_BLAS_MODE
7529 
7542  template< typename MT3 // Type of the left-hand side target matrix
7543  , typename MT4 // Type of the left-hand side matrix operand
7544  , typename MT5 // Type of the right-hand side matrix operand
7545  , typename ST2 > // Type of the scalar value
7546  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7547  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7548  {
7549  if( IsTriangular<MT4>::value ) {
7550  typename MT3::ResultType tmp( B );
7551  ctrmm( tmp, A, CblasLeft,
7552  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7553  complex<float>( scalar, 0.0F ) );
7554  subAssign( C, tmp );
7555  }
7556  else if( IsTriangular<MT5>::value ) {
7557  typename MT3::ResultType tmp( A );
7558  ctrmm( tmp, B, CblasRight,
7559  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7560  complex<float>( scalar, 0.0F ) );
7561  subAssign( C, tmp );
7562  }
7563  else {
7564  cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
7565  }
7566  }
7567 #endif
7568  //**********************************************************************************************
7569 
7570  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
7571 #if BLAZE_BLAS_MODE
7572 
7585  template< typename MT3 // Type of the left-hand side target matrix
7586  , typename MT4 // Type of the left-hand side matrix operand
7587  , typename MT5 // Type of the right-hand side matrix operand
7588  , typename ST2 > // Type of the scalar value
7589  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
7590  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
7591  {
7592  if( IsTriangular<MT4>::value ) {
7593  typename MT3::ResultType tmp( B );
7594  ztrmm( tmp, A, CblasLeft,
7595  ( IsLower<MT4>::value )?( CblasLower ):( CblasUpper ),
7596  complex<float>( scalar, 0.0 ) );
7597  subAssign( C, tmp );
7598  }
7599  else if( IsTriangular<MT5>::value ) {
7600  typename MT3::ResultType tmp( A );
7601  ztrmm( tmp, B, CblasRight,
7602  ( IsLower<MT5>::value )?( CblasLower ):( CblasUpper ),
7603  complex<float>( scalar, 0.0 ) );
7604  subAssign( C, tmp );
7605  }
7606  else {
7607  zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
7608  }
7609  }
7610 #endif
7611  //**********************************************************************************************
7612 
7613  //**Subtraction assignment to sparse matrices***************************************************
7614  // No special implementation for the subtraction assignment to sparse matrices.
7615  //**********************************************************************************************
7616 
7617  //**Multiplication assignment to dense matrices*************************************************
7618  // No special implementation for the multiplication assignment to dense matrices.
7619  //**********************************************************************************************
7620 
7621  //**Multiplication assignment to sparse matrices************************************************
7622  // No special implementation for the multiplication assignment to sparse matrices.
7623  //**********************************************************************************************
7624 
7625  //**SMP assignment to dense matrices************************************************************
7640  template< typename MT // Type of the target dense matrix
7641  , bool SO > // Storage order of the target dense matrix
7642  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7643  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7644  {
7646 
7647  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7648  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7649 
7650  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7651  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7652 
7653  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
7654  return;
7655  }
7656  else if( left.columns() == 0UL ) {
7657  reset( ~lhs );
7658  return;
7659  }
7660 
7661  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7662  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7663 
7664  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7665  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7666  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7667  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7668  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7669  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7670 
7671  smpAssign( ~lhs, A * B * rhs.scalar_ );
7672  }
7673  //**********************************************************************************************
7674 
7675  //**SMP assignment to sparse matrices***********************************************************
7690  template< typename MT // Type of the target sparse matrix
7691  , bool SO > // Storage order of the target sparse matrix
7692  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7693  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7694  {
7696 
7697  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
7698 
7705 
7706  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7707  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7708 
7709  const TmpType tmp( rhs );
7710  smpAssign( ~lhs, tmp );
7711  }
7712  //**********************************************************************************************
7713 
7714  //**SMP addition assignment to dense matrices***************************************************
7729  template< typename MT // Type of the target dense matrix
7730  , bool SO > // Storage order of the target dense matrix
7731  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7732  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7733  {
7735 
7736  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7737  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7738 
7739  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7740  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7741 
7742  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7743  return;
7744  }
7745 
7746  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7747  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7748 
7749  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7750  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7751  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7752  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7753  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7754  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7755 
7756  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
7757  }
7758  //**********************************************************************************************
7759 
7760  //**SMP addition assignment to sparse matrices**************************************************
7761  // No special implementation for the SMP addition assignment to sparse matrices.
7762  //**********************************************************************************************
7763 
7764  //**SMP subtraction assignment to dense matrices************************************************
7779  template< typename MT // Type of the target dense matrix
7780  , bool SO > // Storage order of the target dense matrix
7781  friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
7782  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
7783  {
7785 
7786  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
7787  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
7788 
7789  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
7790  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
7791 
7792  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
7793  return;
7794  }
7795 
7796  LT A( left ); // Evaluation of the left-hand side dense matrix operand
7797  RT B( right ); // Evaluation of the right-hand side dense matrix operand
7798 
7799  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
7800  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
7801  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
7802  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
7803  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
7804  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
7805 
7806  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
7807  }
7808  //**********************************************************************************************
7809 
7810  //**SMP subtraction assignment to sparse matrices***********************************************
7811  // No special implementation for the SMP subtraction assignment to sparse matrices.
7812  //**********************************************************************************************
7813 
7814  //**SMP multiplication assignment to dense matrices*********************************************
7815  // No special implementation for the SMP multiplication assignment to dense matrices.
7816  //**********************************************************************************************
7817 
7818  //**SMP multiplication assignment to sparse matrices********************************************
7819  // No special implementation for the SMP multiplication assignment to sparse matrices.
7820  //**********************************************************************************************
7821 
7822  //**Compile time checks*************************************************************************
7830  BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE( ST, RightOperand );
7831  //**********************************************************************************************
7832 };
7834 //*************************************************************************************************
7835 
7836 
7837 
7838 
7839 //=================================================================================================
7840 //
7841 // GLOBAL BINARY ARITHMETIC OPERATORS
7842 //
7843 //=================================================================================================
7844 
7845 //*************************************************************************************************
7874 template< typename T1 // Type of the left-hand side dense matrix
7875  , typename T2 > // Type of the right-hand side dense matrix
7876 inline const DMatTDMatMultExpr<T1,T2>
7878 {
7880 
7881  if( (~lhs).columns() != (~rhs).rows() )
7882  throw std::invalid_argument( "Matrix sizes do not match" );
7883 
7884  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
7885 }
7886 //*************************************************************************************************
7887 
7888 
7889 
7890 
7891 //=================================================================================================
7892 //
7893 // ROWS SPECIALIZATIONS
7894 //
7895 //=================================================================================================
7896 
7897 //*************************************************************************************************
// Rows type trait specialization for DMatTDMatMultExpr: the trait of the product expression
// is inherited from the Rows trait of the left-hand side operand type MT1.
7899 template< typename MT1, typename MT2 >
7900 struct Rows< DMatTDMatMultExpr<MT1,MT2> >
7901  : public Rows<MT1>
7902 {};
7904 //*************************************************************************************************
7905 
7906 
7907 
7908 
7909 //=================================================================================================
7910 //
7911 // COLUMNS SPECIALIZATIONS
7912 //
7913 //=================================================================================================
7914 
7915 //*************************************************************************************************
// Columns type trait specialization for DMatTDMatMultExpr: the trait of the product expression
// is inherited from the Columns trait of the right-hand side operand type MT2.
7917 template< typename MT1, typename MT2 >
7918 struct Columns< DMatTDMatMultExpr<MT1,MT2> >
7919  : public Columns<MT2>
7920 {};
7922 //*************************************************************************************************
7923 
7924 
7925 
7926 
7927 //=================================================================================================
7928 //
7929 // ISLOWER SPECIALIZATIONS
7930 //
7931 //=================================================================================================
7932 
7933 //*************************************************************************************************
// IsLower specialization for DMatTDMatMultExpr: the product expression is flagged as lower
// only if both operand types MT1 and MT2 are flagged as lower.
7935 template< typename MT1, typename MT2 >
7936 struct IsLower< DMatTDMatMultExpr<MT1,MT2> >
7937  : public IsTrue< And< IsLower<MT1>, IsLower<MT2> >::value >
7938 {};
7940 //*************************************************************************************************
7941 
7942 
7943 
7944 
7945 //=================================================================================================
7946 //
7947 // ISUNILOWER SPECIALIZATIONS
7948 //
7949 //=================================================================================================
7950 
7951 //*************************************************************************************************
// IsUniLower specialization for DMatTDMatMultExpr: the product expression is flagged as
// unilower only if both operand types MT1 and MT2 are flagged as unilower.
7953 template< typename MT1, typename MT2 >
7954 struct IsUniLower< DMatTDMatMultExpr<MT1,MT2> >
7955  : public IsTrue< And< IsUniLower<MT1>, IsUniLower<MT2> >::value >
7956 {};
7958 //*************************************************************************************************
7959 
7960 
7961 
7962 
7963 //=================================================================================================
7964 //
7965 // ISSTRICTLYLOWER SPECIALIZATIONS
7966 //
7967 //=================================================================================================
7968 
7969 //*************************************************************************************************
// IsStrictlyLower specialization for DMatTDMatMultExpr: the product expression is flagged as
// strictly lower if one of the operand types is strictly lower and the other one is lower
// (either MT1 strictly lower with MT2 lower, or MT2 strictly lower with MT1 lower).
7971 template< typename MT1, typename MT2 >
7972 struct IsStrictlyLower< DMatTDMatMultExpr<MT1,MT2> >
7973  : public IsTrue< Or< And< IsStrictlyLower<MT1>, IsLower<MT2> >
7974  , And< IsStrictlyLower<MT2>, IsLower<MT1> > >::value >
7975 {};
7977 //*************************************************************************************************
7978 
7979 
7980 
7981 
7982 //=================================================================================================
7983 //
7984 // ISUPPER SPECIALIZATIONS
7985 //
7986 //=================================================================================================
7987 
7988 //*************************************************************************************************
// IsUpper specialization for DMatTDMatMultExpr: the product expression is flagged as upper
// only if both operand types MT1 and MT2 are flagged as upper.
7990 template< typename MT1, typename MT2 >
7991 struct IsUpper< DMatTDMatMultExpr<MT1,MT2> >
7992  : public IsTrue< And< IsUpper<MT1>, IsUpper<MT2> >::value >
7993 {};
7995 //*************************************************************************************************
7996 
7997 
7998 
7999 
8000 //=================================================================================================
8001 //
8002 // ISUNIUPPER SPECIALIZATIONS
8003 //
8004 //=================================================================================================
8005 
8006 //*************************************************************************************************
// IsUniUpper specialization for DMatTDMatMultExpr: the product expression is flagged as
// uniupper only if both operand types MT1 and MT2 are flagged as uniupper.
8008 template< typename MT1, typename MT2 >
8009 struct IsUniUpper< DMatTDMatMultExpr<MT1,MT2> >
8010  : public IsTrue< And< IsUniUpper<MT1>, IsUniUpper<MT2> >::value >
8011 {};
8013 //*************************************************************************************************
8014 
8015 
8016 
8017 
8018 //=================================================================================================
8019 //
8020 // ISSTRICTLYUPPER SPECIALIZATIONS
8021 //
8022 //=================================================================================================
8023 
8024 //*************************************************************************************************
// IsStrictlyUpper specialization for DMatTDMatMultExpr: the product expression is flagged as
// strictly upper if one of the operand types is strictly upper and the other one is upper
// (either MT1 strictly upper with MT2 upper, or MT2 strictly upper with MT1 upper).
8026 template< typename MT1, typename MT2 >
8027 struct IsStrictlyUpper< DMatTDMatMultExpr<MT1,MT2> >
8028  : public IsTrue< Or< And< IsStrictlyUpper<MT1>, IsUpper<MT2> >
8029  , And< IsStrictlyUpper<MT2>, IsUpper<MT1> > >::value >
8030 {};
8032 //*************************************************************************************************
8033 
8034 
8035 
8036 
8037 //=================================================================================================
8038 //
8039 // EXPRESSION TRAIT SPECIALIZATIONS
8040 //
8041 //=================================================================================================
8042 
8043 //*************************************************************************************************
// Result type of multiplying a DMatTDMatMultExpr<MT1,MT2> with a vector of type VT.
// If MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT a dense column
// vector, Type is the type of MT1 times the result of MT2 times VT, i.e. the expression is
// restructured from (A*B)*v to A*(B*v). In all other cases Type is INVALID_TYPE.
8045 template< typename MT1, typename MT2, typename VT >
8046 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
8047 {
8048  public:
8049  //**********************************************************************************************
8050  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8051  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
8052  IsDenseVector<VT>::value && IsColumnVector<VT>::value
8053  , typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
8054  , INVALID_TYPE >::Type Type;
8055  //**********************************************************************************************
8056 };
8058 //*************************************************************************************************
8059 
8060 
8061 //*************************************************************************************************
// Result type of multiplying a DMatTDMatMultExpr<MT1,MT2> with a sparse vector of type VT.
// If MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT a sparse column
// vector, Type is the type of MT1 times the result of MT2 times VT (the inner result being
// fed into DMatDVecMultExprTrait), i.e. the expression is restructured from (A*B)*v to
// A*(B*v). In all other cases Type is INVALID_TYPE.
8063 template< typename MT1, typename MT2, typename VT >
8064 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
8065 {
8066  public:
8067  //**********************************************************************************************
8068  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8069  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
8070  IsSparseVector<VT>::value && IsColumnVector<VT>::value
8071  , typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
8072  , INVALID_TYPE >::Type Type;
8073  //**********************************************************************************************
8074 };
8076 //*************************************************************************************************
8077 
8078 
8079 //*************************************************************************************************
// Result type of multiplying a vector of type VT with a DMatTDMatMultExpr<MT1,MT2>.
// If VT is a dense row vector, MT1 a row-major dense matrix and MT2 a column-major dense
// matrix, Type is the type of (VT times MT1) times MT2, i.e. the expression is restructured
// from v*(A*B) to (v*A)*B. In all other cases Type is INVALID_TYPE.
8081 template< typename VT, typename MT1, typename MT2 >
8082 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
8083 {
8084  public:
8085  //**********************************************************************************************
8086  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
8087  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8088  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
8089  , typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8090  , INVALID_TYPE >::Type Type;
8091  //**********************************************************************************************
8092 };
8094 //*************************************************************************************************
8095 
8096 
8097 //*************************************************************************************************
// Result type of multiplying a sparse vector of type VT with a DMatTDMatMultExpr<MT1,MT2>.
// If VT is a sparse row vector, MT1 a row-major dense matrix and MT2 a column-major dense
// matrix, Type is the type of (VT times MT1) times MT2 (the inner result being fed into
// TDVecTDMatMultExprTrait), i.e. the expression is restructured from v*(A*B) to (v*A)*B.
// In all other cases Type is INVALID_TYPE.
8099 template< typename VT, typename MT1, typename MT2 >
8100 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
8101 {
8102  public:
8103  //**********************************************************************************************
8104  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
8105  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
8106  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
8107  , typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
8108  , INVALID_TYPE >::Type Type;
8109  //**********************************************************************************************
8110 };
8112 //*************************************************************************************************
8113 
8114 
8115 //*************************************************************************************************
// Result type of taking a submatrix (with flag AF) of a DMatTDMatMultExpr<MT1,MT2>:
// Type is the multiplication type (MultExprTrait) of the submatrix types of the two operands
// const MT1 and const MT2, both obtained with the same flag AF.
8117 template< typename MT1, typename MT2, bool AF >
8118 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2>, AF >
8119 {
8120  public:
8121  //**********************************************************************************************
8122  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
8123  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
8124  //**********************************************************************************************
8125 };
8127 //*************************************************************************************************
8128 
8129 
8130 //*************************************************************************************************
// Result type of taking a row of a DMatTDMatMultExpr<MT1,MT2>: Type is the multiplication
// type (MultExprTrait) of the row type of the left operand (const MT1) and the complete
// right operand MT2.
8132 template< typename MT1, typename MT2 >
8133 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
8134 {
8135  public:
8136  //**********************************************************************************************
8137  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
8138  //**********************************************************************************************
8139 };
8141 //*************************************************************************************************
8142 
8143 
8144 //*************************************************************************************************
// Result type of taking a column of a DMatTDMatMultExpr<MT1,MT2>: Type is the multiplication
// type (MultExprTrait) of the complete left operand MT1 and the column type of the right
// operand (const MT2).
8146 template< typename MT1, typename MT2 >
8147 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
8148 {
8149  public:
8150  //**********************************************************************************************
8151  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
8152  //**********************************************************************************************
8153 };
8155 //*************************************************************************************************
8156 
8157 } // namespace blaze
8158 
8159 #endif
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:470
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:289
BLAZE_ALWAYS_INLINE int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:63
const MT::ElementType max(const DenseMatrix< MT, SO > &dm)
Returns the largest element of the dense matrix.
Definition: DenseMatrix.h:1649
Data type constraint.
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for mathematical functions.
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:297
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
Header file for the IsUniUpper type trait.
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices (\f$ A=B*C \f$).
Definition: DMatDMatMultExpr.h:8247
Compile time check for triangular matrix types. This type trait tests whether or not the given templat...
Definition: IsTriangular.h:105
Header file for basic type definitions.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:291
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:264
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:343
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:209
Header file for the IsDiagonal type trait.
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Header file for the IsColumnMajorMatrix type trait.
void reset(const DiagonalProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: DiagonalProxy.h:821
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:460
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2507
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:261
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:287
Header file for the And class template.
Compile time check for lower triangular matrices. This type trait tests whether or not the given templ...
Definition: IsLower.h:90
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:259
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:699
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Compile time check for upper triangular matrices. This type trait tests whether or not the given templ...
Definition: IsUpper.h:90
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:140
Header file for the IsUniLower type trait.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:142
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:328
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:406
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:294
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsStrictlyUpper type trait.
Header file for the IsSymmetric type trait.
Namespace of the Blaze C++ math library.
Definition: Blaze.h:57
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
const size_t SMP_DMATTDMATMULT_THRESHOLD
SMP row-major dense matrix/column-major dense matrix multiplication threshold. This threshold specifie...
Definition: Thresholds.h:857
Header file for the Or class template.
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:450
const MT::ElementType min(const DenseMatrix< MT, SO > &dm)
Returns the smallest element of the dense matrix.
Definition: DenseMatrix.h:1602
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:139
Header file for the Not class template.
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
Compile time check for diagonal matrices. This type trait tests whether or not the given template para...
Definition: IsDiagonal.h:92
Header file for BLAS level 3 functions.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:143
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Header file for the IsTriangular type trait.
Constraints on the storage order of matrix types.
Compile time check for strictly upper triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyUpper.h:86
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2505
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the IsStrictlyLower type trait.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:292
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:396
Header file for the IsNumeric type trait.
Header file for the HasConstDataAccess type trait.
System settings for the BLAS mode.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:150
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:288
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
Header file for the HasMutableDataAccess type trait.
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:142
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:438
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:141
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
Compile time check for strictly lower triangular matrices. This type trait tests whether or not the gi...
Definition: IsStrictlyLower.h:86
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:416
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:260
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:293
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2502
Header file for the IsTrue value trait.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:479
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:132
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:138
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:480
Header file for the IsUpper type trait.
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:426
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:290
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:303
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:306
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:300
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849