All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
76 #include <blaze/system/BLAS.h>
78 #include <blaze/util/Assert.h>
79 #include <blaze/util/Complex.h>
85 #include <blaze/util/DisableIf.h>
86 #include <blaze/util/EnableIf.h>
87 #include <blaze/util/InvalidType.h>
89 #include <blaze/util/SelectType.h>
90 #include <blaze/util/Types.h>
96 
97 
98 namespace blaze {
99 
100 //=================================================================================================
101 //
102 // CLASS DMATTDMATMULTEXPR
103 //
104 //=================================================================================================
105 
106 //*************************************************************************************************
113 template< typename MT1 // Type of the left-hand side dense matrix
114  , typename MT2 > // Type of the right-hand side dense matrix
115 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
116  , private MatMatMultExpr
117  , private Computation
118 {
119  private:
120  //**Type definitions****************************************************************************
121  typedef typename MT1::ResultType RT1;
122  typedef typename MT2::ResultType RT2;
123  typedef typename RT1::ElementType ET1;
124  typedef typename RT2::ElementType ET2;
125  typedef typename MT1::CompositeType CT1;
126  typedef typename MT2::CompositeType CT2;
127  //**********************************************************************************************
128 
129  //**********************************************************************************************
132  //**********************************************************************************************
133 
134  //**********************************************************************************************
136  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
137  //**********************************************************************************************
138 
139  //**********************************************************************************************
141 
145  template< typename MT >
146  struct UseSMPAssign {
147  enum { value = ( evaluateLeft || evaluateRight ) };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = IsFloat<typename T1::ElementType>::value &&
160  IsFloat<typename T2::ElementType>::value &&
161  IsFloat<typename T3::ElementType>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
171  template< typename T1, typename T2, typename T3 >
172  struct UseDoublePrecisionKernel {
173  enum { value = IsDouble<typename T1::ElementType>::value &&
174  IsDouble<typename T2::ElementType>::value &&
175  IsDouble<typename T3::ElementType>::value };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
186  template< typename T1, typename T2, typename T3 >
187  struct UseSinglePrecisionComplexKernel {
188  typedef complex<float> Type;
189  enum { value = IsSame<typename T1::ElementType,Type>::value &&
190  IsSame<typename T2::ElementType,Type>::value &&
191  IsSame<typename T3::ElementType,Type>::value };
192  };
194  //**********************************************************************************************
195 
196  //**********************************************************************************************
198 
202  template< typename T1, typename T2, typename T3 >
203  struct UseDoublePrecisionComplexKernel {
204  typedef complex<double> Type;
205  enum { value = IsSame<typename T1::ElementType,Type>::value &&
206  IsSame<typename T2::ElementType,Type>::value &&
207  IsSame<typename T3::ElementType,Type>::value };
208  };
210  //**********************************************************************************************
211 
212  //**********************************************************************************************
214 
217  template< typename T1, typename T2, typename T3 >
218  struct UseDefaultKernel {
219  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
232  template< typename T1, typename T2, typename T3 >
233  struct UseVectorizedDefaultKernel {
234  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237  IntrinsicTrait<typename T1::ElementType>::addition &&
238  IntrinsicTrait<typename T1::ElementType>::multiplication };
239  };
241  //**********************************************************************************************
242 
243  public:
244  //**Type definitions****************************************************************************
251  typedef const ElementType ReturnType;
252  typedef const ResultType CompositeType;
253 
255  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
256 
258  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
259 
262 
265  //**********************************************************************************************
266 
267  //**Compilation flags***************************************************************************
269  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
273 
275  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
276  !evaluateRight && MT2::smpAssignable };
277  //**********************************************************************************************
278 
279  //**Constructor*********************************************************************************
285  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
286  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
287  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
288  {
289  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
290  }
291  //**********************************************************************************************
292 
293  //**Access operator*****************************************************************************
300  inline ReturnType operator()( size_t i, size_t j ) const {
301  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
302  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
303 
304  ElementType tmp;
305 
306  if( lhs_.columns() != 0UL ) {
307  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
308  tmp = lhs_(i,0UL) * rhs_(0UL,j);
309  for( size_t k=1UL; k<end; k+=2UL ) {
310  tmp += lhs_(i,k ) * rhs_(k ,j);
311  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
312  }
313  if( end < lhs_.columns() ) {
314  tmp += lhs_(i,end) * rhs_(end,j);
315  }
316  }
317  else {
318  reset( tmp );
319  }
320 
321  return tmp;
322  }
323  //**********************************************************************************************
324 
325  //**Rows function*******************************************************************************
330  inline size_t rows() const {
331  return lhs_.rows();
332  }
333  //**********************************************************************************************
334 
335  //**Columns function****************************************************************************
340  inline size_t columns() const {
341  return rhs_.columns();
342  }
343  //**********************************************************************************************
344 
345  //**Left operand access*************************************************************************
350  inline LeftOperand leftOperand() const {
351  return lhs_;
352  }
353  //**********************************************************************************************
354 
355  //**Right operand access************************************************************************
360  inline RightOperand rightOperand() const {
361  return rhs_;
362  }
363  //**********************************************************************************************
364 
365  //**********************************************************************************************
371  template< typename T >
372  inline bool canAlias( const T* alias ) const {
373  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
374  }
375  //**********************************************************************************************
376 
377  //**********************************************************************************************
383  template< typename T >
384  inline bool isAliased( const T* alias ) const {
385  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
386  }
387  //**********************************************************************************************
388 
389  //**********************************************************************************************
394  inline bool isAligned() const {
395  return lhs_.isAligned() && rhs_.isAligned();
396  }
397  //**********************************************************************************************
398 
399  //**********************************************************************************************
404  inline bool canSMPAssign() const {
405  return ( !BLAZE_BLAS_IS_PARALLEL ||
406  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
408  }
409  //**********************************************************************************************
410 
411  private:
412  //**Member variables****************************************************************************
415  //**********************************************************************************************
416 
417  //**Assignment to dense matrices****************************************************************
430  template< typename MT // Type of the target dense matrix
431  , bool SO > // Storage order of the target dense matrix
432  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
433  {
435 
436  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
437  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
438 
439  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
440  return;
441  }
442  else if( rhs.lhs_.columns() == 0UL ) {
443  reset( ~lhs );
444  return;
445  }
446 
447  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
448  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
449 
450  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
451  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
452  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
453  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
454  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
455  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
456 
457  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
458  }
460  //**********************************************************************************************
461 
462  //**Assignment to dense matrices (kernel selection)*********************************************
473  template< typename MT3 // Type of the left-hand side target matrix
474  , typename MT4 // Type of the left-hand side matrix operand
475  , typename MT5 > // Type of the right-hand side matrix operand
476  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
477  {
478  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
479  DMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
480  else
481  DMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
482  }
484  //**********************************************************************************************
485 
486  //**Default assignment to dense matrices********************************************************
500  template< typename MT3 // Type of the left-hand side target matrix
501  , typename MT4 // Type of the left-hand side matrix operand
502  , typename MT5 > // Type of the right-hand side matrix operand
503  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
504  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
505  {
506  const size_t M( A.rows() );
507  const size_t N( B.columns() );
508  const size_t K( A.columns() );
509 
510  for( size_t i=0UL; i<M; ++i ) {
511  for( size_t j=0UL; j<N; ++j ) {
512  C(i,j) = A(i,0UL) * B(0UL,j);
513  }
514  for( size_t k=1UL; k<K; ++k ) {
515  for( size_t j=0UL; j<N; ++j ) {
516  C(i,j) += A(i,k) * B(k,j);
517  }
518  }
519  }
520  }
522  //**********************************************************************************************
523 
524  //**Vectorized default assignment to row-major dense matrices***********************************
538  template< typename MT3 // Type of the left-hand side target matrix
539  , typename MT4 // Type of the left-hand side matrix operand
540  , typename MT5 > // Type of the right-hand side matrix operand
541  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
542  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
543  {
544  typedef IntrinsicTrait<ElementType> IT;
545 
546  const size_t M( A.rows() );
547  const size_t N( B.columns() );
548  const size_t K( A.columns() );
549 
550  size_t i( 0UL );
551 
552  for( ; (i+2UL) <= M; i+=2UL ) {
553  size_t j( 0UL );
554  for( ; (j+4UL) <= N; j+=4UL ) {
555  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
556  for( size_t k=0UL; k<K; k+=IT::size ) {
557  const IntrinsicType a1( A.load(i ,k) );
558  const IntrinsicType a2( A.load(i+1UL,k) );
559  const IntrinsicType b1( B.load(k,j ) );
560  const IntrinsicType b2( B.load(k,j+1UL) );
561  const IntrinsicType b3( B.load(k,j+2UL) );
562  const IntrinsicType b4( B.load(k,j+3UL) );
563  xmm1 = xmm1 + a1 * b1;
564  xmm2 = xmm2 + a1 * b2;
565  xmm3 = xmm3 + a1 * b3;
566  xmm4 = xmm4 + a1 * b4;
567  xmm5 = xmm5 + a2 * b1;
568  xmm6 = xmm6 + a2 * b2;
569  xmm7 = xmm7 + a2 * b3;
570  xmm8 = xmm8 + a2 * b4;
571  }
572  (~C)(i ,j ) = sum( xmm1 );
573  (~C)(i ,j+1UL) = sum( xmm2 );
574  (~C)(i ,j+2UL) = sum( xmm3 );
575  (~C)(i ,j+3UL) = sum( xmm4 );
576  (~C)(i+1UL,j ) = sum( xmm5 );
577  (~C)(i+1UL,j+1UL) = sum( xmm6 );
578  (~C)(i+1UL,j+2UL) = sum( xmm7 );
579  (~C)(i+1UL,j+3UL) = sum( xmm8 );
580  }
581  for( ; (j+2UL) <= N; j+=2UL ) {
582  IntrinsicType xmm1, xmm2, xmm3, xmm4;
583  for( size_t k=0UL; k<K; k+=IT::size ) {
584  const IntrinsicType a1( A.load(i ,k) );
585  const IntrinsicType a2( A.load(i+1UL,k) );
586  const IntrinsicType b1( B.load(k,j ) );
587  const IntrinsicType b2( B.load(k,j+1UL) );
588  xmm1 = xmm1 + a1 * b1;
589  xmm2 = xmm2 + a1 * b2;
590  xmm3 = xmm3 + a2 * b1;
591  xmm4 = xmm4 + a2 * b2;
592  }
593  (~C)(i ,j ) = sum( xmm1 );
594  (~C)(i ,j+1UL) = sum( xmm2 );
595  (~C)(i+1UL,j ) = sum( xmm3 );
596  (~C)(i+1UL,j+1UL) = sum( xmm4 );
597  }
598  if( j < N ) {
599  IntrinsicType xmm1, xmm2;
600  for( size_t k=0UL; k<K; k+=IT::size ) {
601  const IntrinsicType b1( B.load(k,j) );
602  xmm1 = xmm1 + A.load(i ,k) * b1;
603  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
604  }
605  (~C)(i ,j) = sum( xmm1 );
606  (~C)(i+1UL,j) = sum( xmm2 );
607  }
608  }
609  if( i < M ) {
610  size_t j( 0UL );
611  for( ; (j+4UL) <= N; j+=4UL ) {
612  IntrinsicType xmm1, xmm2, xmm3, xmm4;
613  for( size_t k=0UL; k<K; k+=IT::size ) {
614  const IntrinsicType a1( A.load(i,k) );
615  xmm1 = xmm1 + a1 * B.load(k,j );
616  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
617  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
618  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
619  }
620  (~C)(i,j ) = sum( xmm1 );
621  (~C)(i,j+1UL) = sum( xmm2 );
622  (~C)(i,j+2UL) = sum( xmm3 );
623  (~C)(i,j+3UL) = sum( xmm4 );
624  }
625  for( ; (j+2UL) <= N; j+=2UL ) {
626  IntrinsicType xmm1, xmm2;
627  for( size_t k=0UL; k<K; k+=IT::size ) {
628  const IntrinsicType a1( A.load(i,k) );
629  xmm1 = xmm1 + a1 * B.load(k,j );
630  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
631  }
632  (~C)(i,j ) = sum( xmm1 );
633  (~C)(i,j+1UL) = sum( xmm2 );
634  }
635  if( j < N ) {
636  IntrinsicType xmm1, xmm2;
637  for( size_t k=0UL; k<K; k+=IT::size ) {
638  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
639  }
640  (~C)(i,j) = sum( xmm1 );
641  }
642  }
643  }
645  //**********************************************************************************************
646 
647  //**Vectorized default assignment to column-major dense matrices********************************
661  template< typename MT3 // Type of the left-hand side target matrix
662  , typename MT4 // Type of the left-hand side matrix operand
663  , typename MT5 > // Type of the right-hand side matrix operand
664  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
665  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
666  {
667  typedef IntrinsicTrait<ElementType> IT;
668 
669  const size_t M( A.rows() );
670  const size_t N( B.columns() );
671  const size_t K( A.columns() );
672 
673  size_t i( 0UL );
674 
675  for( ; (i+4UL) <= M; i+=4UL ) {
676  size_t j( 0UL );
677  for( ; (j+2UL) <= N; j+=2UL ) {
678  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
679  for( size_t k=0UL; k<K; k+=IT::size ) {
680  const IntrinsicType a1( A.load(i ,k) );
681  const IntrinsicType a2( A.load(i+1UL,k) );
682  const IntrinsicType a3( A.load(i+2UL,k) );
683  const IntrinsicType a4( A.load(i+3UL,k) );
684  const IntrinsicType b1( B.load(k,j ) );
685  const IntrinsicType b2( B.load(k,j+1UL) );
686  xmm1 = xmm1 + a1 * b1;
687  xmm2 = xmm2 + a1 * b2;
688  xmm3 = xmm3 + a2 * b1;
689  xmm4 = xmm4 + a2 * b2;
690  xmm5 = xmm5 + a3 * b1;
691  xmm6 = xmm6 + a3 * b2;
692  xmm7 = xmm7 + a4 * b1;
693  xmm8 = xmm8 + a4 * b2;
694  }
695  (~C)(i ,j ) = sum( xmm1 );
696  (~C)(i ,j+1UL) = sum( xmm2 );
697  (~C)(i+1UL,j ) = sum( xmm3 );
698  (~C)(i+1UL,j+1UL) = sum( xmm4 );
699  (~C)(i+2UL,j ) = sum( xmm5 );
700  (~C)(i+2UL,j+1UL) = sum( xmm6 );
701  (~C)(i+3UL,j ) = sum( xmm7 );
702  (~C)(i+3UL,j+1UL) = sum( xmm8 );
703  }
704  if( j < N ) {
705  IntrinsicType xmm1, xmm2, xmm3, xmm4;
706  for( size_t k=0UL; k<K; k+=IT::size ) {
707  const IntrinsicType b1( B.load(k,j) );
708  xmm1 = xmm1 + A.load(i ,k) * b1;
709  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
710  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
711  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
712  }
713  (~C)(i ,j) = sum( xmm1 );
714  (~C)(i+1UL,j) = sum( xmm2 );
715  (~C)(i+2UL,j) = sum( xmm3 );
716  (~C)(i+3UL,j) = sum( xmm4 );
717  }
718  }
719  for( ; (i+2UL) <= M; i+=2UL ) {
720  size_t j( 0UL );
721  for( ; (j+2UL) <= N; j+=2UL ) {
722  IntrinsicType xmm1, xmm2, xmm3, xmm4;
723  for( size_t k=0UL; k<K; k+=IT::size ) {
724  const IntrinsicType a1( A.load(i ,k) );
725  const IntrinsicType a2( A.load(i+1UL,k) );
726  const IntrinsicType b1( B.load(k,j ) );
727  const IntrinsicType b2( B.load(k,j+1UL) );
728  xmm1 = xmm1 + a1 * b1;
729  xmm2 = xmm2 + a1 * b2;
730  xmm3 = xmm3 + a2 * b1;
731  xmm4 = xmm4 + a2 * b2;
732  }
733  (~C)(i ,j ) = sum( xmm1 );
734  (~C)(i ,j+1UL) = sum( xmm2 );
735  (~C)(i+1UL,j ) = sum( xmm3 );
736  (~C)(i+1UL,j+1UL) = sum( xmm4 );
737  }
738  if( j < N ) {
739  IntrinsicType xmm1, xmm2;
740  for( size_t k=0UL; k<K; k+=IT::size ) {
741  const IntrinsicType b1( B.load(k,j) );
742  xmm1 = xmm1 + A.load(i ,k) * b1;
743  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
744  }
745  (~C)(i ,j) = sum( xmm1 );
746  (~C)(i+1UL,j) = sum( xmm2 );
747  }
748  }
749  if( i < M ) {
750  size_t j( 0UL );
751  for( ; (j+2UL) <= N; j+=2UL ) {
752  IntrinsicType xmm1, xmm2;
753  for( size_t k=0UL; k<K; k+=IT::size ) {
754  const IntrinsicType a1( A.load(i,k) );
755  xmm1 = xmm1 + a1 * B.load(k,j );
756  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
757  }
758  (~C)(i,j ) = sum( xmm1 );
759  (~C)(i,j+1UL) = sum( xmm2 );
760  }
761  if( j < N ) {
762  IntrinsicType xmm1, xmm2;
763  for( size_t k=0UL; k<K; k+=IT::size ) {
764  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
765  }
766  (~C)(i,j) = sum( xmm1 );
767  }
768  }
769  }
771  //**********************************************************************************************
772 
773  //**Default assignment to dense matrices********************************************************
787  template< typename MT3 // Type of the left-hand side target matrix
788  , typename MT4 // Type of the left-hand side matrix operand
789  , typename MT5 > // Type of the right-hand side matrix operand
790  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
791  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
792  {
793  selectDefaultAssignKernel( C, A, B );
794  }
796  //**********************************************************************************************
797 
798  //**BLAS-based assignment to dense matrices (single precision)**********************************
799 #if BLAZE_BLAS_MODE
800 
813  template< typename MT3 // Type of the left-hand side target matrix
814  , typename MT4 // Type of the left-hand side matrix operand
815  , typename MT5 > // Type of the right-hand side matrix operand
816  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
817  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
818  {
819  using boost::numeric_cast;
820 
824 
825  const int M ( numeric_cast<int>( A.rows() ) );
826  const int N ( numeric_cast<int>( B.columns() ) );
827  const int K ( numeric_cast<int>( A.columns() ) );
828  const int lda( numeric_cast<int>( A.spacing() ) );
829  const int ldb( numeric_cast<int>( B.spacing() ) );
830  const int ldc( numeric_cast<int>( C.spacing() ) );
831 
832  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
833  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
834  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
835  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
836  }
838 #endif
839  //**********************************************************************************************
840 
841  //**BLAS-based assignment to dense matrices (double precision)**********************************
842 #if BLAZE_BLAS_MODE
843 
856  template< typename MT3 // Type of the left-hand side target matrix
857  , typename MT4 // Type of the left-hand side matrix operand
858  , typename MT5 > // Type of the right-hand side matrix operand
859  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
860  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
861  {
862  using boost::numeric_cast;
863 
867 
868  const int M ( numeric_cast<int>( A.rows() ) );
869  const int N ( numeric_cast<int>( B.columns() ) );
870  const int K ( numeric_cast<int>( A.columns() ) );
871  const int lda( numeric_cast<int>( A.spacing() ) );
872  const int ldb( numeric_cast<int>( B.spacing() ) );
873  const int ldc( numeric_cast<int>( C.spacing() ) );
874 
875  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
876  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
877  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
878  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
879  }
881 #endif
882  //**********************************************************************************************
883 
884  //**BLAS-based assignment to dense matrices (single precision complex)**************************
885 #if BLAZE_BLAS_MODE
886 
899  template< typename MT3 // Type of the left-hand side target matrix
900  , typename MT4 // Type of the left-hand side matrix operand
901  , typename MT5 > // Type of the right-hand side matrix operand
902  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
903  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
904  {
905  using boost::numeric_cast;
906 
910  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
911  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
912  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
913 
914  const int M ( numeric_cast<int>( A.rows() ) );
915  const int N ( numeric_cast<int>( B.columns() ) );
916  const int K ( numeric_cast<int>( A.columns() ) );
917  const int lda( numeric_cast<int>( A.spacing() ) );
918  const int ldb( numeric_cast<int>( B.spacing() ) );
919  const int ldc( numeric_cast<int>( C.spacing() ) );
920  const complex<float> alpha( 1.0F, 0.0F );
921  const complex<float> beta ( 0.0F, 0.0F );
922 
923  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
924  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
925  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
926  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
927  }
929 #endif
930  //**********************************************************************************************
931 
932  //**BLAS-based assignment to dense matrices (double precision complex)**************************
933 #if BLAZE_BLAS_MODE
934 
947  template< typename MT3 // Type of the left-hand side target matrix
948  , typename MT4 // Type of the left-hand side matrix operand
949  , typename MT5 > // Type of the right-hand side matrix operand
950  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
951  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
952  {
953  using boost::numeric_cast;
954 
958  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
959  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
960  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
961 
962  const int M ( numeric_cast<int>( A.rows() ) );
963  const int N ( numeric_cast<int>( B.columns() ) );
964  const int K ( numeric_cast<int>( A.columns() ) );
965  const int lda( numeric_cast<int>( A.spacing() ) );
966  const int ldb( numeric_cast<int>( B.spacing() ) );
967  const int ldc( numeric_cast<int>( C.spacing() ) );
968  const complex<double> alpha( 1.0, 0.0 );
969  const complex<double> beta ( 0.0, 0.0 );
970 
971  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
972  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
973  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
974  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
975  }
977 #endif
978  //**********************************************************************************************
979 
980  //**Assignment to sparse matrices***************************************************************
992  template< typename MT // Type of the target sparse matrix
993  , bool SO > // Storage order of the target sparse matrix
994  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
995  {
997 
998  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
999 
1006 
1007  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1008  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1009 
1010  const TmpType tmp( serial( rhs ) );
1011  assign( ~lhs, tmp );
1012  }
1014  //**********************************************************************************************
1015 
1016  //**Addition assignment to dense matrices*******************************************************
1029  template< typename MT // Type of the target dense matrix
1030  , bool SO > // Storage order of the target dense matrix
1031  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1032  {
1034 
1035  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1036  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1037 
1038  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1039  return;
1040  }
1041 
1042  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1043  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1044 
1045  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1046  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1047  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1048  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1049  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1050  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1051 
1052  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1053  }
1055  //**********************************************************************************************
1056 
1057  //**Addition assignment to dense matrices (kernel selection)************************************
1068  template< typename MT3 // Type of the left-hand side target matrix
1069  , typename MT4 // Type of the left-hand side matrix operand
1070  , typename MT5 > // Type of the right-hand side matrix operand
1071  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1072  {
1073  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
1074  DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1075  else
1076  DMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1077  }
1079  //**********************************************************************************************
1080 
1081  //**Default addition assignment to dense matrices***********************************************
/*!\brief Default (non-vectorized) kernel for the addition assignment \f$ C+=A*B \f$.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled when UseVectorizedDefaultKernel does not hold for the three matrix
// types. For every row i of C and every k, the product A(i,k)*B(k,j) is added to C(i,j) for all
// columns j; the column loop is manually unrolled by two and an odd last column is handled
// separately.
*/
1095  template< typename MT3 // Type of the left-hand side target matrix
1096  , typename MT4 // Type of the left-hand side matrix operand
1097  , typename MT5 > // Type of the right-hand side matrix operand
1098  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1099  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1100  {
1101  const size_t M( A.rows() ); // Number of rows of the result
1102  const size_t N( B.columns() ); // Number of columns of the result
1103  const size_t K( A.columns() ); // Inner dimension of the multiplication
1104 
1105  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1106  const size_t end( N & size_t(-2) ); // N rounded down to an even number (end of the unrolled loop)
1107 
1108  for( size_t i=0UL; i<M; ++i ) {
1109  for( size_t k=0UL; k<K; ++k ) {
1110  for( size_t j=0UL; j<end; j+=2UL ) { // Two columns per iteration
1111  C(i,j ) += A(i,k) * B(k,j );
1112  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1113  }
1114  if( end < N ) { // Remaining last column in case N is odd
1115  C(i,end) += A(i,k) * B(k,end);
1116  }
1117  }
1118  }
1119  }
1121  //**********************************************************************************************
1122 
1123  //**Vectorized default addition assignment to row-major dense matrices**************************
1137  template< typename MT3 // Type of the left-hand side target matrix
1138  , typename MT4 // Type of the left-hand side matrix operand
1139  , typename MT5 > // Type of the right-hand side matrix operand
1140  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1141  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1142  {
1143  typedef IntrinsicTrait<ElementType> IT;
1144 
1145  const size_t M( A.rows() );
1146  const size_t N( B.columns() );
1147  const size_t K( A.columns() );
1148 
1149  size_t i( 0UL );
1150 
1151  for( ; (i+2UL) <= M; i+=2UL ) {
1152  size_t j( 0UL );
1153  for( ; (j+4UL) <= N; j+=4UL ) {
1154  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1155  for( size_t k=0UL; k<K; k+=IT::size ) {
1156  const IntrinsicType a1( A.load(i ,k) );
1157  const IntrinsicType a2( A.load(i+1UL,k) );
1158  const IntrinsicType b1( B.load(k,j ) );
1159  const IntrinsicType b2( B.load(k,j+1UL) );
1160  const IntrinsicType b3( B.load(k,j+2UL) );
1161  const IntrinsicType b4( B.load(k,j+3UL) );
1162  xmm1 = xmm1 + a1 * b1;
1163  xmm2 = xmm2 + a1 * b2;
1164  xmm3 = xmm3 + a1 * b3;
1165  xmm4 = xmm4 + a1 * b4;
1166  xmm5 = xmm5 + a2 * b1;
1167  xmm6 = xmm6 + a2 * b2;
1168  xmm7 = xmm7 + a2 * b3;
1169  xmm8 = xmm8 + a2 * b4;
1170  }
1171  (~C)(i ,j ) += sum( xmm1 );
1172  (~C)(i ,j+1UL) += sum( xmm2 );
1173  (~C)(i ,j+2UL) += sum( xmm3 );
1174  (~C)(i ,j+3UL) += sum( xmm4 );
1175  (~C)(i+1UL,j ) += sum( xmm5 );
1176  (~C)(i+1UL,j+1UL) += sum( xmm6 );
1177  (~C)(i+1UL,j+2UL) += sum( xmm7 );
1178  (~C)(i+1UL,j+3UL) += sum( xmm8 );
1179  }
1180  for( ; (j+2UL) <= N; j+=2UL ) {
1181  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1182  for( size_t k=0UL; k<K; k+=IT::size ) {
1183  const IntrinsicType a1( A.load(i ,k) );
1184  const IntrinsicType a2( A.load(i+1UL,k) );
1185  const IntrinsicType b1( B.load(k,j ) );
1186  const IntrinsicType b2( B.load(k,j+1UL) );
1187  xmm1 = xmm1 + a1 * b1;
1188  xmm2 = xmm2 + a1 * b2;
1189  xmm3 = xmm3 + a2 * b1;
1190  xmm4 = xmm4 + a2 * b2;
1191  }
1192  (~C)(i ,j ) += sum( xmm1 );
1193  (~C)(i ,j+1UL) += sum( xmm2 );
1194  (~C)(i+1UL,j ) += sum( xmm3 );
1195  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1196  }
1197  if( j < N ) {
1198  IntrinsicType xmm1, xmm2;
1199  for( size_t k=0UL; k<K; k+=IT::size ) {
1200  const IntrinsicType b1( B.load(k,j) );
1201  xmm1 = xmm1 + A.load(i ,k) * b1;
1202  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1203  }
1204  (~C)(i ,j) += sum( xmm1 );
1205  (~C)(i+1UL,j) += sum( xmm2 );
1206  }
1207  }
1208  if( i < M ) {
1209  size_t j( 0UL );
1210  for( ; (j+4UL) <= N; j+=4UL ) {
1211  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1212  for( size_t k=0UL; k<K; k+=IT::size ) {
1213  const IntrinsicType a1( A.load(i,k) );
1214  xmm1 = xmm1 + a1 * B.load(k,j );
1215  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1216  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1217  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1218  }
1219  (~C)(i,j ) += sum( xmm1 );
1220  (~C)(i,j+1UL) += sum( xmm2 );
1221  (~C)(i,j+2UL) += sum( xmm3 );
1222  (~C)(i,j+3UL) += sum( xmm4 );
1223  }
1224  for( ; (j+2UL) <= N; j+=2UL ) {
1225  IntrinsicType xmm1, xmm2;
1226  for( size_t k=0UL; k<K; k+=IT::size ) {
1227  const IntrinsicType a1( A.load(i,k) );
1228  xmm1 = xmm1 + a1 * B.load(k,j );
1229  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1230  }
1231  (~C)(i,j ) += sum( xmm1 );
1232  (~C)(i,j+1UL) += sum( xmm2 );
1233  }
1234  if( j < N ) {
1235  IntrinsicType xmm1, xmm2;
1236  for( size_t k=0UL; k<K; k+=IT::size ) {
1237  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1238  }
1239  (~C)(i,j) += sum( xmm1 );
1240  }
1241  }
1242  }
1244  //**********************************************************************************************
1245 
1246  //**Vectorized default addition assignment to column-major dense matrices***********************
1260  template< typename MT3 // Type of the left-hand side target matrix
1261  , typename MT4 // Type of the left-hand side matrix operand
1262  , typename MT5 > // Type of the right-hand side matrix operand
1263  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1264  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1265  {
1266  typedef IntrinsicTrait<ElementType> IT;
1267 
1268  const size_t M( A.rows() );
1269  const size_t N( B.columns() );
1270  const size_t K( A.columns() );
1271 
1272  size_t i( 0UL );
1273 
1274  for( ; (i+4UL) <= M; i+=4UL ) {
1275  size_t j( 0UL );
1276  for( ; (j+2UL) <= N; j+=2UL ) {
1277  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1278  for( size_t k=0UL; k<K; k+=IT::size ) {
1279  const IntrinsicType a1( A.load(i ,k) );
1280  const IntrinsicType a2( A.load(i+1UL,k) );
1281  const IntrinsicType a3( A.load(i+2UL,k) );
1282  const IntrinsicType a4( A.load(i+3UL,k) );
1283  const IntrinsicType b1( B.load(k,j ) );
1284  const IntrinsicType b2( B.load(k,j+1UL) );
1285  xmm1 = xmm1 + a1 * b1;
1286  xmm2 = xmm2 + a1 * b2;
1287  xmm3 = xmm3 + a2 * b1;
1288  xmm4 = xmm4 + a2 * b2;
1289  xmm5 = xmm5 + a3 * b1;
1290  xmm6 = xmm6 + a3 * b2;
1291  xmm7 = xmm7 + a4 * b1;
1292  xmm8 = xmm8 + a4 * b2;
1293  }
1294  (~C)(i ,j ) += sum( xmm1 );
1295  (~C)(i ,j+1UL) += sum( xmm2 );
1296  (~C)(i+1UL,j ) += sum( xmm3 );
1297  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1298  (~C)(i+2UL,j ) += sum( xmm5 );
1299  (~C)(i+2UL,j+1UL) += sum( xmm6 );
1300  (~C)(i+3UL,j ) += sum( xmm7 );
1301  (~C)(i+3UL,j+1UL) += sum( xmm8 );
1302  }
1303  if( j < N ) {
1304  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1305  for( size_t k=0UL; k<K; k+=IT::size ) {
1306  const IntrinsicType b1( B.load(k,j) );
1307  xmm1 = xmm1 + A.load(i ,k) * b1;
1308  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1309  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1310  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1311  }
1312  (~C)(i ,j) += sum( xmm1 );
1313  (~C)(i+1UL,j) += sum( xmm2 );
1314  (~C)(i+2UL,j) += sum( xmm3 );
1315  (~C)(i+3UL,j) += sum( xmm4 );
1316  }
1317  }
1318  for( ; (i+2UL) <= M; i+=2UL ) {
1319  size_t j( 0UL );
1320  for( ; (j+2UL) <= N; j+=2UL ) {
1321  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1322  for( size_t k=0UL; k<K; k+=IT::size ) {
1323  const IntrinsicType a1( A.load(i ,k) );
1324  const IntrinsicType a2( A.load(i+1UL,k) );
1325  const IntrinsicType b1( B.load(k,j ) );
1326  const IntrinsicType b2( B.load(k,j+1UL) );
1327  xmm1 = xmm1 + a1 * b1;
1328  xmm2 = xmm2 + a1 * b2;
1329  xmm3 = xmm3 + a2 * b1;
1330  xmm4 = xmm4 + a2 * b2;
1331  }
1332  (~C)(i ,j ) += sum( xmm1 );
1333  (~C)(i ,j+1UL) += sum( xmm2 );
1334  (~C)(i+1UL,j ) += sum( xmm3 );
1335  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1336  }
1337  if( j < N ) {
1338  IntrinsicType xmm1, xmm2;
1339  for( size_t k=0UL; k<K; k+=IT::size ) {
1340  const IntrinsicType b1( B.load(k,j) );
1341  xmm1 = xmm1 + A.load(i ,k) * b1;
1342  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1343  }
1344  (~C)(i ,j) += sum( xmm1 );
1345  (~C)(i+1UL,j) += sum( xmm2 );
1346  }
1347  }
1348  if( i < M ) {
1349  size_t j( 0UL );
1350  for( ; (j+2UL) <= N; j+=2UL ) {
1351  IntrinsicType xmm1, xmm2;
1352  for( size_t k=0UL; k<K; k+=IT::size ) {
1353  const IntrinsicType a1( A.load(i,k) );
1354  xmm1 = xmm1 + a1 * B.load(k,j );
1355  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1356  }
1357  (~C)(i,j ) += sum( xmm1 );
1358  (~C)(i,j+1UL) += sum( xmm2 );
1359  }
1360  if( j < N ) {
1361  IntrinsicType xmm1, xmm2;
1362  for( size_t k=0UL; k<K; k+=IT::size ) {
1363  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1364  }
1365  (~C)(i,j) += sum( xmm1 );
1366  }
1367  }
1368  }
1370  //**********************************************************************************************
1371 
1372  //**Default addition assignment to dense matrices (BLAS kernel fallback)*************************
/*!\brief Fallback of the BLAS kernel selection for the addition assignment \f$ C+=A*B \f$.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled when UseDefaultKernel holds for the three matrix types and simply
// forwards the operation to the default addition assignment kernel.
*/
1386  template< typename MT3 // Target matrix type (left-hand side of the assignment)
1387  , typename MT4 // Type of the left multiplication operand
1388  , typename MT5 > // Type of the right multiplication operand
1389  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1390  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1391  {
1392  DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1393  }
1395  //**********************************************************************************************
1396 
1397  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1398 #if BLAZE_BLAS_MODE
1399 
/*!\brief BLAS-based addition assignment \f$ C+=A*B \f$ for single precision matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_sgemm with the scaling factors alpha = 1 and beta = 1. The
// storage order and the two transposition flags are derived from the storage order of C: for a
// row-major C, A is passed untransposed and B transposed; for a column-major C the two flags
// are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
1412  template< typename MT3 // Type of the left-hand side target matrix
1413  , typename MT4 // Type of the left-hand side matrix operand
1414  , typename MT5 > // Type of the right-hand side matrix operand
1415  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1416  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1417  {
1418  using boost::numeric_cast;
1419 
1423 
1424  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
1425  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
1426  const int K ( numeric_cast<int>( A.columns() ) );
1427  const int lda( numeric_cast<int>( A.spacing() ) );
1428  const int ldb( numeric_cast<int>( B.spacing() ) );
1429  const int ldc( numeric_cast<int>( C.spacing() ) );
1430 
1431  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1432  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1433  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1434  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1435  }
1437 #endif
1438  //**********************************************************************************************
1439 
1440  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1441 #if BLAZE_BLAS_MODE
1442 
/*!\brief BLAS-based addition assignment \f$ C+=A*B \f$ for double precision matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_dgemm with the scaling factors alpha = 1 and beta = 1. The
// storage order and the two transposition flags are derived from the storage order of C: for a
// row-major C, A is passed untransposed and B transposed; for a column-major C the two flags
// are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
1455  template< typename MT3 // Type of the left-hand side target matrix
1456  , typename MT4 // Type of the left-hand side matrix operand
1457  , typename MT5 > // Type of the right-hand side matrix operand
1458  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1459  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1460  {
1461  using boost::numeric_cast;
1462 
1466 
1467  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
1468  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
1469  const int K ( numeric_cast<int>( A.columns() ) );
1470  const int lda( numeric_cast<int>( A.spacing() ) );
1471  const int ldb( numeric_cast<int>( B.spacing() ) );
1472  const int ldc( numeric_cast<int>( C.spacing() ) );
1473 
1474  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1475  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1476  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1477  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1478  }
1480 #endif
1481  //**********************************************************************************************
1482 
1483  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1484 #if BLAZE_BLAS_MODE
1485 
/*!\brief BLAS-based addition assignment \f$ C+=A*B \f$ for single precision complex matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_cgemm with the complex scaling factors alpha = (1,0) and
// beta = (1,0), which are passed by address. The storage order and the two transposition flags
// are derived from the storage order of C: for a row-major C, A is passed untransposed and B
// transposed; for a column-major C the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
1498  template< typename MT3 // Type of the left-hand side target matrix
1499  , typename MT4 // Type of the left-hand side matrix operand
1500  , typename MT5 > // Type of the right-hand side matrix operand
1501  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1502  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1503  {
1504  using boost::numeric_cast;
1505 
1509  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1510  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1511  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1512 
1513  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
1514  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
1515  const int K ( numeric_cast<int>( A.columns() ) );
1516  const int lda( numeric_cast<int>( A.spacing() ) );
1517  const int ldb( numeric_cast<int>( B.spacing() ) );
1518  const int ldc( numeric_cast<int>( C.spacing() ) );
1519  const complex<float> alpha( 1.0F, 0.0F ); // Scaling factor of the product A*B
1520  const complex<float> beta ( 1.0F, 0.0F ); // Scaling factor of the existing content of C
1521 
1522  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1523  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1524  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1525  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1526  }
1528 #endif
1529  //**********************************************************************************************
1530 
1531  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1532 #if BLAZE_BLAS_MODE
1533 
/*!\brief BLAS-based addition assignment \f$ C+=A*B \f$ for double precision complex matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_zgemm with the complex scaling factors alpha = (1,0) and
// beta = (1,0), which are passed by address. The storage order and the two transposition flags
// are derived from the storage order of C: for a row-major C, A is passed untransposed and B
// transposed; for a column-major C the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
1546  template< typename MT3 // Type of the left-hand side target matrix
1547  , typename MT4 // Type of the left-hand side matrix operand
1548  , typename MT5 > // Type of the right-hand side matrix operand
1549  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1550  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1551  {
1552  using boost::numeric_cast;
1553 
1557  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1558  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1559  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1560 
1561  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
1562  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
1563  const int K ( numeric_cast<int>( A.columns() ) );
1564  const int lda( numeric_cast<int>( A.spacing() ) );
1565  const int ldb( numeric_cast<int>( B.spacing() ) );
1566  const int ldc( numeric_cast<int>( C.spacing() ) );
1567  const complex<double> alpha( 1.0, 0.0 ); // Scaling factor of the product A*B
1568  const complex<double> beta ( 1.0, 0.0 ); // Scaling factor of the existing content of C
1569 
1570  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1571  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1572  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1573  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1574  }
1576 #endif
1577  //**********************************************************************************************
1578 
1579  //**Addition assignment to sparse matrices******************************************************
1580  // No special implementation for the addition assignment to sparse matrices.
1581  //**********************************************************************************************
1582 
1583  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a dense matrix multiplication expression to a dense matrix
//        (\f$ C-=A*B \f$).
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side multiplication expression to be subtracted.
// \return void
//
// The function returns immediately if the target has no rows or columns or if the inner
// dimension of the multiplication is zero. Otherwise both operands of the expression are
// evaluated via serial() into the local objects A and B, and the actual computation is
// delegated to selectSubAssignKernel().
*/
1596  template< typename MT // Type of the target dense matrix
1597  , bool SO > // Storage order of the target dense matrix
1598  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1599  {
1601 
1602  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1603  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1604 
1605  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1606  return; // Nothing to subtract for an empty target or an empty inner dimension
1607  }
1608 
1609  LT A( serial( rhs.lhs_ ) ); // Evaluation of the left-hand side dense matrix operand
1610  RT B( serial( rhs.rhs_ ) ); // Evaluation of the right-hand side dense matrix operand
1611 
1612  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1613  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1614  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1615  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1616  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1617  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1618 
1619  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1620  }
1622  //**********************************************************************************************
1623 
1624  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Kernel dispatch for the subtraction assignment \f$ C-=A*B \f$ to a dense matrix.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Targets with at least DMATTDMATMULT_THRESHOLD elements are handed to the BLAS kernel
// selection; every smaller target is handled by the default kernel.
*/
1635  template< typename MT3 // Target matrix type (left-hand side of the assignment)
1636  , typename MT4 // Type of the left multiplication operand
1637  , typename MT5 > // Type of the right multiplication operand
1638  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1639  {
1640  if( C.rows() * C.columns() >= DMATTDMATMULT_THRESHOLD )
1641  DMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1642  else
1643  DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1644  }
1646  //**********************************************************************************************
1647 
1648  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Default (non-vectorized) kernel for the subtraction assignment \f$ C-=A*B \f$.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled when UseVectorizedDefaultKernel does not hold for the three matrix
// types. For every row i of C and every k, the product A(i,k)*B(k,j) is subtracted from C(i,j)
// for all columns j; the column loop is manually unrolled by two and an odd last column is
// handled separately.
*/
1662  template< typename MT3 // Type of the left-hand side target matrix
1663  , typename MT4 // Type of the left-hand side matrix operand
1664  , typename MT5 > // Type of the right-hand side matrix operand
1665  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1666  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1667  {
1668  const size_t M( A.rows() ); // Number of rows of the result
1669  const size_t N( B.columns() ); // Number of columns of the result
1670  const size_t K( A.columns() ); // Inner dimension of the multiplication
1671 
1672  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1673  const size_t end( N & size_t(-2) ); // N rounded down to an even number (end of the unrolled loop)
1674 
1675  for( size_t i=0UL; i<M; ++i ) {
1676  for( size_t k=0UL; k<K; ++k ) {
1677  for( size_t j=0UL; j<end; j+=2UL ) { // Two columns per iteration
1678  C(i,j ) -= A(i,k) * B(k,j );
1679  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1680  }
1681  if( end < N ) { // Remaining last column in case N is odd
1682  C(i,end) -= A(i,k) * B(k,end);
1683  }
1684  }
1685  }
1686  }
1688  //**********************************************************************************************
1689 
1690  //**Vectorized default subtraction assignment to row-major dense matrices************************
1704  template< typename MT3 // Type of the left-hand side target matrix
1705  , typename MT4 // Type of the left-hand side matrix operand
1706  , typename MT5 > // Type of the right-hand side matrix operand
1707  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1708  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1709  {
1710  typedef IntrinsicTrait<ElementType> IT;
1711 
1712  const size_t M( A.rows() );
1713  const size_t N( B.columns() );
1714  const size_t K( A.columns() );
1715 
1716  size_t i( 0UL );
1717 
1718  for( ; (i+2UL) <= M; i+=2UL ) {
1719  size_t j( 0UL );
1720  for( ; (j+4UL) <= N; j+=4UL ) {
1721  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1722  for( size_t k=0UL; k<K; k+=IT::size ) {
1723  const IntrinsicType a1( A.load(i ,k) );
1724  const IntrinsicType a2( A.load(i+1UL,k) );
1725  const IntrinsicType b1( B.load(k,j ) );
1726  const IntrinsicType b2( B.load(k,j+1UL) );
1727  const IntrinsicType b3( B.load(k,j+2UL) );
1728  const IntrinsicType b4( B.load(k,j+3UL) );
1729  xmm1 = xmm1 + a1 * b1;
1730  xmm2 = xmm2 + a1 * b2;
1731  xmm3 = xmm3 + a1 * b3;
1732  xmm4 = xmm4 + a1 * b4;
1733  xmm5 = xmm5 + a2 * b1;
1734  xmm6 = xmm6 + a2 * b2;
1735  xmm7 = xmm7 + a2 * b3;
1736  xmm8 = xmm8 + a2 * b4;
1737  }
1738  (~C)(i ,j ) -= sum( xmm1 );
1739  (~C)(i ,j+1UL) -= sum( xmm2 );
1740  (~C)(i ,j+2UL) -= sum( xmm3 );
1741  (~C)(i ,j+3UL) -= sum( xmm4 );
1742  (~C)(i+1UL,j ) -= sum( xmm5 );
1743  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
1744  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
1745  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
1746  }
1747  for( ; (j+2UL) <= N; j+=2UL ) {
1748  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1749  for( size_t k=0UL; k<K; k+=IT::size ) {
1750  const IntrinsicType a1( A.load(i ,k) );
1751  const IntrinsicType a2( A.load(i+1UL,k) );
1752  const IntrinsicType b1( B.load(k,j ) );
1753  const IntrinsicType b2( B.load(k,j+1UL) );
1754  xmm1 = xmm1 + a1 * b1;
1755  xmm2 = xmm2 + a1 * b2;
1756  xmm3 = xmm3 + a2 * b1;
1757  xmm4 = xmm4 + a2 * b2;
1758  }
1759  (~C)(i ,j ) -= sum( xmm1 );
1760  (~C)(i ,j+1UL) -= sum( xmm2 );
1761  (~C)(i+1UL,j ) -= sum( xmm3 );
1762  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1763  }
1764  if( j < N ) {
1765  IntrinsicType xmm1, xmm2;
1766  for( size_t k=0UL; k<K; k+=IT::size ) {
1767  const IntrinsicType b1( B.load(k,j) );
1768  xmm1 = xmm1 + A.load(i ,k) * b1;
1769  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1770  }
1771  (~C)(i ,j) -= sum( xmm1 );
1772  (~C)(i+1UL,j) -= sum( xmm2 );
1773  }
1774  }
1775  if( i < M ) {
1776  size_t j( 0UL );
1777  for( ; (j+4UL) <= N; j+=4UL ) {
1778  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1779  for( size_t k=0UL; k<K; k+=IT::size ) {
1780  const IntrinsicType a1( A.load(i,k) );
1781  xmm1 = xmm1 + a1 * B.load(k,j );
1782  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1783  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1784  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1785  }
1786  (~C)(i,j ) -= sum( xmm1 );
1787  (~C)(i,j+1UL) -= sum( xmm2 );
1788  (~C)(i,j+2UL) -= sum( xmm3 );
1789  (~C)(i,j+3UL) -= sum( xmm4 );
1790  }
1791  for( ; (j+2UL) <= N; j+=2UL ) {
1792  IntrinsicType xmm1, xmm2;
1793  for( size_t k=0UL; k<K; k+=IT::size ) {
1794  const IntrinsicType a1( A.load(i,k) );
1795  xmm1 = xmm1 + a1 * B.load(k,j );
1796  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1797  }
1798  (~C)(i,j ) -= sum( xmm1 );
1799  (~C)(i,j+1UL) -= sum( xmm2 );
1800  }
1801  if( j < N ) {
1802  IntrinsicType xmm1, xmm2;
1803  for( size_t k=0UL; k<K; k+=IT::size ) {
1804  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1805  }
1806  (~C)(i,j) -= sum( xmm1 );
1807  }
1808  }
1809  }
1811  //**********************************************************************************************
1812 
1813  //**Vectorized default subtraction assignment to column-major dense matrices*********************
1827  template< typename MT3 // Type of the left-hand side target matrix
1828  , typename MT4 // Type of the left-hand side matrix operand
1829  , typename MT5 > // Type of the right-hand side matrix operand
1830  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1831  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1832  {
1833  typedef IntrinsicTrait<ElementType> IT;
1834 
1835  const size_t M( A.rows() );
1836  const size_t N( B.columns() );
1837  const size_t K( A.columns() );
1838 
1839  size_t i( 0UL );
1840 
1841  for( ; (i+4UL) <= M; i+=4UL ) {
1842  size_t j( 0UL );
1843  for( ; (j+2UL) <= N; j+=2UL ) {
1844  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1845  for( size_t k=0UL; k<K; k+=IT::size ) {
1846  const IntrinsicType a1( A.load(i ,k) );
1847  const IntrinsicType a2( A.load(i+1UL,k) );
1848  const IntrinsicType a3( A.load(i+2UL,k) );
1849  const IntrinsicType a4( A.load(i+3UL,k) );
1850  const IntrinsicType b1( B.load(k,j ) );
1851  const IntrinsicType b2( B.load(k,j+1UL) );
1852  xmm1 = xmm1 + a1 * b1;
1853  xmm2 = xmm2 + a1 * b2;
1854  xmm3 = xmm3 + a2 * b1;
1855  xmm4 = xmm4 + a2 * b2;
1856  xmm5 = xmm5 + a3 * b1;
1857  xmm6 = xmm6 + a3 * b2;
1858  xmm7 = xmm7 + a4 * b1;
1859  xmm8 = xmm8 + a4 * b2;
1860  }
1861  (~C)(i ,j ) -= sum( xmm1 );
1862  (~C)(i ,j+1UL) -= sum( xmm2 );
1863  (~C)(i+1UL,j ) -= sum( xmm3 );
1864  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1865  (~C)(i+2UL,j ) -= sum( xmm5 );
1866  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
1867  (~C)(i+3UL,j ) -= sum( xmm7 );
1868  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
1869  }
1870  if( j < N ) {
1871  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1872  for( size_t k=0UL; k<K; k+=IT::size ) {
1873  const IntrinsicType b1( B.load(k,j) );
1874  xmm1 = xmm1 + A.load(i ,k) * b1;
1875  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1876  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1877  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1878  }
1879  (~C)(i ,j) -= sum( xmm1 );
1880  (~C)(i+1UL,j) -= sum( xmm2 );
1881  (~C)(i+2UL,j) -= sum( xmm3 );
1882  (~C)(i+3UL,j) -= sum( xmm4 );
1883  }
1884  }
1885  for( ; (i+2UL) <= M; i+=2UL ) {
1886  size_t j( 0UL );
1887  for( ; (j+2UL) <= N; j+=2UL ) {
1888  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1889  for( size_t k=0UL; k<K; k+=IT::size ) {
1890  const IntrinsicType a1( A.load(i ,k) );
1891  const IntrinsicType a2( A.load(i+1UL,k) );
1892  const IntrinsicType b1( B.load(k,j ) );
1893  const IntrinsicType b2( B.load(k,j+1UL) );
1894  xmm1 = xmm1 + a1 * b1;
1895  xmm2 = xmm2 + a1 * b2;
1896  xmm3 = xmm3 + a2 * b1;
1897  xmm4 = xmm4 + a2 * b2;
1898  }
1899  (~C)(i ,j ) -= sum( xmm1 );
1900  (~C)(i ,j+1UL) -= sum( xmm2 );
1901  (~C)(i+1UL,j ) -= sum( xmm3 );
1902  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1903  }
1904  if( j < N ) {
1905  IntrinsicType xmm1, xmm2;
1906  for( size_t k=0UL; k<K; k+=IT::size ) {
1907  const IntrinsicType b1( B.load(k,j) );
1908  xmm1 = xmm1 + A.load(i ,k) * b1;
1909  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1910  }
1911  (~C)(i ,j) -= sum( xmm1 );
1912  (~C)(i+1UL,j) -= sum( xmm2 );
1913  }
1914  }
1915  if( i < M ) {
1916  size_t j( 0UL );
1917  for( ; (j+2UL) <= N; j+=2UL ) {
1918  IntrinsicType xmm1, xmm2;
1919  for( size_t k=0UL; k<K; k+=IT::size ) {
1920  const IntrinsicType a1( A.load(i,k) );
1921  xmm1 = xmm1 + a1 * B.load(k,j );
1922  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1923  }
1924  (~C)(i,j ) -= sum( xmm1 );
1925  (~C)(i,j+1UL) -= sum( xmm2 );
1926  }
1927  if( j < N ) {
1928  IntrinsicType xmm1, xmm2;
1929  for( size_t k=0UL; k<K; k+=IT::size ) {
1930  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1931  }
1932  (~C)(i,j) -= sum( xmm1 );
1933  }
1934  }
1935  }
1937  //**********************************************************************************************
1938 
1939  //**Default subtraction assignment to dense matrices (BLAS kernel fallback)**********************
/*!\brief Fallback of the BLAS kernel selection for the subtraction assignment \f$ C-=A*B \f$.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is enabled when UseDefaultKernel holds for the three matrix types and simply
// forwards the operation to the default subtraction assignment kernel.
*/
1953  template< typename MT3 // Target matrix type (left-hand side of the assignment)
1954  , typename MT4 // Type of the left multiplication operand
1955  , typename MT5 > // Type of the right multiplication operand
1956  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1957  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1958  {
1959  DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1960  }
1962  //**********************************************************************************************
1963 
1964  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
1965 #if BLAZE_BLAS_MODE
1966 
/*!\brief BLAS-based subtraction assignment \f$ C-=A*B \f$ for single precision matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_sgemm with the scaling factors alpha = -1 and beta = 1. The
// storage order and the two transposition flags are derived from the storage order of C: for a
// row-major C, A is passed untransposed and B transposed; for a column-major C the two flags
// are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
1979  template< typename MT3 // Type of the left-hand side target matrix
1980  , typename MT4 // Type of the left-hand side matrix operand
1981  , typename MT5 > // Type of the right-hand side matrix operand
1982  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1983  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1984  {
1985  using boost::numeric_cast;
1986 
1990 
1991  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
1992  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
1993  const int K ( numeric_cast<int>( A.columns() ) );
1994  const int lda( numeric_cast<int>( A.spacing() ) );
1995  const int ldb( numeric_cast<int>( B.spacing() ) );
1996  const int ldc( numeric_cast<int>( C.spacing() ) );
1997 
1998  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1999  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2000  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2001  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
2002  }
2004 #endif
2005  //**********************************************************************************************
2006 
2007  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2008 #if BLAZE_BLAS_MODE
2009 
/*!\brief BLAS-based subtraction assignment \f$ C-=A*B \f$ for double precision matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_dgemm with the scaling factors alpha = -1 and beta = 1. The
// storage order and the two transposition flags are derived from the storage order of C: for a
// row-major C, A is passed untransposed and B transposed; for a column-major C the two flags
// are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
2022  template< typename MT3 // Type of the left-hand side target matrix
2023  , typename MT4 // Type of the left-hand side matrix operand
2024  , typename MT5 > // Type of the right-hand side matrix operand
2025  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2026  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2027  {
2028  using boost::numeric_cast;
2029 
2033 
2034  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
2035  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
2036  const int K ( numeric_cast<int>( A.columns() ) );
2037  const int lda( numeric_cast<int>( A.spacing() ) );
2038  const int ldb( numeric_cast<int>( B.spacing() ) );
2039  const int ldc( numeric_cast<int>( C.spacing() ) );
2040 
2041  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2042  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2043  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2044  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
2045  }
2047 #endif
2048  //**********************************************************************************************
2049 
2050  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2051 #if BLAZE_BLAS_MODE
2052 
/*!\brief BLAS-based subtraction assignment \f$ C-=A*B \f$ for single precision complex matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_cgemm with the complex scaling factors alpha = (-1,0) and
// beta = (1,0), which are passed by address. The storage order and the two transposition flags
// are derived from the storage order of C: for a row-major C, A is passed untransposed and B
// transposed; for a column-major C the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
2065  template< typename MT3 // Type of the left-hand side target matrix
2066  , typename MT4 // Type of the left-hand side matrix operand
2067  , typename MT5 > // Type of the right-hand side matrix operand
2068  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2069  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2070  {
2071  using boost::numeric_cast;
2072 
2076  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2077  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2078  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2079 
2080  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
2081  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
2082  const int K ( numeric_cast<int>( A.columns() ) );
2083  const int lda( numeric_cast<int>( A.spacing() ) );
2084  const int ldb( numeric_cast<int>( B.spacing() ) );
2085  const int ldc( numeric_cast<int>( C.spacing() ) );
2086  const complex<float> alpha( -1.0F, 0.0F ); // Negative scaling of A*B turns the update into a subtraction
2087  const complex<float> beta ( 1.0F, 0.0F ); // Scaling factor of the existing content of C
2088 
2089  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2090  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2091  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2092  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2093  }
2095 #endif
2096  //**********************************************************************************************
2097 
2098  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2099 #if BLAZE_BLAS_MODE
2100 
/*!\brief BLAS-based subtraction assignment \f$ C-=A*B \f$ for double precision complex matrices.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// Forwards the operation to cblas_zgemm with the complex scaling factors alpha = (-1,0) and
// beta = (1,0), which are passed by address. The storage order and the two transposition flags
// are derived from the storage order of C: for a row-major C, A is passed untransposed and B
// transposed; for a column-major C the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B being column-major -- confirm.
*/
2113  template< typename MT3 // Type of the left-hand side target matrix
2114  , typename MT4 // Type of the left-hand side matrix operand
2115  , typename MT5 > // Type of the right-hand side matrix operand
2116  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2117  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2118  {
2119  using boost::numeric_cast;
2120 
2124  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2125  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2126  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2127 
2128  const int M ( numeric_cast<int>( A.rows() ) ); // Sizes and spacings are converted to the
2129  const int N ( numeric_cast<int>( B.columns() ) ); // int arguments passed to the CBLAS call
2130  const int K ( numeric_cast<int>( A.columns() ) );
2131  const int lda( numeric_cast<int>( A.spacing() ) );
2132  const int ldb( numeric_cast<int>( B.spacing() ) );
2133  const int ldc( numeric_cast<int>( C.spacing() ) );
2134  const complex<double> alpha( -1.0, 0.0 ); // Negative scaling of A*B turns the update into a subtraction
2135  const complex<double> beta ( 1.0, 0.0 ); // Scaling factor of the existing content of C
2136 
2137  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2138  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2139  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2140  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2141  }
2143 #endif
2144  //**********************************************************************************************
2145 
2146  //**Subtraction assignment to sparse matrices***************************************************
2147  // No special implementation for the subtraction assignment to sparse matrices.
2148  //**********************************************************************************************
2149 
2150  //**Multiplication assignment to dense matrices*************************************************
2151  // No special implementation for the multiplication assignment to dense matrices.
2152  //**********************************************************************************************
2153 
2154  //**Multiplication assignment to sparse matrices************************************************
2155  // No special implementation for the multiplication assignment to sparse matrices.
2156  //**********************************************************************************************
2157 
2158  //**SMP assignment to dense matrices************************************************************
// SMP assignment of the multiplication expression to a dense matrix.
//
// Enabled only when UseSMPAssign<MT> holds. A target without rows or columns is left
// untouched; if the left operand has no columns the target is reset instead. Otherwise both
// operands are evaluated into LT/RT objects and the work is forwarded to
// smpAssign( ~lhs, A * B ).
//
// lhs: target dense matrix.
// rhs: multiplication expression to be assigned.
2173  template< typename MT // Type of the target dense matrix
2174  , bool SO > // Storage order of the target dense matrix
2175  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2176  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2177  {
2179 
2180  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2181  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2182 
      // Nothing to compute for an empty target.
2183  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2184  return;
2185  }
      // Empty inner dimension: there are no products to sum, so the target is only reset.
2186  else if( rhs.lhs_.columns() == 0UL ) {
2187  reset( ~lhs );
2188  return;
2189  }
2190 
2191  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2192  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2193 
      // Sanity checks: the evaluated operands must match the expression and the target in size.
2194  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2195  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2196  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2197  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2198  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2199  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2200 
      // Re-dispatch on the product of the evaluated operands.
2201  smpAssign( ~lhs, A * B );
2202  }
2204  //**********************************************************************************************
2205 
2206  //**SMP assignment to sparse matrices***********************************************************
// SMP assignment of the multiplication expression to a sparse matrix.
//
// Enabled only when UseSMPAssign<MT> holds. The expression is first materialised into a
// temporary of type TmpType, which is then handed to smpAssign together with the target.
//
// lhs: target sparse matrix.
// rhs: multiplication expression to be assigned.
2220  template< typename MT // Type of the target sparse matrix
2221  , bool SO > // Storage order of the target sparse matrix
2222  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2223  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2224  {
2226 
      // Temporary type chosen from the target's storage order flag SO
      // (presumably OppositeType when SO is set, ResultType otherwise - confirm against SelectType).
2227  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2228 
2235 
2236  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2237  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2238 
      // Evaluate the whole expression once, then assign the evaluated result.
2239  const TmpType tmp( rhs );
2240  smpAssign( ~lhs, tmp );
2241  }
2243  //**********************************************************************************************
2244 
2245  //**SMP addition assignment to dense matrices***************************************************
// SMP addition assignment of the multiplication expression to a dense matrix.
//
// Enabled only when UseSMPAssign<MT> holds. Returns without touching the target when the
// target has no rows or columns or when the left operand has no columns. Otherwise both
// operands are evaluated into LT/RT objects and the work is forwarded to
// smpAddAssign( ~lhs, A * B ).
//
// lhs: target dense matrix.
// rhs: multiplication expression to be added.
2260  template< typename MT // Type of the target dense matrix
2261  , bool SO > // Storage order of the target dense matrix
2262  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2263  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2264  {
2266 
2267  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2268  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2269 
      // Empty target or empty inner dimension: there is nothing to add.
2270  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2271  return;
2272  }
2273 
2274  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2275  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2276 
      // Sanity checks: the evaluated operands must match the expression and the target in size.
2277  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2278  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2279  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2280  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2281  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2282  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2283 
      // Re-dispatch on the product of the evaluated operands.
2284  smpAddAssign( ~lhs, A * B );
2285  }
2287  //**********************************************************************************************
2288 
2289  //**SMP addition assignment to sparse matrices**************************************************
2290  // No special implementation for the SMP addition assignment to sparse matrices.
2291  //**********************************************************************************************
2292 
2293  //**SMP subtraction assignment to dense matrices************************************************
// SMP subtraction assignment of the multiplication expression to a dense matrix.
//
// Enabled only when UseSMPAssign<MT> holds. Returns without touching the target when the
// target has no rows or columns or when the left operand has no columns. Otherwise both
// operands are evaluated into LT/RT objects and the work is forwarded to
// smpSubAssign( ~lhs, A * B ).
//
// lhs: target dense matrix.
// rhs: multiplication expression to be subtracted.
2308  template< typename MT // Type of the target dense matrix
2309  , bool SO > // Storage order of the target dense matrix
2310  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2311  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
2312  {
2314 
2315  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2316  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2317 
      // Empty target or empty inner dimension: there is nothing to subtract.
2318  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2319  return;
2320  }
2321 
2322  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
2323  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
2324 
      // Sanity checks: the evaluated operands must match the expression and the target in size.
2325  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
2326  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
2327  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
2328  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
2329  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2330  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
2331 
      // Re-dispatch on the product of the evaluated operands.
2332  smpSubAssign( ~lhs, A * B );
2333  }
2335  //**********************************************************************************************
2336 
2337  //**SMP subtraction assignment to sparse matrices***********************************************
2338  // No special implementation for the SMP subtraction assignment to sparse matrices.
2339  //**********************************************************************************************
2340 
2341  //**SMP multiplication assignment to dense matrices*********************************************
2342  // No special implementation for the SMP multiplication assignment to dense matrices.
2343  //**********************************************************************************************
2344 
2345  //**SMP multiplication assignment to sparse matrices********************************************
2346  // No special implementation for the SMP multiplication assignment to sparse matrices.
2347  //**********************************************************************************************
2348 
2349  //**Compile time checks*************************************************************************
2356  //**********************************************************************************************
2357 };
2358 //*************************************************************************************************
2359 
2360 
2361 
2362 
2363 //=================================================================================================
2364 //
2365 // DMATSCALARMULTEXPR SPECIALIZATION
2366 //
2367 //=================================================================================================
2368 
2369 //*************************************************************************************************
2377 template< typename MT1 // Type of the left-hand side dense matrix
2378  , typename MT2 // Type of the right-hand side dense matrix
2379  , typename ST > // Type of the right-hand side scalar value
2380 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
2381  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2382  , private MatScalarMultExpr
2383  , private Computation
2384 {
2385  private:
2386  //**Type definitions****************************************************************************
2387  typedef DMatTDMatMultExpr<MT1,MT2> MMM; // Type of the matrix multiplication expression that is scaled
2388  typedef typename MMM::ResultType RES; // Result type of the matrix multiplication expression
2389  typedef typename MT1::ResultType RT1; // Result type of the left-hand side matrix operand
2390  typedef typename MT2::ResultType RT2; // Result type of the right-hand side matrix operand
2391  typedef typename RT1::ElementType ET1; // Element type of the left-hand side result type
2392  typedef typename RT2::ElementType ET2; // Element type of the right-hand side result type
2393  typedef typename MT1::CompositeType CT1; // Composite type of the left-hand side matrix operand
2394  typedef typename MT2::CompositeType CT2; // Composite type of the right-hand side matrix operand
2395  //**********************************************************************************************
2396 
2397  //**********************************************************************************************
// Compile-time switch for the left operand: set when MT1 is a computation or requires evaluation.
2399  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2400  //**********************************************************************************************
2401 
2402  //**********************************************************************************************
// Compile-time switch for the right operand: set when MT2 is a computation or requires evaluation.
2404  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2405  //**********************************************************************************************
2406 
2407  //**********************************************************************************************
2409 
// Helper trait for the SMP assignment overloads of this class.
// The nested value is set when at least one of the two matrix operands has to be evaluated
// (evaluateLeft || evaluateRight). The template argument MT does not enter the computation.
2412  template< typename MT >
2413  struct UseSMPAssign {
2414  enum { value = ( evaluateLeft || evaluateRight ) };
2415  };
2416  //**********************************************************************************************
2417 
2418  //**********************************************************************************************
2420 
// Helper trait selecting the single precision BLAS kernel.
// The nested value is set when IsFloat holds for the element types of T1, T2 and T3 and
// the scalar type T4 is not a complex type.
2423  template< typename T1, typename T2, typename T3, typename T4 >
2424  struct UseSinglePrecisionKernel {
2425  enum { value = IsFloat<typename T1::ElementType>::value &&
2426  IsFloat<typename T2::ElementType>::value &&
2427  IsFloat<typename T3::ElementType>::value &&
2428  !IsComplex<T4>::value };
2429  };
2430  //**********************************************************************************************
2431 
2432  //**********************************************************************************************
2434 
// Helper trait selecting the double precision BLAS kernel.
// The nested value is set when IsDouble holds for the element types of T1, T2 and T3 and
// the scalar type T4 is not a complex type.
2437  template< typename T1, typename T2, typename T3, typename T4 >
2438  struct UseDoublePrecisionKernel {
2439  enum { value = IsDouble<typename T1::ElementType>::value &&
2440  IsDouble<typename T2::ElementType>::value &&
2441  IsDouble<typename T3::ElementType>::value &&
2442  !IsComplex<T4>::value };
2443  };
2444  //**********************************************************************************************
2445 
2446  //**********************************************************************************************
2448 
// Helper trait selecting the single precision complex BLAS kernel.
// The nested value is set when the element types of T1, T2 and T3 are all exactly complex<float>.
2451  template< typename T1, typename T2, typename T3 >
2452  struct UseSinglePrecisionComplexKernel {
2453  typedef complex<float> Type; // Element type all three matrices are compared against
2454  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2455  IsSame<typename T2::ElementType,Type>::value &&
2456  IsSame<typename T3::ElementType,Type>::value };
2457  };
2458  //**********************************************************************************************
2459 
2460  //**********************************************************************************************
2462 
// Helper trait selecting the double precision complex BLAS kernel.
// The nested value is set when the element types of T1, T2 and T3 are all exactly complex<double>.
2465  template< typename T1, typename T2, typename T3 >
2466  struct UseDoublePrecisionComplexKernel {
2467  typedef complex<double> Type; // Element type all three matrices are compared against
2468  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2469  IsSame<typename T2::ElementType,Type>::value &&
2470  IsSame<typename T3::ElementType,Type>::value };
2471  };
2472  //**********************************************************************************************
2473 
2474  //**********************************************************************************************
2476 
// Helper trait selecting the non-BLAS fallback.
// The nested value is set when BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel
// traits (single/double precision, real/complex) applies to the given types.
2478  template< typename T1, typename T2, typename T3, typename T4 >
2479  struct UseDefaultKernel {
2480  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2481  !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2482  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2483  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2484  };
2485  //**********************************************************************************************
2486 
2487  //**********************************************************************************************
2489 
// Helper trait selecting the vectorized default kernel.
// The nested value is set when all three matrix types are flagged vectorizable, the element
// types of T1, T2, T3 and the scalar type T4 are identical, and IntrinsicTrait reports both
// addition and multiplication for that element type.
2491  template< typename T1, typename T2, typename T3, typename T4 >
2492  struct UseVectorizedDefaultKernel {
2493  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2494  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2495  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2496  IsSame<typename T1::ElementType,T4>::value &&
2497  IntrinsicTrait<typename T1::ElementType>::addition &&
2498  IntrinsicTrait<typename T1::ElementType>::multiplication };
2499  };
2500  //**********************************************************************************************
2501 
2502  public:
2503  //**Type definitions****************************************************************************
2504  typedef DMatScalarMultExpr<MMM,ST,false> This; // Type of this DMatScalarMultExpr instance
2505  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type obtained from MultTrait for RES and ST
2506  typedef typename ResultType::OppositeType OppositeType; // OppositeType of the result type
2507  typedef typename ResultType::TransposeType TransposeType; // TransposeType of the result type
2508  typedef typename ResultType::ElementType ElementType; // Element type of the result type
2509  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type associated with ElementType
2510  typedef const ElementType ReturnType; // Return type of the access operator (by value)
2511  typedef const ResultType CompositeType; // Type used when this expression is an operand of another expression
2512 
      // Type of the left-hand side operand: the matrix multiplication expression.
2514  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand;
2515 
      // Type of the right-hand side operand: the scalar.
2517  typedef ST RightOperand;
2518 
      // Type used for the left matrix operand inside the kernels, chosen by evaluateLeft between
      // const RT1 and CT1 (presumably RT1 when evaluation is required - confirm against SelectType).
2520  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2521 
      // Type used for the right matrix operand inside the kernels, chosen by evaluateRight between
      // const RT2 and CT2 (presumably RT2 when evaluation is required - confirm against SelectType).
2523  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2524  //**********************************************************************************************
2525 
2526  //**Compilation flags***************************************************************************
// Vectorization flag: set when both matrix operand types are vectorizable, both element types
// and the scalar type ST are identical, and IntrinsicTrait reports addition and multiplication
// for that element type.
2528  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2529  IsSame<ET1,ET2>::value &&
2530  IsSame<ET1,ST>::value &&
2531  IntrinsicTrait<ET1>::addition &&
2532  IntrinsicTrait<ET1>::multiplication };
2533 
      // SMP flag: set only when neither operand needs evaluation and both operand types are
      // themselves flagged smpAssignable.
2535  enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2536  !evaluateRight && MT2::smpAssignable };
2537  //**********************************************************************************************
2538 
2539  //**Constructor*********************************************************************************
// Constructor: stores the matrix multiplication expression and the scalar it is scaled with.
//
// matrix: left-hand side matrix multiplication expression.
// scalar: right-hand side scalar factor.
2545  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2546  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2547  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2548  {}
2549  //**********************************************************************************************
2550 
2551  //**Access operator*****************************************************************************
// 2D element access: returns element (i,j) of the matrix expression multiplied by the scalar.
// The indices are only checked through BLAZE_INTERNAL_ASSERT.
//
// i: row index, expected to be smaller than rows().
// j: column index, expected to be smaller than columns().
2558  inline ReturnType operator()( size_t i, size_t j ) const {
2559  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2560  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2561  return matrix_(i,j) * scalar_;
2562  }
2563  //**********************************************************************************************
2564 
2565  //**Rows function*******************************************************************************
// Returns the number of rows, taken from the wrapped matrix expression.
2570  inline size_t rows() const {
2571  return matrix_.rows();
2572  }
2573  //**********************************************************************************************
2574 
2575  //**Columns function****************************************************************************
// Returns the number of columns, taken from the wrapped matrix expression.
2580  inline size_t columns() const {
2581  return matrix_.columns();
2582  }
2583  //**********************************************************************************************
2584 
2585  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix multiplication expression.
2590  inline LeftOperand leftOperand() const {
2591  return matrix_;
2592  }
2593  //**********************************************************************************************
2594 
2595  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar.
2600  inline RightOperand rightOperand() const {
2601  return scalar_;
2602  }
2603  //**********************************************************************************************
2604 
2605  //**********************************************************************************************
// Alias query: forwards to canAlias() of the wrapped matrix expression.
//
// alias: address to be checked.
2611  template< typename T >
2612  inline bool canAlias( const T* alias ) const {
2613  return matrix_.canAlias( alias );
2614  }
2615  //**********************************************************************************************
2616 
2617  //**********************************************************************************************
// Alias query: forwards to isAliased() of the wrapped matrix expression.
//
// alias: address to be checked.
2623  template< typename T >
2624  inline bool isAliased( const T* alias ) const {
2625  return matrix_.isAliased( alias );
2626  }
2627  //**********************************************************************************************
2628 
2629  //**********************************************************************************************
// Alignment query: forwards to isAligned() of the wrapped matrix expression.
2634  inline bool isAligned() const {
2635  return matrix_.isAligned();
2636  }
2637  //**********************************************************************************************
2638 
2639  //**********************************************************************************************
// Reports whether the expression can be used in an SMP assignment.
// Returns true when both of the following hold:
//  - BLAZE_BLAS_IS_PARALLEL is off, or rows() * columns() is below DMATTDMATMULT_THRESHOLD;
//  - the left operand of the wrapped multiplication has more than
//    SMP_DMATTDMATMULT_THRESHOLD rows.
2644  inline bool canSMPAssign() const {
2645  typename MMM::LeftOperand A( matrix_.leftOperand() );
2646  return ( !BLAZE_BLAS_IS_PARALLEL ||
2647  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
2648  ( A.rows() > SMP_DMATTDMATMULT_THRESHOLD );
2649  }
2650  //**********************************************************************************************
2651 
2652  private:
2653  //**Member variables****************************************************************************
2654  LeftOperand matrix_; // Left-hand side operand: the matrix multiplication expression
2655  RightOperand scalar_; // Right-hand side operand: the scalar factor
2656  //**********************************************************************************************
2657 
2658  //**Assignment to dense matrices****************************************************************
// Assignment of the scaled matrix multiplication expression to a dense matrix.
//
// A target without rows or columns is left untouched; if the left matrix operand has no
// columns the target is reset instead. Otherwise both matrix operands are evaluated via
// serial() into LT/RT objects and the computation is handed to selectAssignKernel together
// with the scalar.
//
// lhs: target dense matrix.
// rhs: scaled multiplication expression to be assigned.
2670  template< typename MT // Type of the target dense matrix
2671  , bool SO > // Storage order of the target dense matrix
2672  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
2673  {
2675 
2676  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2677  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2678 
      // Unpack the two matrix operands of the wrapped multiplication expression.
2679  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2680  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2681 
      // Nothing to compute for an empty target.
2682  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2683  return;
2684  }
      // Empty inner dimension: there are no products to sum, so the target is only reset.
2685  else if( left.columns() == 0UL ) {
2686  reset( ~lhs );
2687  return;
2688  }
2689 
2690  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
2691  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
2692 
      // Sanity checks: the evaluated operands must match the expression and the target in size.
2693  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2694  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2695  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2696  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2697  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2698  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2699 
2700  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2701  }
2702  //**********************************************************************************************
2703 
2704  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment of a scaled matrix multiplication.
//
// Targets with fewer than DMATTDMATMULT_THRESHOLD elements (rows * columns) are handled by
// selectDefaultAssignKernel, all others by selectBlasAssignKernel.
//
// C: target matrix.
// A: left-hand side matrix operand.
// B: right-hand side matrix operand.
// scalar: scaling factor.
2715  template< typename MT3 // Type of the left-hand side target matrix
2716  , typename MT4 // Type of the left-hand side matrix operand
2717  , typename MT5 // Type of the right-hand side matrix operand
2718  , typename ST2 > // Type of the scalar value
2719  static inline void selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2720  {
2721  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
2722  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2723  else
2724  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2725  }
2726  //**********************************************************************************************
2727 
2728  //**Default assignment to dense matrices********************************************************
// Default (non-vectorized) assignment kernel for a scaled matrix multiplication.
//
// Active when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold. For every row i of A
// the row of C is initialised with the first product term, the remaining terms are
// accumulated, and the finished row is multiplied by the scalar.
//
// The kernel reads A(i,0) and B(0,k) unconditionally, so A needs at least one column;
// assign() returns before reaching the kernel selection when the left operand has no columns.
//
// C: target matrix, overwritten.
// A: left-hand side matrix operand.
// B: right-hand side matrix operand.
// scalar: scaling factor.
2742  template< typename MT3 // Type of the left-hand side target matrix
2743  , typename MT4 // Type of the left-hand side matrix operand
2744  , typename MT5 // Type of the right-hand side matrix operand
2745  , typename ST2 > // Type of the scalar value
2746  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2747  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2748  {
2749  for( size_t i=0UL; i<A.rows(); ++i ) {
      // First term (j == 0) overwrites whatever C held before.
2750  for( size_t k=0UL; k<B.columns(); ++k ) {
2751  C(i,k) = A(i,0UL) * B(0UL,k);
2752  }
      // Remaining terms of the inner dimension are accumulated.
2753  for( size_t j=1UL; j<A.columns(); ++j ) {
2754  for( size_t k=0UL; k<B.columns(); ++k ) {
2755  C(i,k) += A(i,j) * B(j,k);
2756  }
2757  }
      // Scale the completed row.
2758  for( size_t k=0UL; k<B.columns(); ++k ) {
2759  C(i,k) *= scalar;
2760  }
2761  }
2762  }
2763  //**********************************************************************************************
2764 
2765  //**Vectorized default assignment to row-major dense matrices***********************************
2779  template< typename MT3 // Type of the left-hand side target matrix
2780  , typename MT4 // Type of the left-hand side matrix operand
2781  , typename MT5 // Type of the right-hand side matrix operand
2782  , typename ST2 > // Type of the scalar value
2783  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2784  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2785  {
2786  typedef IntrinsicTrait<ElementType> IT;
2787 
2788  const size_t M( A.rows() );
2789  const size_t N( B.columns() );
2790  const size_t K( A.columns() );
2791 
2792  size_t i( 0UL );
2793 
2794  for( ; (i+2UL) <= M; i+=2UL ) {
2795  size_t j( 0UL );
2796  for( ; (j+4UL) <= N; j+=4UL ) {
2797  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2798  for( size_t k=0UL; k<K; k+=IT::size ) {
2799  const IntrinsicType a1( A.load(i ,k) );
2800  const IntrinsicType a2( A.load(i+1UL,k) );
2801  const IntrinsicType b1( B.load(k,j ) );
2802  const IntrinsicType b2( B.load(k,j+1UL) );
2803  const IntrinsicType b3( B.load(k,j+2UL) );
2804  const IntrinsicType b4( B.load(k,j+3UL) );
2805  xmm1 = xmm1 + a1 * b1;
2806  xmm2 = xmm2 + a1 * b2;
2807  xmm3 = xmm3 + a1 * b3;
2808  xmm4 = xmm4 + a1 * b4;
2809  xmm5 = xmm5 + a2 * b1;
2810  xmm6 = xmm6 + a2 * b2;
2811  xmm7 = xmm7 + a2 * b3;
2812  xmm8 = xmm8 + a2 * b4;
2813  }
2814  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2815  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2816  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
2817  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
2818  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
2819  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
2820  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
2821  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
2822  }
2823  for( ; (j+2UL) <= N; j+=2UL ) {
2824  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2825  for( size_t k=0UL; k<K; k+=IT::size ) {
2826  const IntrinsicType a1( A.load(i ,k) );
2827  const IntrinsicType a2( A.load(i+1UL,k) );
2828  const IntrinsicType b1( B.load(k,j ) );
2829  const IntrinsicType b2( B.load(k,j+1UL) );
2830  xmm1 = xmm1 + a1 * b1;
2831  xmm2 = xmm2 + a1 * b2;
2832  xmm3 = xmm3 + a2 * b1;
2833  xmm4 = xmm4 + a2 * b2;
2834  }
2835  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2836  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2837  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2838  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2839  }
2840  if( j < N ) {
2841  IntrinsicType xmm1, xmm2;
2842  for( size_t k=0UL; k<K; k+=IT::size ) {
2843  const IntrinsicType b1( B.load(k,j) );
2844  xmm1 = xmm1 + A.load(i ,k) * b1;
2845  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2846  }
2847  (~C)(i ,j) = sum( xmm1 ) * scalar;
2848  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2849  }
2850  }
2851  if( i < M ) {
2852  size_t j( 0UL );
2853  for( ; (j+4UL) <= N; j+=4UL ) {
2854  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2855  for( size_t k=0UL; k<K; k+=IT::size ) {
2856  const IntrinsicType a1( A.load(i,k) );
2857  xmm1 = xmm1 + a1 * B.load(k,j );
2858  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2859  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2860  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2861  }
2862  (~C)(i,j ) = sum( xmm1 ) * scalar;
2863  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2864  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
2865  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
2866  }
2867  for( ; (j+2UL) <= N; j+=2UL ) {
2868  IntrinsicType xmm1, xmm2;
2869  for( size_t k=0UL; k<K; k+=IT::size ) {
2870  const IntrinsicType a1( A.load(i,k) );
2871  xmm1 = xmm1 + a1 * B.load(k,j );
2872  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2873  }
2874  (~C)(i,j ) = sum( xmm1 ) * scalar;
2875  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2876  }
2877  if( j < N ) {
2878  IntrinsicType xmm1, xmm2;
2879  for( size_t k=0UL; k<K; k+=IT::size ) {
2880  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2881  }
2882  (~C)(i,j) = sum( xmm1 ) * scalar;
2883  }
2884  }
2885  }
2886  //**********************************************************************************************
2887 
2888  //**Vectorized default assignment to column-major dense matrices********************************
2902  template< typename MT3 // Type of the left-hand side target matrix
2903  , typename MT4 // Type of the left-hand side matrix operand
2904  , typename MT5 // Type of the right-hand side matrix operand
2905  , typename ST2 > // Type of the scalar value
2906  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2907  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2908  {
2909  typedef IntrinsicTrait<ElementType> IT;
2910 
2911  const size_t M( A.rows() );
2912  const size_t N( B.columns() );
2913  const size_t K( A.columns() );
2914 
2915  size_t i( 0UL );
2916 
2917  for( ; (i+4UL) <= M; i+=4UL ) {
2918  size_t j( 0UL );
2919  for( ; (j+2UL) <= N; j+=2UL ) {
2920  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2921  for( size_t k=0UL; k<K; k+=IT::size ) {
2922  const IntrinsicType a1( A.load(i ,k) );
2923  const IntrinsicType a2( A.load(i+1UL,k) );
2924  const IntrinsicType a3( A.load(i+2UL,k) );
2925  const IntrinsicType a4( A.load(i+3UL,k) );
2926  const IntrinsicType b1( B.load(k,j ) );
2927  const IntrinsicType b2( B.load(k,j+1UL) );
2928  xmm1 = xmm1 + a1 * b1;
2929  xmm2 = xmm2 + a1 * b2;
2930  xmm3 = xmm3 + a2 * b1;
2931  xmm4 = xmm4 + a2 * b2;
2932  xmm5 = xmm5 + a3 * b1;
2933  xmm6 = xmm6 + a3 * b2;
2934  xmm7 = xmm7 + a4 * b1;
2935  xmm8 = xmm8 + a4 * b2;
2936  }
2937  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2938  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2939  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2940  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2941  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
2942  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
2943  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
2944  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
2945  }
2946  if( j < N ) {
2947  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2948  for( size_t k=0UL; k<K; k+=IT::size ) {
2949  const IntrinsicType b1( B.load(k,j) );
2950  xmm1 = xmm1 + A.load(i ,k) * b1;
2951  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2952  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2953  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2954  }
2955  (~C)(i ,j) = sum( xmm1 ) * scalar;
2956  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2957  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
2958  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
2959  }
2960  }
2961  for( ; (i+2UL) <= M; i+=2UL ) {
2962  size_t j( 0UL );
2963  for( ; (j+2UL) <= N; j+=2UL ) {
2964  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2965  for( size_t k=0UL; k<K; k+=IT::size ) {
2966  const IntrinsicType a1( A.load(i ,k) );
2967  const IntrinsicType a2( A.load(i+1UL,k) );
2968  const IntrinsicType b1( B.load(k,j ) );
2969  const IntrinsicType b2( B.load(k,j+1UL) );
2970  xmm1 = xmm1 + a1 * b1;
2971  xmm2 = xmm2 + a1 * b2;
2972  xmm3 = xmm3 + a2 * b1;
2973  xmm4 = xmm4 + a2 * b2;
2974  }
2975  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2976  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2977  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2978  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2979  }
2980  if( j < N ) {
2981  IntrinsicType xmm1, xmm2;
2982  for( size_t k=0UL; k<K; k+=IT::size ) {
2983  const IntrinsicType b1( B.load(k,j) );
2984  xmm1 = xmm1 + A.load(i ,k) * b1;
2985  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2986  }
2987  (~C)(i ,j) = sum( xmm1 ) * scalar;
2988  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2989  }
2990  }
2991  if( i < M ) {
2992  size_t j( 0UL );
2993  for( ; (j+2UL) <= N; j+=2UL ) {
2994  IntrinsicType xmm1, xmm2;
2995  for( size_t k=0UL; k<K; k+=IT::size ) {
2996  const IntrinsicType a1( A.load(i,k) );
2997  xmm1 = xmm1 + a1 * B.load(k,j );
2998  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2999  }
3000  (~C)(i,j ) = sum( xmm1 ) * scalar;
3001  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
3002  }
3003  if( j < N ) {
3004  IntrinsicType xmm1, xmm2;
3005  for( size_t k=0UL; k<K; k+=IT::size ) {
3006  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3007  }
3008  (~C)(i,j) = sum( xmm1 ) * scalar;
3009  }
3010  }
3011  }
3012  //**********************************************************************************************
3013 
3014  //**BLAS-based assignment to dense matrices (default)*******************************************
/*!\brief Default overload of the BLAS-based assignment kernel.
//
// \param C The target matrix of the assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload participates in overload resolution only when
// UseDefaultKernel<MT3,MT4,MT5,ST2> is satisfied. It issues no BLAS call itself; all four
// arguments are forwarded unchanged to selectDefaultAssignKernel().
*/
3028  template< typename MT3 // Type of the left-hand side target matrix
3029  , typename MT4 // Type of the left-hand side matrix operand
3030  , typename MT5 // Type of the right-hand side matrix operand
3031  , typename ST2 > // Type of the scalar value
3032  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3033  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3034  {
// Fall back to the non-BLAS kernel for this operand combination.
3035  selectDefaultAssignKernel( C, A, B, scalar );
3036  }
3037  //**********************************************************************************************
3038 
3039  //**BLAS-based assignment to dense matrices (single precision)**********************************
3040 #if BLAZE_BLAS_MODE
3041 
/*!\brief BLAS-based assignment kernel for single precision operands (cblas_sgemm).
//
// \param C The target matrix of the assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value, passed to cblas_sgemm as alpha.
//
// This overload participates in overload resolution only when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. The matrix dimensions and the
// spacing() of all three matrices are converted to \c int with boost::numeric_cast and handed
// to cblas_sgemm together with the data() pointers. Beta is 0.0F, so the previous content of
// \a C does not contribute to the result.
*/
3054  template< typename MT3 // Type of the left-hand side target matrix
3055  , typename MT4 // Type of the left-hand side matrix operand
3056  , typename MT5 // Type of the right-hand side matrix operand
3057  , typename ST2 > // Type of the scalar value
3058  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3059  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3060  {
3061  using boost::numeric_cast;
3062 
3066 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3067  const int M ( numeric_cast<int>( A.rows() ) );
3068  const int N ( numeric_cast<int>( B.columns() ) );
3069  const int K ( numeric_cast<int>( A.columns() ) );
3070  const int lda( numeric_cast<int>( A.spacing() ) );
3071  const int ldb( numeric_cast<int>( B.spacing() ) );
3072  const int ldc( numeric_cast<int>( C.spacing() ) );
3073 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
// NOTE(review): presumably this reflects A being stored row-major and B column-major -- confirm.
3074  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3075  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3076  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3077  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
3078  }
3079 #endif
3080  //**********************************************************************************************
3081 
3082  //**BLAS-based assignment to dense matrices (double precision)**********************************
3083 #if BLAZE_BLAS_MODE
3084 
/*!\brief BLAS-based assignment kernel for double precision operands (cblas_dgemm).
//
// \param C The target matrix of the assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value, passed to cblas_dgemm as alpha.
//
// This overload participates in overload resolution only when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. The matrix dimensions and the
// spacing() of all three matrices are converted to \c int with boost::numeric_cast and handed
// to cblas_dgemm together with the data() pointers. Beta is 0.0, so the previous content of
// \a C does not contribute to the result.
*/
3097  template< typename MT3 // Type of the left-hand side target matrix
3098  , typename MT4 // Type of the left-hand side matrix operand
3099  , typename MT5 // Type of the right-hand side matrix operand
3100  , typename ST2 > // Type of the scalar value
3101  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3102  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3103  {
3104  using boost::numeric_cast;
3105 
3109 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3110  const int M ( numeric_cast<int>( A.rows() ) );
3111  const int N ( numeric_cast<int>( B.columns() ) );
3112  const int K ( numeric_cast<int>( A.columns() ) );
3113  const int lda( numeric_cast<int>( A.spacing() ) );
3114  const int ldb( numeric_cast<int>( B.spacing() ) );
3115  const int ldc( numeric_cast<int>( C.spacing() ) );
3116 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
// NOTE(review): presumably this reflects A being stored row-major and B column-major -- confirm.
3117  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3118  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3119  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3120  M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
3121  }
3122 #endif
3123  //**********************************************************************************************
3124 
3125  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3126 #if BLAZE_BLAS_MODE
3127 
/*!\brief BLAS-based assignment kernel for single precision complex operands (cblas_cgemm).
//
// \param C The target matrix of the assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value; it is converted to complex<float> and used as alpha.
//
// This overload participates in overload resolution only when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is satisfied; the scalar type ST2 is not part of
// that condition. Beta is (0,0), so the previous content of \a C does not contribute to the
// result.
*/
3140  template< typename MT3 // Type of the left-hand side target matrix
3141  , typename MT4 // Type of the left-hand side matrix operand
3142  , typename MT5 // Type of the right-hand side matrix operand
3143  , typename ST2 > // Type of the scalar value
3144  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3145  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3146  {
3147  using boost::numeric_cast;
3148 
// Compile-time checks: the value_type of every element type must be float.
3152  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3153  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3154  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3155 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3156  const int M ( numeric_cast<int>( A.rows() ) );
3157  const int N ( numeric_cast<int>( B.columns() ) );
3158  const int K ( numeric_cast<int>( A.columns() ) );
3159  const int lda( numeric_cast<int>( A.spacing() ) );
3160  const int ldb( numeric_cast<int>( B.spacing() ) );
3161  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm interface receives alpha and beta by address, hence the named locals.
3162  const complex<float> alpha( scalar );
3163  const complex<float> beta ( 0.0F, 0.0F );
3164 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
3165  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3166  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3167  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3168  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3169  }
3170 #endif
3171  //**********************************************************************************************
3172 
3173  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3174 #if BLAZE_BLAS_MODE
3175 
/*!\brief BLAS-based assignment kernel for double precision complex operands (cblas_zgemm).
//
// \param C The target matrix of the assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value; it is converted to complex<double> and used as alpha.
//
// This overload participates in overload resolution only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is satisfied; the scalar type ST2 is not part of
// that condition. Beta is (0,0), so the previous content of \a C does not contribute to the
// result.
*/
3188  template< typename MT3 // Type of the left-hand side target matrix
3189  , typename MT4 // Type of the left-hand side matrix operand
3190  , typename MT5 // Type of the right-hand side matrix operand
3191  , typename ST2 > // Type of the scalar value
3192  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3193  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3194  {
3195  using boost::numeric_cast;
3196 
// Compile-time checks: the value_type of every element type must be double.
3200  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3201  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3202  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3203 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3204  const int M ( numeric_cast<int>( A.rows() ) );
3205  const int N ( numeric_cast<int>( B.columns() ) );
3206  const int K ( numeric_cast<int>( A.columns() ) );
3207  const int lda( numeric_cast<int>( A.spacing() ) );
3208  const int ldb( numeric_cast<int>( B.spacing() ) );
3209  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm interface receives alpha and beta by address, hence the named locals.
3210  const complex<double> alpha( scalar );
3211  const complex<double> beta ( 0.0, 0.0 );
3212 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
3213  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3214  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3215  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3216  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3217  }
3218 #endif
3219  //**********************************************************************************************
3220 
3221  //**Assignment to sparse matrices***************************************************************
/*!\brief Assignment of a scaled matrix product expression to a sparse matrix.
//
// \param lhs The target left-hand side sparse matrix.
// \param rhs The right-hand side scaled multiplication expression to be assigned.
// \return void
//
// The expression is not assigned element by element. It is first evaluated into a temporary
// of type \a TmpType, and that temporary is then assigned to the target.
*/
3233  template< typename MT // Type of the target sparse matrix
3234  , bool SO > // Storage order of the target sparse matrix
3235  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3236  {
3238 
// The temporary type depends on the storage order SO of the target.
// NOTE(review): presumably SelectType yields OppositeType when SO is true and ResultType
// otherwise -- confirm against SelectType.
3239  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3240 
3247 
3248  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3249  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3250 
// Evaluate the expression through serial() into the temporary, then assign the temporary.
3251  const TmpType tmp( serial( rhs ) );
3252  assign( ~lhs, tmp );
3253  }
3254  //**********************************************************************************************
3255 
3256  //**Addition assignment to dense matrices*******************************************************
/*!\brief Addition assignment of a scaled matrix product expression to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be added.
// \return void
//
// Both operands of the wrapped product \a rhs.matrix_ are evaluated through serial() into
// objects of type LT and RT. The evaluated operands, the target and \a rhs.scalar_ are then
// passed to selectAddAssignKernel().
*/
3268  template< typename MT // Type of the target dense matrix
3269  , bool SO > // Storage order of the target dense matrix
3270  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3271  {
3273 
3274  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3275  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3276 
3277  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3278  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3279 
// Nothing to add if the target has no elements or the shared dimension of the product is 0.
3280  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3281  return;
3282  }
3283 
3284  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3285  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3286 
// The evaluated operands must keep the operand dimensions and match the target.
3287  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3288  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3289  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3290  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3291  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3292  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3293 
3294  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3295  }
3296  //**********************************************************************************************
3297 
3298  //**Addition assignment to dense matrices (kernel selection)************************************
/*!\brief Kernel selection for the addition assignment of a scaled matrix product.
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
// \return void
//
// The choice depends only on the number of target elements: if rows times columns of \a C is
// below DMATTDMATMULT_THRESHOLD, selectDefaultAddAssignKernel() is called, otherwise
// selectBlasAddAssignKernel().
*/
3309  template< typename MT3 // Type of the left-hand side target matrix
3310  , typename MT4 // Type of the left-hand side matrix operand
3311  , typename MT5 // Type of the right-hand side matrix operand
3312  , typename ST2 > // Type of the scalar value
3313  static inline void selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3314  {
// Small targets take the default kernel; everything at or above the threshold goes to the
// BLAS kernel family.
3315  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
3316  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3317  else
3318  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3319  }
3320  //**********************************************************************************************
3321 
3322  //**Default addition assignment to dense matrices***********************************************
/*!\brief Default (non-vectorized) addition assignment kernel for a scaled matrix product.
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload participates in overload resolution only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> is NOT satisfied (DisableIf). The expression
// A * B * scalar is evaluated through serial() into a temporary of type ResultType, and that
// temporary is added to \a C with addAssign().
*/
3336  template< typename MT3 // Type of the left-hand side target matrix
3337  , typename MT4 // Type of the left-hand side matrix operand
3338  , typename MT5 // Type of the right-hand side matrix operand
3339  , typename ST2 > // Type of the scalar value
3340  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3341  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3342  {
// Materialize the scaled product first, then add the finished result to the target.
3343  const ResultType tmp( serial( A * B * scalar ) );
3344  addAssign( C, tmp );
3345  }
3346  //**********************************************************************************************
3347 
3348  //**Vectorized default addition assignment to row-major dense matrices**************************
3362  template< typename MT3 // Type of the left-hand side target matrix
3363  , typename MT4 // Type of the left-hand side matrix operand
3364  , typename MT5 // Type of the right-hand side matrix operand
3365  , typename ST2 > // Type of the scalar value
3366  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3367  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3368  {
3369  typedef IntrinsicTrait<ElementType> IT;
3370 
3371  const size_t M( A.rows() );
3372  const size_t N( B.columns() );
3373  const size_t K( A.columns() );
3374 
3375  size_t i( 0UL );
3376 
3377  for( ; (i+2UL) <= M; i+=2UL ) {
3378  size_t j( 0UL );
3379  for( ; (j+4UL) <= N; j+=4UL ) {
3380  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3381  for( size_t k=0UL; k<K; k+=IT::size ) {
3382  const IntrinsicType a1( A.load(i ,k) );
3383  const IntrinsicType a2( A.load(i+1UL,k) );
3384  const IntrinsicType b1( B.load(k,j ) );
3385  const IntrinsicType b2( B.load(k,j+1UL) );
3386  const IntrinsicType b3( B.load(k,j+2UL) );
3387  const IntrinsicType b4( B.load(k,j+3UL) );
3388  xmm1 = xmm1 + a1 * b1;
3389  xmm2 = xmm2 + a1 * b2;
3390  xmm3 = xmm3 + a1 * b3;
3391  xmm4 = xmm4 + a1 * b4;
3392  xmm5 = xmm5 + a2 * b1;
3393  xmm6 = xmm6 + a2 * b2;
3394  xmm7 = xmm7 + a2 * b3;
3395  xmm8 = xmm8 + a2 * b4;
3396  }
3397  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3398  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3399  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
3400  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
3401  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
3402  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
3403  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
3404  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
3405  }
3406  for( ; (j+2UL) <= N; j+=2UL ) {
3407  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3408  for( size_t k=0UL; k<K; k+=IT::size ) {
3409  const IntrinsicType a1( A.load(i ,k) );
3410  const IntrinsicType a2( A.load(i+1UL,k) );
3411  const IntrinsicType b1( B.load(k,j ) );
3412  const IntrinsicType b2( B.load(k,j+1UL) );
3413  xmm1 = xmm1 + a1 * b1;
3414  xmm2 = xmm2 + a1 * b2;
3415  xmm3 = xmm3 + a2 * b1;
3416  xmm4 = xmm4 + a2 * b2;
3417  }
3418  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3419  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3420  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3421  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3422  }
3423  if( j < N ) {
3424  IntrinsicType xmm1, xmm2;
3425  for( size_t k=0UL; k<K; k+=IT::size ) {
3426  const IntrinsicType b1( B.load(k,j) );
3427  xmm1 = xmm1 + A.load(i ,k) * b1;
3428  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3429  }
3430  (~C)(i ,j) += sum( xmm1 ) * scalar;
3431  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3432  }
3433  }
3434  if( i < M ) {
3435  size_t j( 0UL );
3436  for( ; (j+4UL) <= N; j+=4UL ) {
3437  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3438  for( size_t k=0UL; k<K; k+=IT::size ) {
3439  const IntrinsicType a1( A.load(i,k) );
3440  xmm1 = xmm1 + a1 * B.load(k,j );
3441  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3442  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3443  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3444  }
3445  (~C)(i,j ) += sum( xmm1 ) * scalar;
3446  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3447  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
3448  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
3449  }
3450  for( ; (j+2UL) <= N; j+=2UL ) {
3451  IntrinsicType xmm1, xmm2;
3452  for( size_t k=0UL; k<K; k+=IT::size ) {
3453  const IntrinsicType a1( A.load(i,k) );
3454  xmm1 = xmm1 + a1 * B.load(k,j );
3455  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3456  }
3457  (~C)(i,j ) += sum( xmm1 ) * scalar;
3458  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3459  }
3460  if( j < N ) {
3461  IntrinsicType xmm1, xmm2;
3462  for( size_t k=0UL; k<K; k+=IT::size ) {
3463  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3464  }
3465  (~C)(i,j) += sum( xmm1 ) * scalar;
3466  }
3467  }
3468  }
3469  //**********************************************************************************************
3470 
3471  //**Vectorized default addition assignment to column-major dense matrices***********************
3485  template< typename MT3 // Type of the left-hand side target matrix
3486  , typename MT4 // Type of the left-hand side matrix operand
3487  , typename MT5 // Type of the right-hand side matrix operand
3488  , typename ST2 > // Type of the scalar value
3489  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3490  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3491  {
3492  typedef IntrinsicTrait<ElementType> IT;
3493 
3494  const size_t M( A.rows() );
3495  const size_t N( B.columns() );
3496  const size_t K( A.columns() );
3497 
3498  size_t i( 0UL );
3499 
3500  for( ; (i+4UL) <= M; i+=4UL ) {
3501  size_t j( 0UL );
3502  for( ; (j+2UL) <= N; j+=2UL ) {
3503  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3504  for( size_t k=0UL; k<K; k+=IT::size ) {
3505  const IntrinsicType a1( A.load(i ,k) );
3506  const IntrinsicType a2( A.load(i+1UL,k) );
3507  const IntrinsicType a3( A.load(i+2UL,k) );
3508  const IntrinsicType a4( A.load(i+3UL,k) );
3509  const IntrinsicType b1( B.load(k,j ) );
3510  const IntrinsicType b2( B.load(k,j+1UL) );
3511  xmm1 = xmm1 + a1 * b1;
3512  xmm2 = xmm2 + a1 * b2;
3513  xmm3 = xmm3 + a2 * b1;
3514  xmm4 = xmm4 + a2 * b2;
3515  xmm5 = xmm5 + a3 * b1;
3516  xmm6 = xmm6 + a3 * b2;
3517  xmm7 = xmm7 + a4 * b1;
3518  xmm8 = xmm8 + a4 * b2;
3519  }
3520  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3521  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3522  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3523  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3524  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
3525  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
3526  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
3527  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
3528  }
3529  if( j < N ) {
3530  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3531  for( size_t k=0UL; k<K; k+=IT::size ) {
3532  const IntrinsicType b1( B.load(k,j) );
3533  xmm1 = xmm1 + A.load(i ,k) * b1;
3534  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3535  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3536  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3537  }
3538  (~C)(i ,j) += sum( xmm1 ) * scalar;
3539  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3540  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
3541  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
3542  }
3543  }
3544  for( ; (i+2UL) <= M; i+=2UL ) {
3545  size_t j( 0UL );
3546  for( ; (j+2UL) <= N; j+=2UL ) {
3547  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3548  for( size_t k=0UL; k<K; k+=IT::size ) {
3549  const IntrinsicType a1( A.load(i ,k) );
3550  const IntrinsicType a2( A.load(i+1UL,k) );
3551  const IntrinsicType b1( B.load(k,j ) );
3552  const IntrinsicType b2( B.load(k,j+1UL) );
3553  xmm1 = xmm1 + a1 * b1;
3554  xmm2 = xmm2 + a1 * b2;
3555  xmm3 = xmm3 + a2 * b1;
3556  xmm4 = xmm4 + a2 * b2;
3557  }
3558  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3559  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3560  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3561  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3562  }
3563  if( j < N ) {
3564  IntrinsicType xmm1, xmm2;
3565  for( size_t k=0UL; k<K; k+=IT::size ) {
3566  const IntrinsicType b1( B.load(k,j) );
3567  xmm1 = xmm1 + A.load(i ,k) * b1;
3568  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3569  }
3570  (~C)(i ,j) += sum( xmm1 ) * scalar;
3571  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3572  }
3573  }
3574  if( i < M ) {
3575  size_t j( 0UL );
3576  for( ; (j+2UL) <= N; j+=2UL ) {
3577  IntrinsicType xmm1, xmm2;
3578  for( size_t k=0UL; k<K; k+=IT::size ) {
3579  const IntrinsicType a1( A.load(i,k) );
3580  xmm1 = xmm1 + a1 * B.load(k,j );
3581  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3582  }
3583  (~C)(i,j ) += sum( xmm1 ) * scalar;
3584  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3585  }
3586  if( j < N ) {
3587  IntrinsicType xmm1, xmm2;
3588  for( size_t k=0UL; k<K; k+=IT::size ) {
3589  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3590  }
3591  (~C)(i,j) += sum( xmm1 ) * scalar;
3592  }
3593  }
3594  }
3595  //**********************************************************************************************
3596 
3597  //**BLAS-based addition assignment to dense matrices (default)**********************************
/*!\brief Default overload of the BLAS-based addition assignment kernel.
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload participates in overload resolution only when
// UseDefaultKernel<MT3,MT4,MT5,ST2> is satisfied. It issues no BLAS call itself; all four
// arguments are forwarded unchanged to selectDefaultAddAssignKernel().
*/
3611  template< typename MT3 // Type of the left-hand side target matrix
3612  , typename MT4 // Type of the left-hand side matrix operand
3613  , typename MT5 // Type of the right-hand side matrix operand
3614  , typename ST2 > // Type of the scalar value
3615  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3616  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3617  {
// Fall back to the non-BLAS kernel for this operand combination.
3618  selectDefaultAddAssignKernel( C, A, B, scalar );
3619  }
3620  //**********************************************************************************************
3621 
3622  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3623 #if BLAZE_BLAS_MODE
3624 
/*!\brief BLAS-based addition assignment kernel for single precision operands (cblas_sgemm).
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value, passed to cblas_sgemm as alpha.
//
// This overload participates in overload resolution only when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. The matrix dimensions and the
// spacing() of all three matrices are converted to \c int with boost::numeric_cast and handed
// to cblas_sgemm together with the data() pointers. Beta is 1.0F, so the scaled product is
// added to the existing content of \a C.
*/
3637  template< typename MT3 // Type of the left-hand side target matrix
3638  , typename MT4 // Type of the left-hand side matrix operand
3639  , typename MT5 // Type of the right-hand side matrix operand
3640  , typename ST2 > // Type of the scalar value
3641  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3642  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3643  {
3644  using boost::numeric_cast;
3645 
3649 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3650  const int M ( numeric_cast<int>( A.rows() ) );
3651  const int N ( numeric_cast<int>( B.columns() ) );
3652  const int K ( numeric_cast<int>( A.columns() ) );
3653  const int lda( numeric_cast<int>( A.spacing() ) );
3654  const int ldb( numeric_cast<int>( B.spacing() ) );
3655  const int ldc( numeric_cast<int>( C.spacing() ) );
3656 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
// NOTE(review): presumably this reflects A being stored row-major and B column-major -- confirm.
3657  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3658  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3659  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3660  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3661  }
3662 #endif
3663  //**********************************************************************************************
3664 
3665  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3666 #if BLAZE_BLAS_MODE
3667 
/*!\brief BLAS-based addition assignment kernel for double precision operands (cblas_dgemm).
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value, passed to cblas_dgemm as alpha.
//
// This overload participates in overload resolution only when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied. The matrix dimensions and the
// spacing() of all three matrices are converted to \c int with boost::numeric_cast and handed
// to cblas_dgemm together with the data() pointers. Beta is 1.0, so the scaled product is
// added to the existing content of \a C.
*/
3680  template< typename MT3 // Type of the left-hand side target matrix
3681  , typename MT4 // Type of the left-hand side matrix operand
3682  , typename MT5 // Type of the right-hand side matrix operand
3683  , typename ST2 > // Type of the scalar value
3684  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3685  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3686  {
3687  using boost::numeric_cast;
3688 
3692 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3693  const int M ( numeric_cast<int>( A.rows() ) );
3694  const int N ( numeric_cast<int>( B.columns() ) );
3695  const int K ( numeric_cast<int>( A.columns() ) );
3696  const int lda( numeric_cast<int>( A.spacing() ) );
3697  const int ldb( numeric_cast<int>( B.spacing() ) );
3698  const int ldc( numeric_cast<int>( C.spacing() ) );
3699 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
// NOTE(review): presumably this reflects A being stored row-major and B column-major -- confirm.
3700  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3701  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3702  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3703  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3704  }
3705 #endif
3706  //**********************************************************************************************
3707 
3708  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3709 #if BLAZE_BLAS_MODE
3710 
/*!\brief BLAS-based addition assignment kernel for single precision complex operands
//        (cblas_cgemm).
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value; it is converted to complex<float> and used as alpha.
//
// This overload participates in overload resolution only when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is satisfied; the scalar type ST2 is not part of
// that condition. Beta is (1,0), so the scaled product is added to the existing content of
// \a C.
*/
3723  template< typename MT3 // Type of the left-hand side target matrix
3724  , typename MT4 // Type of the left-hand side matrix operand
3725  , typename MT5 // Type of the right-hand side matrix operand
3726  , typename ST2 > // Type of the scalar value
3727  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3728  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3729  {
3730  using boost::numeric_cast;
3731 
// Compile-time checks: the value_type of every element type must be float.
3735  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3736  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3737  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3738 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3739  const int M ( numeric_cast<int>( A.rows() ) );
3740  const int N ( numeric_cast<int>( B.columns() ) );
3741  const int K ( numeric_cast<int>( A.columns() ) );
3742  const int lda( numeric_cast<int>( A.spacing() ) );
3743  const int ldb( numeric_cast<int>( B.spacing() ) );
3744  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm interface receives alpha and beta by address, hence the named locals.
3745  const complex<float> alpha( scalar );
3746  const complex<float> beta ( 1.0F, 0.0F );
3747 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
3748  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3749  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3750  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3751  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3752  }
3753 #endif
3754  //**********************************************************************************************
3755 
3756  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3757 #if BLAZE_BLAS_MODE
3758 
/*!\brief BLAS-based addition assignment kernel for double precision complex operands
//        (cblas_zgemm).
//
// \param C The target matrix of the addition assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value; it is converted to complex<double> and used as alpha.
//
// This overload participates in overload resolution only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is satisfied; the scalar type ST2 is not part of
// that condition. Beta is (1,0), so the scaled product is added to the existing content of
// \a C.
*/
3771  template< typename MT3 // Type of the left-hand side target matrix
3772  , typename MT4 // Type of the left-hand side matrix operand
3773  , typename MT5 // Type of the right-hand side matrix operand
3774  , typename ST2 > // Type of the scalar value
3775  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3776  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3777  {
3778  using boost::numeric_cast;
3779 
// Compile-time checks: the value_type of every element type must be double.
3783  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3784  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3785  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3786 
// CBLAS takes int sizes; numeric_cast is the range-checked size_t -> int conversion.
3787  const int M ( numeric_cast<int>( A.rows() ) );
3788  const int N ( numeric_cast<int>( B.columns() ) );
3789  const int K ( numeric_cast<int>( A.columns() ) );
3790  const int lda( numeric_cast<int>( A.spacing() ) );
3791  const int ldb( numeric_cast<int>( B.spacing() ) );
3792  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm interface receives alpha and beta by address, hence the named locals.
3793  const complex<double> alpha( scalar );
3794  const complex<double> beta ( 1.0, 0.0 );
3795 
// The storage order follows the target MT3. For a row-major target A is passed as
// CblasNoTrans and B as CblasTrans; for a column-major target the two flags are swapped.
3796  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3797  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3798  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3799  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3800  }
3801 #endif
3802  //**********************************************************************************************
3803 
3804  //**Addition assignment to sparse matrices******************************************************
3805  // No special implementation for the addition assignment to sparse matrices.
3806  //**********************************************************************************************
3807 
3808  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of a scaled matrix product expression to a dense matrix.
//
// \param lhs The target left-hand side dense matrix.
// \param rhs The right-hand side scaled multiplication expression to be subtracted.
// \return void
//
// Both operands of the wrapped product \a rhs.matrix_ are evaluated through serial() into
// objects of type LT and RT. The evaluated operands, the target and \a rhs.scalar_ are then
// passed to selectSubAssignKernel().
*/
3820  template< typename MT // Type of the target dense matrix
3821  , bool SO > // Storage order of the target dense matrix
3822  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3823  {
3825 
3826  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3827  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3828 
3829  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3830  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3831 
// Nothing to subtract if the target has no elements or the shared dimension of the product
// is 0.
3832  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3833  return;
3834  }
3835 
3836  LT A( serial( left ) ); // Evaluation of the left-hand side dense matrix operand
3837  RT B( serial( right ) ); // Evaluation of the right-hand side dense matrix operand
3838 
// The evaluated operands must keep the operand dimensions and match the target.
3839  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3840  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3841  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3842  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3843  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3844  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3845 
3846  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3847  }
3848  //**********************************************************************************************
3849 
3850  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Kernel selection for the subtraction assignment of a scaled matrix product.
//
// \param C The target matrix of the subtraction assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
// \return void
//
// The choice depends only on the number of target elements: if rows times columns of \a C is
// below DMATTDMATMULT_THRESHOLD, selectDefaultSubAssignKernel() is called, otherwise
// selectBlasSubAssignKernel().
*/
3861  template< typename MT3 // Type of the left-hand side target matrix
3862  , typename MT4 // Type of the left-hand side matrix operand
3863  , typename MT5 // Type of the right-hand side matrix operand
3864  , typename ST2 > // Type of the scalar value
3865  static inline void selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3866  {
// Small targets take the default kernel; everything at or above the threshold goes to the
// BLAS kernel family.
3867  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
3868  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3869  else
3870  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3871  }
3872  //**********************************************************************************************
3873 
3874  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Default (non-vectorized) subtraction assignment kernel for a scaled matrix product.
//
// \param C The target matrix of the subtraction assignment.
// \param A The left-hand side matrix operand.
// \param B The right-hand side matrix operand.
// \param scalar The scalar value.
//
// This overload participates in overload resolution only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> is NOT satisfied (DisableIf). The expression
// A * B * scalar is evaluated through serial() into a temporary of type ResultType, and that
// temporary is subtracted from \a C with subAssign().
*/
3888  template< typename MT3 // Type of the left-hand side target matrix
3889  , typename MT4 // Type of the left-hand side matrix operand
3890  , typename MT5 // Type of the right-hand side matrix operand
3891  , typename ST2 > // Type of the scalar value
3892  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3893  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3894  {
// Materialize the scaled product first, then subtract the finished result from the target.
3895  const ResultType tmp( serial( A * B * scalar ) );
3896  subAssign( C, tmp );
3897  }
3898  //**********************************************************************************************
3899 
3900  //**Vectorized default subtraction assignment to row-major dense matrices***********************
3914  template< typename MT3 // Type of the left-hand side target matrix
3915  , typename MT4 // Type of the left-hand side matrix operand
3916  , typename MT5 // Type of the right-hand side matrix operand
3917  , typename ST2 > // Type of the scalar value
3918  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3919  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3920  {
3921  typedef IntrinsicTrait<ElementType> IT;
3922 
3923  const size_t M( A.rows() );
3924  const size_t N( B.columns() );
3925  const size_t K( A.columns() );
3926 
3927  size_t i( 0UL );
3928 
3929  for( ; (i+2UL) <= M; i+=2UL ) {
3930  size_t j( 0UL );
3931  for( ; (j+4UL) <= N; j+=4UL ) {
3932  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3933  for( size_t k=0UL; k<K; k+=IT::size ) {
3934  const IntrinsicType a1( A.load(i ,k) );
3935  const IntrinsicType a2( A.load(i+1UL,k) );
3936  const IntrinsicType b1( B.load(k,j ) );
3937  const IntrinsicType b2( B.load(k,j+1UL) );
3938  const IntrinsicType b3( B.load(k,j+2UL) );
3939  const IntrinsicType b4( B.load(k,j+3UL) );
3940  xmm1 = xmm1 + a1 * b1;
3941  xmm2 = xmm2 + a1 * b2;
3942  xmm3 = xmm3 + a1 * b3;
3943  xmm4 = xmm4 + a1 * b4;
3944  xmm5 = xmm5 + a2 * b1;
3945  xmm6 = xmm6 + a2 * b2;
3946  xmm7 = xmm7 + a2 * b3;
3947  xmm8 = xmm8 + a2 * b4;
3948  }
3949  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3950  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3951  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
3952  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
3953  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
3954  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
3955  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
3956  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
3957  }
3958  for( ; (j+2UL) <= N; j+=2UL ) {
3959  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3960  for( size_t k=0UL; k<K; k+=IT::size ) {
3961  const IntrinsicType a1( A.load(i ,k) );
3962  const IntrinsicType a2( A.load(i+1UL,k) );
3963  const IntrinsicType b1( B.load(k,j ) );
3964  const IntrinsicType b2( B.load(k,j+1UL) );
3965  xmm1 = xmm1 + a1 * b1;
3966  xmm2 = xmm2 + a1 * b2;
3967  xmm3 = xmm3 + a2 * b1;
3968  xmm4 = xmm4 + a2 * b2;
3969  }
3970  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3971  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3972  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
3973  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
3974  }
3975  if( j < N ) {
3976  IntrinsicType xmm1, xmm2;
3977  for( size_t k=0UL; k<K; k+=IT::size ) {
3978  const IntrinsicType b1( B.load(k,j) );
3979  xmm1 = xmm1 + A.load(i ,k) * b1;
3980  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3981  }
3982  (~C)(i ,j) -= sum( xmm1 ) * scalar;
3983  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
3984  }
3985  }
3986  if( i < M ) {
3987  size_t j( 0UL );
3988  for( ; (j+4UL) <= N; j+=4UL ) {
3989  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3990  for( size_t k=0UL; k<K; k+=IT::size ) {
3991  const IntrinsicType a1( A.load(i,k) );
3992  xmm1 = xmm1 + a1 * B.load(k,j );
3993  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3994  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3995  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3996  }
3997  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3998  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3999  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
4000  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
4001  }
4002  for( ; (j+2UL) <= N; j+=2UL ) {
4003  IntrinsicType xmm1, xmm2;
4004  for( size_t k=0UL; k<K; k+=IT::size ) {
4005  const IntrinsicType a1( A.load(i,k) );
4006  xmm1 = xmm1 + a1 * B.load(k,j );
4007  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4008  }
4009  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4010  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4011  }
4012  if( j < N ) {
4013  IntrinsicType xmm1, xmm2;
4014  for( size_t k=0UL; k<K; k+=IT::size ) {
4015  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4016  }
4017  (~C)(i,j) -= sum( xmm1 ) * scalar;
4018  }
4019  }
4020  }
4021  //**********************************************************************************************
4022 
4023  //**Vectorized default subtraction assignment to column-major dense matrices********************
4037  template< typename MT3 // Type of the left-hand side target matrix
4038  , typename MT4 // Type of the left-hand side matrix operand
4039  , typename MT5 // Type of the right-hand side matrix operand
4040  , typename ST2 > // Type of the scalar value
4041  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4042  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
4043  {
4044  typedef IntrinsicTrait<ElementType> IT;
4045 
4046  const size_t M( A.rows() );
4047  const size_t N( B.columns() );
4048  const size_t K( A.columns() );
4049 
4050  size_t i( 0UL );
4051 
4052  for( ; (i+4UL) <= M; i+=4UL ) {
4053  size_t j( 0UL );
4054  for( ; (j+2UL) <= N; j+=2UL ) {
4055  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4056  for( size_t k=0UL; k<K; k+=IT::size ) {
4057  const IntrinsicType a1( A.load(i ,k) );
4058  const IntrinsicType a2( A.load(i+1UL,k) );
4059  const IntrinsicType a3( A.load(i+2UL,k) );
4060  const IntrinsicType a4( A.load(i+3UL,k) );
4061  const IntrinsicType b1( B.load(k,j ) );
4062  const IntrinsicType b2( B.load(k,j+1UL) );
4063  xmm1 = xmm1 + a1 * b1;
4064  xmm2 = xmm2 + a1 * b2;
4065  xmm3 = xmm3 + a2 * b1;
4066  xmm4 = xmm4 + a2 * b2;
4067  xmm5 = xmm5 + a3 * b1;
4068  xmm6 = xmm6 + a3 * b2;
4069  xmm7 = xmm7 + a4 * b1;
4070  xmm8 = xmm8 + a4 * b2;
4071  }
4072  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4073  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4074  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4075  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4076  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
4077  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
4078  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
4079  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
4080  }
4081  if( j < N ) {
4082  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4083  for( size_t k=0UL; k<K; k+=IT::size ) {
4084  const IntrinsicType b1( B.load(k,j) );
4085  xmm1 = xmm1 + A.load(i ,k) * b1;
4086  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4087  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
4088  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
4089  }
4090  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4091  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4092  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
4093  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
4094  }
4095  }
4096  for( ; (i+2UL) <= M; i+=2UL ) {
4097  size_t j( 0UL );
4098  for( ; (j+2UL) <= N; j+=2UL ) {
4099  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4100  for( size_t k=0UL; k<K; k+=IT::size ) {
4101  const IntrinsicType a1( A.load(i ,k) );
4102  const IntrinsicType a2( A.load(i+1UL,k) );
4103  const IntrinsicType b1( B.load(k,j ) );
4104  const IntrinsicType b2( B.load(k,j+1UL) );
4105  xmm1 = xmm1 + a1 * b1;
4106  xmm2 = xmm2 + a1 * b2;
4107  xmm3 = xmm3 + a2 * b1;
4108  xmm4 = xmm4 + a2 * b2;
4109  }
4110  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4111  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4112  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4113  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4114  }
4115  if( j < N ) {
4116  IntrinsicType xmm1, xmm2;
4117  for( size_t k=0UL; k<K; k+=IT::size ) {
4118  const IntrinsicType b1( B.load(k,j) );
4119  xmm1 = xmm1 + A.load(i ,k) * b1;
4120  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4121  }
4122  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4123  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4124  }
4125  }
4126  if( i < M ) {
4127  size_t j( 0UL );
4128  for( ; (j+2UL) <= N; j+=2UL ) {
4129  IntrinsicType xmm1, xmm2;
4130  for( size_t k=0UL; k<K; k+=IT::size ) {
4131  const IntrinsicType a1( A.load(i,k) );
4132  xmm1 = xmm1 + a1 * B.load(k,j );
4133  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4134  }
4135  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4136  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4137  }
4138  if( j < N ) {
4139  IntrinsicType xmm1, xmm2;
4140  for( size_t k=0UL; k<K; k+=IT::size ) {
4141  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4142  }
4143  (~C)(i,j) -= sum( xmm1 ) * scalar;
4144  }
4145  }
4146  }
4147  //**********************************************************************************************
4148 
4149  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
      // Fallback overload of the BLAS kernel selection for the scaled subtraction assignment.
      // It is enabled through UseDefaultKernel<MT3,MT4,MT5,ST2> and makes no BLAS call: the
      // arguments C, A, B and scalar are forwarded unchanged to selectDefaultSubAssignKernel().
4163  template< typename MT3 // Type of the left-hand side target matrix
4164  , typename MT4 // Type of the left-hand side matrix operand
4165  , typename MT5 // Type of the right-hand side matrix operand
4166  , typename ST2 > // Type of the scalar value
4167  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4168  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4169  {
4170  selectDefaultSubAssignKernel( C, A, B, scalar );
4171  }
4172  //**********************************************************************************************
4173 
4174  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
      // Scaled subtraction assignment through cblas_sgemm; compiled only when BLAZE_BLAS_MODE is
      // set and selected through UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>.
      //
      // Parameters:
      //   C       target matrix; its data() and spacing() are handed to BLAS and updated in place
      //   A       left-hand side matrix operand
      //   B       right-hand side matrix operand
      //   scalar  scaling factor of the product; passed negated as the gemm alpha
      //
      // The gemm beta is 1.0F, so the existing contents of C take part in the result
      // (C = -scalar * A * B + C).
4175 #if BLAZE_BLAS_MODE
4176
4189  template< typename MT3 // Type of the left-hand side target matrix
4190  , typename MT4 // Type of the left-hand side matrix operand
4191  , typename MT5 // Type of the right-hand side matrix operand
4192  , typename ST2 > // Type of the scalar value
4193  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4194  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4195  {
4196  using boost::numeric_cast;
4197
4201
      // The CBLAS interface takes int sizes; numeric_cast performs the size_t -> int conversion
4202  const int M ( numeric_cast<int>( A.rows() ) );
4203  const int N ( numeric_cast<int>( B.columns() ) );
4204  const int K ( numeric_cast<int>( A.columns() ) );
4205  const int lda( numeric_cast<int>( A.spacing() ) );
4206  const int ldb( numeric_cast<int>( B.spacing() ) );
4207  const int ldc( numeric_cast<int>( C.spacing() ) );
4208
      // Storage order follows the target MT3. For a row-major target A is passed untransposed and
      // B transposed; for a column-major target the two transposition flags are swapped.
4209  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4210  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4211  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4212  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
4213  }
4214 #endif
4215  //**********************************************************************************************
4216 
4217  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
      // Scaled subtraction assignment through cblas_dgemm; compiled only when BLAZE_BLAS_MODE is
      // set and selected through UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>.
      //
      // Parameters:
      //   C       target matrix; its data() and spacing() are handed to BLAS and updated in place
      //   A       left-hand side matrix operand
      //   B       right-hand side matrix operand
      //   scalar  scaling factor of the product; passed negated as the gemm alpha
      //
      // The gemm beta is 1.0, so the existing contents of C take part in the result
      // (C = -scalar * A * B + C).
4218 #if BLAZE_BLAS_MODE
4219
4232  template< typename MT3 // Type of the left-hand side target matrix
4233  , typename MT4 // Type of the left-hand side matrix operand
4234  , typename MT5 // Type of the right-hand side matrix operand
4235  , typename ST2 > // Type of the scalar value
4236  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4237  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4238  {
4239  using boost::numeric_cast;
4240
4244
      // The CBLAS interface takes int sizes; numeric_cast performs the size_t -> int conversion
4245  const int M ( numeric_cast<int>( A.rows() ) );
4246  const int N ( numeric_cast<int>( B.columns() ) );
4247  const int K ( numeric_cast<int>( A.columns() ) );
4248  const int lda( numeric_cast<int>( A.spacing() ) );
4249  const int ldb( numeric_cast<int>( B.spacing() ) );
4250  const int ldc( numeric_cast<int>( C.spacing() ) );
4251
      // Storage order follows the target MT3. For a row-major target A is passed untransposed and
      // B transposed; for a column-major target the two transposition flags are swapped.
4252  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4253  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4254  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4255  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
4256  }
4257 #endif
4258  //**********************************************************************************************
4259 
4260  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
      // Scaled subtraction assignment through cblas_cgemm; compiled only when BLAZE_BLAS_MODE is
      // set and selected through UseSinglePrecisionComplexKernel<MT3,MT4,MT5>.
      //
      // Parameters:
      //   C       target matrix; its data() and spacing() are handed to BLAS and updated in place
      //   A       left-hand side matrix operand
      //   B       right-hand side matrix operand
      //   scalar  scaling factor of the product; negated and converted to complex<float> as alpha
      //
      // beta is (1,0), so the existing contents of C take part in the result
      // (C = -scalar * A * B + C).
4261 #if BLAZE_BLAS_MODE
4262
4275  template< typename MT3 // Type of the left-hand side target matrix
4276  , typename MT4 // Type of the left-hand side matrix operand
4277  , typename MT5 // Type of the right-hand side matrix operand
4278  , typename ST2 > // Type of the scalar value
4279  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4280  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4281  {
4282  using boost::numeric_cast;
4283
      // The component type (ElementType::value_type) of all three matrices must be float
4287  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
4288  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
4289  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
4290
      // The CBLAS interface takes int sizes; numeric_cast performs the size_t -> int conversion
4291  const int M ( numeric_cast<int>( A.rows() ) );
4292  const int N ( numeric_cast<int>( B.columns() ) );
4293  const int K ( numeric_cast<int>( A.columns() ) );
4294  const int lda( numeric_cast<int>( A.spacing() ) );
4295  const int ldb( numeric_cast<int>( B.spacing() ) );
4296  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm call below receives alpha and beta by address, hence the named locals
4297  const complex<float> alpha( -scalar );
4298  const complex<float> beta ( 1.0F, 0.0F );
4299
      // Storage order follows the target MT3. For a row-major target A is passed untransposed and
      // B transposed; for a column-major target the two transposition flags are swapped.
4300  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4301  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4302  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4303  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4304  }
4305 #endif
4306  //**********************************************************************************************
4307 
4308  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
      // Scaled subtraction assignment through cblas_zgemm; compiled only when BLAZE_BLAS_MODE is
      // set and selected through UseDoublePrecisionComplexKernel<MT3,MT4,MT5>.
      //
      // Parameters:
      //   C       target matrix; its data() and spacing() are handed to BLAS and updated in place
      //   A       left-hand side matrix operand
      //   B       right-hand side matrix operand
      //   scalar  scaling factor of the product; negated and converted to complex<double> as alpha
      //
      // beta is (1,0), so the existing contents of C take part in the result
      // (C = -scalar * A * B + C).
4309 #if BLAZE_BLAS_MODE
4310
4323  template< typename MT3 // Type of the left-hand side target matrix
4324  , typename MT4 // Type of the left-hand side matrix operand
4325  , typename MT5 // Type of the right-hand side matrix operand
4326  , typename ST2 > // Type of the scalar value
4327  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4328  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4329  {
4330  using boost::numeric_cast;
4331
      // The component type (ElementType::value_type) of all three matrices must be double
4335  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
4336  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
4337  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
4338
      // The CBLAS interface takes int sizes; numeric_cast performs the size_t -> int conversion
4339  const int M ( numeric_cast<int>( A.rows() ) );
4340  const int N ( numeric_cast<int>( B.columns() ) );
4341  const int K ( numeric_cast<int>( A.columns() ) );
4342  const int lda( numeric_cast<int>( A.spacing() ) );
4343  const int ldb( numeric_cast<int>( B.spacing() ) );
4344  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex gemm call below receives alpha and beta by address, hence the named locals
4345  const complex<double> alpha( -scalar );
4346  const complex<double> beta ( 1.0, 0.0 );
4347
      // Storage order follows the target MT3. For a row-major target A is passed untransposed and
      // B transposed; for a column-major target the two transposition flags are swapped.
4348  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4349  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4350  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4351  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4352  }
4353 #endif
4354  //**********************************************************************************************
4355 
4356  //**Subtraction assignment to sparse matrices***************************************************
4357  // No special implementation for the subtraction assignment to sparse matrices.
4358  //**********************************************************************************************
4359 
4360  //**Multiplication assignment to dense matrices*************************************************
4361  // No special implementation for the multiplication assignment to dense matrices.
4362  //**********************************************************************************************
4363 
4364  //**Multiplication assignment to sparse matrices************************************************
4365  // No special implementation for the multiplication assignment to sparse matrices.
4366  //**********************************************************************************************
4367 
4368  //**SMP assignment to dense matrices************************************************************
      // SMP assignment of a scaled matrix product (rhs) to the dense matrix lhs; enabled through
      // UseSMPAssign<MT>.
      //
      // Parameters:
      //   lhs  target dense matrix of storage order SO
      //   rhs  scaled product expression; its operands are reached through rhs.matrix_ and its
      //        scaling factor through rhs.scalar_
      //
      // Both operands are first materialized as LT / RT objects, then the assignment is
      // dispatched again on the expression A * B * rhs.scalar_.
      // NOTE(review): MMM, LT and RT are declared outside this section -- presumably the wrapped
      // product type and the evaluation types of its two operands; confirm.
4382  template< typename MT // Type of the target dense matrix
4383  , bool SO > // Storage order of the target dense matrix
4384  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4385  smpAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4386  {
4388
4389  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4390  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4391
4392  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4393  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4394
      // Empty target: nothing to assign. Zero inner dimension: the target is only reset.
4395  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
4396  return;
4397  }
4398  else if( left.columns() == 0UL ) {
4399  reset( ~lhs );
4400  return;
4401  }
4402
4403  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4404  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4405
4406  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4407  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4408  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4409  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4410  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4411  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4412
      // Re-dispatch on the product of the evaluated operands
4413  smpAssign( ~lhs, A * B * rhs.scalar_ );
4414  }
4415  //**********************************************************************************************
4416 
4417  //**SMP assignment to sparse matrices***********************************************************
      // SMP assignment of a scaled matrix product (rhs) to the sparse matrix lhs; enabled through
      // UseSMPAssign<MT>.
      //
      // Parameters:
      //   lhs  target sparse matrix of storage order SO
      //   rhs  scaled product expression to be assigned
      //
      // The expression is first evaluated into a temporary, which is then assigned to lhs.
4431  template< typename MT // Type of the target sparse matrix
4432  , bool SO > // Storage order of the target sparse matrix
4433  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4434  smpAssign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4435  {
4437
      // Temporary type: OppositeType when SO is true, ResultType otherwise
4438  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
4439
4446
4447  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4448  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4449
4450  const TmpType tmp( rhs );
4451  smpAssign( ~lhs, tmp );
4452  }
4453  //**********************************************************************************************
4454 
4455  //**SMP addition assignment to dense matrices***************************************************
      // SMP addition assignment of a scaled matrix product (rhs) to the dense matrix lhs; enabled
      // through UseSMPAssign<MT>.
      //
      // Parameters:
      //   lhs  target dense matrix of storage order SO
      //   rhs  scaled product expression; its operands are reached through rhs.matrix_ and its
      //        scaling factor through rhs.scalar_
      //
      // Both operands are first materialized as LT / RT objects, then the addition assignment is
      // dispatched again on the expression A * B * rhs.scalar_.
      // NOTE(review): MMM, LT and RT are declared outside this section -- presumably the wrapped
      // product type and the evaluation types of its two operands; confirm.
4469  template< typename MT // Type of the target dense matrix
4470  , bool SO > // Storage order of the target dense matrix
4471  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4472  smpAddAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4473  {
4475
4476  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4477  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4478
4479  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4480  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4481
      // Empty target or zero inner dimension: the target is left unchanged
4482  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4483  return;
4484  }
4485
4486  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4487  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4488
4489  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4490  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4491  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4492  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4493  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4494  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4495
      // Re-dispatch on the product of the evaluated operands
4496  smpAddAssign( ~lhs, A * B * rhs.scalar_ );
4497  }
4498  //**********************************************************************************************
4499 
4500  //**SMP addition assignment to sparse matrices**************************************************
4501  // No special implementation for the SMP addition assignment to sparse matrices.
4502  //**********************************************************************************************
4503 
4504  //**SMP subtraction assignment to dense matrices************************************************
      // SMP subtraction assignment of a scaled matrix product (rhs) to the dense matrix lhs;
      // enabled through UseSMPAssign<MT>.
      //
      // Parameters:
      //   lhs  target dense matrix of storage order SO
      //   rhs  scaled product expression; its operands are reached through rhs.matrix_ and its
      //        scaling factor through rhs.scalar_
      //
      // Both operands are first materialized as LT / RT objects, then the subtraction assignment
      // is dispatched again on the expression A * B * rhs.scalar_.
      // NOTE(review): MMM, LT and RT are declared outside this section -- presumably the wrapped
      // product type and the evaluation types of its two operands; confirm.
4518  template< typename MT // Type of the target dense matrix
4519  , bool SO > // Storage order of the target dense matrix
4520  friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4521  smpSubAssign( DenseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
4522  {
4524
4525  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
4526  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
4527
4528  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4529  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4530
      // Empty target or zero inner dimension: the target is left unchanged
4531  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
4532  return;
4533  }
4534
4535  LT A( left ); // Evaluation of the left-hand side dense matrix operand
4536  RT B( right ); // Evaluation of the right-hand side dense matrix operand
4537
4538  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
4539  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
4540  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
4541  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
4542  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
4543  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
4544
      // Re-dispatch on the product of the evaluated operands
4545  smpSubAssign( ~lhs, A * B * rhs.scalar_ );
4546  }
4547  //**********************************************************************************************
4548 
4549  //**SMP subtraction assignment to sparse matrices***********************************************
4550  // No special implementation for the SMP subtraction assignment to sparse matrices.
4551  //**********************************************************************************************
4552 
4553  //**SMP multiplication assignment to dense matrices*********************************************
4554  // No special implementation for the SMP multiplication assignment to dense matrices.
4555  //**********************************************************************************************
4556 
4557  //**SMP multiplication assignment to sparse matrices********************************************
4558  // No special implementation for the SMP multiplication assignment to sparse matrices.
4559  //**********************************************************************************************
4560 
4561  //**Compile time checks*************************************************************************
4570  //**********************************************************************************************
4571 };
4573 //*************************************************************************************************
4574 
4575 
4576 
4577 
4578 //=================================================================================================
4579 //
4580 // GLOBAL BINARY ARITHMETIC OPERATORS
4581 //
4582 //=================================================================================================
4583 
4584 //*************************************************************************************************
     // Builds the DMatTDMatMultExpr<T1,T2> expression object for the product of lhs and rhs.
     //
     // Returns: DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs )
     // Throws:  std::invalid_argument ("Matrix sizes do not match") when the number of columns of
     //          lhs differs from the number of rows of rhs.
     //
     // NOTE(review): the declarator line (4616, naming the function and its lhs/rhs parameters)
     // is absent from this listing -- presumably operator*() taking a row-major and a
     // column-major DenseMatrix; confirm against the original header.
4613 template< typename T1 // Type of the left-hand side dense matrix
4614  , typename T2 > // Type of the right-hand side dense matrix
4615 inline const DMatTDMatMultExpr<T1,T2>
4617 {
4619
     // The inner dimensions of the two operands have to agree
4620  if( (~lhs).columns() != (~rhs).rows() )
4621  throw std::invalid_argument( "Matrix sizes do not match" );
4622
4623  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4624 }
4625 //*************************************************************************************************
4626 
4627 
4628 
4629 
4630 //=================================================================================================
4631 //
4632 // EXPRESSION TRAIT SPECIALIZATIONS
4633 //
4634 //=================================================================================================
4635 
4636 //*************************************************************************************************
     // Specialization of DMatDVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> left operand and
     // a vector type VT.
     // Type is the result of DMatDVecMultExprTrait applied to MT1 and the
     // TDMatDVecMultExprTrait<MT2,VT> type, i.e. the product is typed as MT1 * ( MT2 * VT ).
     // This holds only if MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT
     // a dense column vector; otherwise Type is INVALID_TYPE.
4638 template< typename MT1, typename MT2, typename VT >
4639 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4640 {
4641  public:
4642  //**********************************************************************************************
4643  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4644  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4645  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4646  , typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4647  , INVALID_TYPE >::Type Type;
4648  //**********************************************************************************************
4649 };
4651 //*************************************************************************************************
4652 
4653 
4654 //*************************************************************************************************
     // Specialization of DMatSVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> left operand and
     // a vector type VT.
     // Type is the result of DMatDVecMultExprTrait applied to MT1 and the
     // TDMatSVecMultExprTrait<MT2,VT> type, i.e. the product is typed as MT1 * ( MT2 * VT ).
     // This holds only if MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT
     // a sparse column vector; otherwise Type is INVALID_TYPE.
4656 template< typename MT1, typename MT2, typename VT >
4657 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4658 {
4659  public:
4660  //**********************************************************************************************
4661  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4662  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4663  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4664  , typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4665  , INVALID_TYPE >::Type Type;
4666  //**********************************************************************************************
4667 };
4669 //*************************************************************************************************
4670 
4671 
4672 //*************************************************************************************************
     // Specialization of TDVecDMatMultExprTrait for a vector type VT and a
     // DMatTDMatMultExpr<MT1,MT2> right operand.
     // Type is the result of TDVecTDMatMultExprTrait applied to the
     // TDVecDMatMultExprTrait<VT,MT1> type and MT2, i.e. the product is typed as
     // ( VT * MT1 ) * MT2.
     // This holds only if VT is a dense row vector, MT1 a row-major dense matrix and MT2 a
     // column-major dense matrix; otherwise Type is INVALID_TYPE.
4674 template< typename VT, typename MT1, typename MT2 >
4675 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4676 {
4677  public:
4678  //**********************************************************************************************
4679  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4680  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4681  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4682  , typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4683  , INVALID_TYPE >::Type Type;
4684  //**********************************************************************************************
4685 };
4687 //*************************************************************************************************
4688 
4689 
4690 //*************************************************************************************************
     // Specialization of TSVecDMatMultExprTrait for a vector type VT and a
     // DMatTDMatMultExpr<MT1,MT2> right operand.
     // Type is the result of TDVecTDMatMultExprTrait applied to the
     // TSVecDMatMultExprTrait<VT,MT1> type and MT2, i.e. the product is typed as
     // ( VT * MT1 ) * MT2.
     // This holds only if VT is a sparse row vector, MT1 a row-major dense matrix and MT2 a
     // column-major dense matrix; otherwise Type is INVALID_TYPE.
4692 template< typename VT, typename MT1, typename MT2 >
4693 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4694 {
4695  public:
4696  //**********************************************************************************************
4697  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4698  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4699  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4700  , typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4701  , INVALID_TYPE >::Type Type;
4702  //**********************************************************************************************
4703 };
4705 //*************************************************************************************************
4706 
4707 
4708 //*************************************************************************************************
     // Specialization of SubmatrixExprTrait for DMatTDMatMultExpr<MT1,MT2> with alignment flag AF.
     // Type is the MultExprTrait result for the submatrix types of const MT1 and const MT2 (both
     // taken with the same AF), i.e. a submatrix of the product is typed as the product of
     // submatrices of the two operands.
4710 template< typename MT1, typename MT2, bool AF >
4711 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2>, AF >
4712 {
4713  public:
4714  //**********************************************************************************************
4715  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4716  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4717  //**********************************************************************************************
4718 };
4720 //*************************************************************************************************
4721 
4722 
4723 //*************************************************************************************************
     // Specialization of RowExprTrait for DMatTDMatMultExpr<MT1,MT2>.
     // Type is the MultExprTrait result for the row type of const MT1 and MT2, i.e. a row of the
     // product is typed as ( row of MT1 ) * MT2.
4725 template< typename MT1, typename MT2 >
4726 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4727 {
4728  public:
4729  //**********************************************************************************************
4730  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4731  //**********************************************************************************************
4732 };
4734 //*************************************************************************************************
4735 
4736 
4737 //*************************************************************************************************
     // Specialization of ColumnExprTrait for DMatTDMatMultExpr<MT1,MT2>.
     // Type is the MultExprTrait result for MT1 and the column type of const MT2, i.e. a column
     // of the product is typed as MT1 * ( column of MT2 ).
4739 template< typename MT1, typename MT2 >
4740 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4741 {
4742  public:
4743  //**********************************************************************************************
4744  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4745  //**********************************************************************************************
4746 };
4748 //*************************************************************************************************
4749 
4750 } // namespace blaze
4751 
4752 #endif
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:404
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:247
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:255
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ( $A = B \cdot C$ ).
Definition: DMatDMatMultExpr.h:4329
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:249
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:152
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:300
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:394
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:245
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:249
Header file for the TDVecSMatMultExprTrait class template.
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:123
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:125
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:285
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:340
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:252
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
const size_t SMP_DMATTDMATMULT_THRESHOLD
SMP row-major dense matrix/column-major dense matrix multiplication threshold. This threshold specifie...
Definition: Thresholds.h:857
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:384
Header file for the DenseMatrix base class.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:122
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:126
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:250
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:330
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:246
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:142
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:372
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:124
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:350
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:251
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:413
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:115
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:121
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:414
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:360
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:248
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:261
Constraint on the data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:264
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:258
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.