All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DMatTDMatMultExpr.h
Go to the documentation of this file.
1 //=================================================================================================
33 //=================================================================================================
34 
35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
37 
38 
39 //*************************************************************************************************
40 // Includes
41 //*************************************************************************************************
42 
43 #include <stdexcept>
44 #include <boost/cast.hpp>
52 #include <blaze/math/Intrinsics.h>
53 #include <blaze/math/shims/Reset.h>
77 #include <blaze/system/BLAS.h>
79 #include <blaze/util/Assert.h>
80 #include <blaze/util/Complex.h>
86 #include <blaze/util/DisableIf.h>
87 #include <blaze/util/EnableIf.h>
88 #include <blaze/util/InvalidType.h>
90 #include <blaze/util/SelectType.h>
91 #include <blaze/util/Types.h>
97 
98 
99 namespace blaze {
100 
101 //=================================================================================================
102 //
103 // CLASS DMATTDMATMULTEXPR
104 //
105 //=================================================================================================
106 
107 //*************************************************************************************************
114 template< typename MT1 // Type of the left-hand side dense matrix
115  , typename MT2 > // Type of the right-hand side dense matrix
116 class DMatTDMatMultExpr : public DenseMatrix< DMatTDMatMultExpr<MT1,MT2>, false >
117  , private MatMatMultExpr
118  , private Computation
119 {
120  private:
121  //**Type definitions****************************************************************************
122  typedef typename MT1::ResultType RT1;  //!< Result type of the left-hand side operand type MT1.
123  typedef typename MT2::ResultType RT2;  //!< Result type of the right-hand side operand type MT2.
124  typedef typename RT1::ElementType ET1;  //!< Element type of the left-hand side result type.
125  typedef typename RT2::ElementType ET2;  //!< Element type of the right-hand side result type.
126  typedef typename MT1::CompositeType CT1;  //!< Composite type of the left-hand side operand type MT1.
127  typedef typename MT2::CompositeType CT2;  //!< Composite type of the right-hand side operand type MT2.
128  //**********************************************************************************************
129 
130  //**********************************************************************************************
133  //**********************************************************************************************
134 
135  //**********************************************************************************************
//! Compile-time flag for the right-hand side operand: nonzero when IsComputation<MT2> or
//! RequiresEvaluation<MT2> reports true for the operand type.
137  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
138  //**********************************************************************************************
139 
140  //**********************************************************************************************
142 
//! Kernel-selection helper: \a value is nonzero when evaluateLeft or evaluateRight is set.
//! The template parameters T1, T2 and T3 do not take part in the expression; they only make
//! the helper usable as an EnableIf/DisableIf condition in the kernel selection overloads.
//! NOTE(review): evaluateLeft is declared outside the visible part of this listing.
145  template< typename T1, typename T2, typename T3 >
146  struct UseSMPAssignKernel {
147  enum { value = evaluateLeft || evaluateRight };
148  };
150  //**********************************************************************************************
151 
152  //**********************************************************************************************
154 
//! Kernel-selection helper: \a value is nonzero when IsFloat holds for the element types of
//! all three matrix types. It enables the cblas_sgemm-based selectBlasAssignKernel() overload.
157  template< typename T1, typename T2, typename T3 >
158  struct UseSinglePrecisionKernel {
159  enum { value = IsFloat<typename T1::ElementType>::value &&
160  IsFloat<typename T2::ElementType>::value &&
161  IsFloat<typename T3::ElementType>::value };
162  };
164  //**********************************************************************************************
165 
166  //**********************************************************************************************
168 
//! Kernel-selection helper: \a value is nonzero when IsDouble holds for the element types of
//! all three matrix types. It enables the cblas_dgemm-based selectBlasAssignKernel() overload.
171  template< typename T1, typename T2, typename T3 >
172  struct UseDoublePrecisionKernel {
173  enum { value = IsDouble<typename T1::ElementType>::value &&
174  IsDouble<typename T2::ElementType>::value &&
175  IsDouble<typename T3::ElementType>::value };
176  };
178  //**********************************************************************************************
179 
180  //**********************************************************************************************
182 
//! Kernel-selection helper: \a value is nonzero when the element types of all three matrix
//! types are exactly complex<float>. It enables the cblas_cgemm-based selectBlasAssignKernel()
//! overload.
186  template< typename T1, typename T2, typename T3 >
187  struct UseSinglePrecisionComplexKernel {
188  typedef complex<float> Type;  // Element type that all three matrices must share
189  enum { value = IsSame<typename T1::ElementType,Type>::value &&
190  IsSame<typename T2::ElementType,Type>::value &&
191  IsSame<typename T3::ElementType,Type>::value };
192  };
194  //**********************************************************************************************
195 
196  //**********************************************************************************************
198 
//! Kernel-selection helper: \a value is nonzero when the element types of all three matrix
//! types are exactly complex<double>. It enables the cblas_zgemm-based selectBlasAssignKernel()
//! overload.
202  template< typename T1, typename T2, typename T3 >
203  struct UseDoublePrecisionComplexKernel {
204  typedef complex<double> Type;  // Element type that all three matrices must share
205  enum { value = IsSame<typename T1::ElementType,Type>::value &&
206  IsSame<typename T2::ElementType,Type>::value &&
207  IsSame<typename T3::ElementType,Type>::value };
208  };
210  //**********************************************************************************************
211 
212  //**********************************************************************************************
214 
//! Kernel-selection helper: \a value is nonzero when BLAZE_BLAS_MODE is off, or when none of
//! the four precision-specific helpers (single, double, single complex, double complex)
//! matches the given type combination. It enables the selectBlasAssignKernel() overload that
//! falls back to the default kernel.
217  template< typename T1, typename T2, typename T3 >
218  struct UseDefaultKernel {
219  enum { value = !BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220  !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221  !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222  !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
223  };
225  //**********************************************************************************************
226 
227  //**********************************************************************************************
229 
//! Kernel-selection helper: \a value is nonzero when all three matrix types are flagged
//! vectorizable, share one and the same element type, and IntrinsicTrait reports both
//! addition and multiplication for that element type. It switches between the scalar and the
//! vectorized selectDefaultAssignKernel() overloads.
232  template< typename T1, typename T2, typename T3 >
233  struct UseVectorizedDefaultKernel {
234  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237  IntrinsicTrait<typename T1::ElementType>::addition &&
238  IntrinsicTrait<typename T1::ElementType>::multiplication };
239  };
241  //**********************************************************************************************
242 
243  public:
244  //**Type definitions****************************************************************************
// NOTE(review): ElementType and ResultType are declared outside the visible part of this
// listing (the embedded listing numbers skip several typedef lines before this point).
251  typedef const ElementType ReturnType;  //!< Type returned by the element access operator.
252  typedef const ResultType CompositeType;  //!< Constant result type, used as the composite type of this expression.
253 
//! Left operand member type: held by value when MT1 is an expression (IsExpression<MT1>),
//! otherwise held as a reference-to-const.
255  typedef typename SelectType< IsExpression<MT1>::value, const MT1, const MT1& >::Type LeftOperand;
256 
//! Right operand member type: held by value when MT2 is an expression (IsExpression<MT2>),
//! otherwise held as a reference-to-const.
258  typedef typename SelectType< IsExpression<MT2>::value, const MT2, const MT2& >::Type RightOperand;
259 
262 
265  //**********************************************************************************************
266 
267  //**Compilation flags***************************************************************************
//! Compilation flag for vectorization; requires at least that both operand types are
//! flagged vectorizable.
//! NOTE(review): the remainder of this expression is missing from the listing (the embedded
//! listing numbers jump from 269 to 273) - restore it from the original header.
269  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
273 
//! Compilation flag for SMP assignment: nonzero only when neither evaluateLeft nor
//! evaluateRight is set.
275  enum { smpAssignable = !evaluateLeft && !evaluateRight };
276  //**********************************************************************************************
277 
278  //**Constructor*********************************************************************************
//! Constructor for the DMatTDMatMultExpr class.
//!
//! \param lhs The left-hand side operand of the multiplication expression.
//! \param rhs The right-hand side operand of the multiplication expression.
//!
//! Stores both operands in the members lhs_ and rhs_. The matching inner dimensions
//! (columns of \a lhs == rows of \a rhs) are only checked by an internal assertion.
284  explicit inline DMatTDMatMultExpr( const MT1& lhs, const MT2& rhs )
285  : lhs_( lhs ) // Left-hand side dense matrix of the multiplication expression
286  , rhs_( rhs ) // Right-hand side dense matrix of the multiplication expression
287  {
288  BLAZE_INTERNAL_ASSERT( lhs.columns() == rhs.rows(), "Invalid matrix sizes" );
289  }
290  //**********************************************************************************************
291 
292  //**Access operator*****************************************************************************
299  inline ReturnType operator()( size_t i, size_t j ) const {
300  BLAZE_INTERNAL_ASSERT( i < lhs_.rows() , "Invalid row access index" );
301  BLAZE_INTERNAL_ASSERT( j < rhs_.columns(), "Invalid column access index" );
302 
303  ElementType tmp;
304 
305  if( lhs_.columns() != 0UL ) {
306  const size_t end( ( ( lhs_.columns()-1UL ) & size_t(-2) ) + 1UL );
307  tmp = lhs_(i,0UL) * rhs_(0UL,j);
308  for( size_t k=1UL; k<end; k+=2UL ) {
309  tmp += lhs_(i,k ) * rhs_(k ,j);
310  tmp += lhs_(i,k+1UL) * rhs_(k+1UL,j);
311  }
312  if( end < lhs_.columns() ) {
313  tmp += lhs_(i,end) * rhs_(end,j);
314  }
315  }
316  else {
317  reset( tmp );
318  }
319 
320  return tmp;
321  }
322  //**********************************************************************************************
323 
324  //**Rows function*******************************************************************************
//! Returns the number of rows of the product, i.e. the number of rows of the left operand.
329  inline size_t rows() const {
330  return lhs_.rows();
331  }
332  //**********************************************************************************************
333 
334  //**Columns function****************************************************************************
//! Returns the number of columns of the product, i.e. the number of columns of the right operand.
339  inline size_t columns() const {
340  return rhs_.columns();
341  }
342  //**********************************************************************************************
343 
344  //**Left operand access*************************************************************************
//! Returns the left-hand side operand stored in this expression.
349  inline LeftOperand leftOperand() const {
350  return lhs_;
351  }
352  //**********************************************************************************************
353 
354  //**Right operand access************************************************************************
//! Returns the right-hand side operand stored in this expression.
359  inline RightOperand rightOperand() const {
360  return rhs_;
361  }
362  //**********************************************************************************************
363 
364  //**********************************************************************************************
//! Returns whether the expression can alias with the given address.
//!
//! \param alias The address to be checked.
//! \return \a true when either operand reports being aliased with \a alias.
//!
//! Delegates to isAliased() of both operands (same check as the isAliased() member).
370  template< typename T >
371  inline bool canAlias( const T* alias ) const {
372  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
373  }
374  //**********************************************************************************************
375 
376  //**********************************************************************************************
//! Returns whether the expression is aliased with the given address.
//!
//! \param alias The address to be checked.
//! \return \a true when either operand reports being aliased with \a alias.
382  template< typename T >
383  inline bool isAliased( const T* alias ) const {
384  return ( lhs_.isAliased( alias ) || rhs_.isAliased( alias ) );
385  }
386  //**********************************************************************************************
387 
388  //**********************************************************************************************
//! Returns \a true only when both operands report being aligned.
393  inline bool isAligned() const {
394  return lhs_.isAligned() && rhs_.isAligned();
395  }
396  //**********************************************************************************************
397 
398  //**********************************************************************************************
//! Returns whether the expression can be used in SMP assignments.
//!
//! The visible part of the condition holds when the BLAS library is not parallel
//! (BLAZE_BLAS_IS_PARALLEL is off) or when the product has fewer than
//! DMATTDMATMULT_THRESHOLD elements.
//! NOTE(review): the last operand of the '&&' is missing from the listing (the embedded
//! listing numbers skip 406), so the statement is incomplete as shown - restore it from the
//! original header.
403  inline bool canSMPAssign() const {
404  return ( !BLAZE_BLAS_IS_PARALLEL ||
405  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) ) &&
407  }
408  //**********************************************************************************************
409 
410  private:
411  //**Member variables****************************************************************************
414  //**********************************************************************************************
415 
416  //**Assignment to dense matrices****************************************************************
//! Assignment of a dense matrix-transpose dense matrix multiplication to a dense matrix.
//!
//! \param lhs The target dense matrix.
//! \param rhs The multiplication expression to be assigned.
//!
//! An empty target returns immediately. With an inner dimension of zero every element of the
//! product is an empty sum, so the target is reset instead. Otherwise both operands are bound
//! to (or evaluated into) objects of type LT and RT and handed to selectAssignKernel().
//! NOTE(review): LT and RT are declared outside the visible part of this listing, and the
//! embedded listing numbers skip 430, so a statement at the top of the body may be missing.
426  template< typename MT // Type of the target dense matrix
427  , bool SO > // Storage order of the target dense matrix
428  friend inline void assign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
429  {
431 
432  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
433  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
434 
// Nothing to compute for an empty target; an empty inner dimension yields an all-reset result
435  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
436  return;
437  }
438  else if( rhs.lhs_.columns() == 0UL ) {
439  reset( ~lhs );
440  return;
441  }
442 
443  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
444  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
445 
446  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
447  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
448  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
449  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
450  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
451  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
452 
453  DMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
454  }
456  //**********************************************************************************************
457 
458  //**Assignment to dense matrices (kernel selection)*********************************************
469  template< typename MT3 // Type of the left-hand side target matrix
470  , typename MT4 // Type of the left-hand side matrix operand
471  , typename MT5 > // Type of the right-hand side matrix operand
472  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
473  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
474  {
475  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
476  DMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
477  else
478  DMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
479  }
481  //**********************************************************************************************
482 
483  //**Assignment to dense matrices (kernel selection)*********************************************
//! Kernel selection for the assignment C = A * B when UseSMPAssignKernel is set.
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Forwards the product of the two operands to smpAssign().
494  template< typename MT3 // Type of the left-hand side target matrix
495  , typename MT4 // Type of the left-hand side matrix operand
496  , typename MT5 > // Type of the right-hand side matrix operand
497  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
498  selectAssignKernel( MT3& C, const MT4& A, const MT5& B )
499  {
500  smpAssign( C, A * B );
501  }
503  //**********************************************************************************************
504 
505  //**Default assignment to dense matrices********************************************************
519  template< typename MT3 // Type of the left-hand side target matrix
520  , typename MT4 // Type of the left-hand side matrix operand
521  , typename MT5 > // Type of the right-hand side matrix operand
522  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
523  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B )
524  {
525  const size_t M( A.rows() );
526  const size_t N( B.columns() );
527  const size_t K( A.columns() );
528 
529  for( size_t i=0UL; i<M; ++i ) {
530  for( size_t j=0UL; j<N; ++j ) {
531  C(i,j) = A(i,0UL) * B(0UL,j);
532  }
533  for( size_t k=1UL; k<K; ++k ) {
534  for( size_t j=0UL; j<N; ++j ) {
535  C(i,j) += A(i,k) * B(k,j);
536  }
537  }
538  }
539  }
541  //**********************************************************************************************
542 
543  //**Vectorized default assignment to row-major dense matrices***********************************
557  template< typename MT3 // Type of the left-hand side target matrix
558  , typename MT4 // Type of the left-hand side matrix operand
559  , typename MT5 > // Type of the right-hand side matrix operand
560  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
561  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
562  {
563  typedef IntrinsicTrait<ElementType> IT;
564 
565  const size_t M( A.rows() );
566  const size_t N( B.columns() );
567  const size_t K( A.columns() );
568 
569  size_t i( 0UL );
570 
571  for( ; (i+2UL) <= M; i+=2UL ) {
572  size_t j( 0UL );
573  for( ; (j+4UL) <= N; j+=4UL ) {
574  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
575  for( size_t k=0UL; k<K; k+=IT::size ) {
576  const IntrinsicType a1( A.load(i ,k) );
577  const IntrinsicType a2( A.load(i+1UL,k) );
578  const IntrinsicType b1( B.load(k,j ) );
579  const IntrinsicType b2( B.load(k,j+1UL) );
580  const IntrinsicType b3( B.load(k,j+2UL) );
581  const IntrinsicType b4( B.load(k,j+3UL) );
582  xmm1 = xmm1 + a1 * b1;
583  xmm2 = xmm2 + a1 * b2;
584  xmm3 = xmm3 + a1 * b3;
585  xmm4 = xmm4 + a1 * b4;
586  xmm5 = xmm5 + a2 * b1;
587  xmm6 = xmm6 + a2 * b2;
588  xmm7 = xmm7 + a2 * b3;
589  xmm8 = xmm8 + a2 * b4;
590  }
591  (~C)(i ,j ) = sum( xmm1 );
592  (~C)(i ,j+1UL) = sum( xmm2 );
593  (~C)(i ,j+2UL) = sum( xmm3 );
594  (~C)(i ,j+3UL) = sum( xmm4 );
595  (~C)(i+1UL,j ) = sum( xmm5 );
596  (~C)(i+1UL,j+1UL) = sum( xmm6 );
597  (~C)(i+1UL,j+2UL) = sum( xmm7 );
598  (~C)(i+1UL,j+3UL) = sum( xmm8 );
599  }
600  for( ; (j+2UL) <= N; j+=2UL ) {
601  IntrinsicType xmm1, xmm2, xmm3, xmm4;
602  for( size_t k=0UL; k<K; k+=IT::size ) {
603  const IntrinsicType a1( A.load(i ,k) );
604  const IntrinsicType a2( A.load(i+1UL,k) );
605  const IntrinsicType b1( B.load(k,j ) );
606  const IntrinsicType b2( B.load(k,j+1UL) );
607  xmm1 = xmm1 + a1 * b1;
608  xmm2 = xmm2 + a1 * b2;
609  xmm3 = xmm3 + a2 * b1;
610  xmm4 = xmm4 + a2 * b2;
611  }
612  (~C)(i ,j ) = sum( xmm1 );
613  (~C)(i ,j+1UL) = sum( xmm2 );
614  (~C)(i+1UL,j ) = sum( xmm3 );
615  (~C)(i+1UL,j+1UL) = sum( xmm4 );
616  }
617  if( j < N ) {
618  IntrinsicType xmm1, xmm2;
619  for( size_t k=0UL; k<K; k+=IT::size ) {
620  const IntrinsicType b1( B.load(k,j) );
621  xmm1 = xmm1 + A.load(i ,k) * b1;
622  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
623  }
624  (~C)(i ,j) = sum( xmm1 );
625  (~C)(i+1UL,j) = sum( xmm2 );
626  }
627  }
628  if( i < M ) {
629  size_t j( 0UL );
630  for( ; (j+4UL) <= N; j+=4UL ) {
631  IntrinsicType xmm1, xmm2, xmm3, xmm4;
632  for( size_t k=0UL; k<K; k+=IT::size ) {
633  const IntrinsicType a1( A.load(i,k) );
634  xmm1 = xmm1 + a1 * B.load(k,j );
635  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
636  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
637  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
638  }
639  (~C)(i,j ) = sum( xmm1 );
640  (~C)(i,j+1UL) = sum( xmm2 );
641  (~C)(i,j+2UL) = sum( xmm3 );
642  (~C)(i,j+3UL) = sum( xmm4 );
643  }
644  for( ; (j+2UL) <= N; j+=2UL ) {
645  IntrinsicType xmm1, xmm2;
646  for( size_t k=0UL; k<K; k+=IT::size ) {
647  const IntrinsicType a1( A.load(i,k) );
648  xmm1 = xmm1 + a1 * B.load(k,j );
649  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
650  }
651  (~C)(i,j ) = sum( xmm1 );
652  (~C)(i,j+1UL) = sum( xmm2 );
653  }
654  if( j < N ) {
655  IntrinsicType xmm1, xmm2;
656  for( size_t k=0UL; k<K; k+=IT::size ) {
657  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
658  }
659  (~C)(i,j) = sum( xmm1 );
660  }
661  }
662  }
664  //**********************************************************************************************
665 
666  //**Vectorized default assignment to column-major dense matrices********************************
680  template< typename MT3 // Type of the left-hand side target matrix
681  , typename MT4 // Type of the left-hand side matrix operand
682  , typename MT5 > // Type of the right-hand side matrix operand
683  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
684  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
685  {
686  typedef IntrinsicTrait<ElementType> IT;
687 
688  const size_t M( A.rows() );
689  const size_t N( B.columns() );
690  const size_t K( A.columns() );
691 
692  size_t i( 0UL );
693 
694  for( ; (i+4UL) <= M; i+=4UL ) {
695  size_t j( 0UL );
696  for( ; (j+2UL) <= N; j+=2UL ) {
697  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
698  for( size_t k=0UL; k<K; k+=IT::size ) {
699  const IntrinsicType a1( A.load(i ,k) );
700  const IntrinsicType a2( A.load(i+1UL,k) );
701  const IntrinsicType a3( A.load(i+2UL,k) );
702  const IntrinsicType a4( A.load(i+3UL,k) );
703  const IntrinsicType b1( B.load(k,j ) );
704  const IntrinsicType b2( B.load(k,j+1UL) );
705  xmm1 = xmm1 + a1 * b1;
706  xmm2 = xmm2 + a1 * b2;
707  xmm3 = xmm3 + a2 * b1;
708  xmm4 = xmm4 + a2 * b2;
709  xmm5 = xmm5 + a3 * b1;
710  xmm6 = xmm6 + a3 * b2;
711  xmm7 = xmm7 + a4 * b1;
712  xmm8 = xmm8 + a4 * b2;
713  }
714  (~C)(i ,j ) = sum( xmm1 );
715  (~C)(i ,j+1UL) = sum( xmm2 );
716  (~C)(i+1UL,j ) = sum( xmm3 );
717  (~C)(i+1UL,j+1UL) = sum( xmm4 );
718  (~C)(i+2UL,j ) = sum( xmm5 );
719  (~C)(i+2UL,j+1UL) = sum( xmm6 );
720  (~C)(i+3UL,j ) = sum( xmm7 );
721  (~C)(i+3UL,j+1UL) = sum( xmm8 );
722  }
723  if( j < N ) {
724  IntrinsicType xmm1, xmm2, xmm3, xmm4;
725  for( size_t k=0UL; k<K; k+=IT::size ) {
726  const IntrinsicType b1( B.load(k,j) );
727  xmm1 = xmm1 + A.load(i ,k) * b1;
728  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
729  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
730  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
731  }
732  (~C)(i ,j) = sum( xmm1 );
733  (~C)(i+1UL,j) = sum( xmm2 );
734  (~C)(i+2UL,j) = sum( xmm3 );
735  (~C)(i+3UL,j) = sum( xmm4 );
736  }
737  }
738  for( ; (i+2UL) <= M; i+=2UL ) {
739  size_t j( 0UL );
740  for( ; (j+2UL) <= N; j+=2UL ) {
741  IntrinsicType xmm1, xmm2, xmm3, xmm4;
742  for( size_t k=0UL; k<K; k+=IT::size ) {
743  const IntrinsicType a1( A.load(i ,k) );
744  const IntrinsicType a2( A.load(i+1UL,k) );
745  const IntrinsicType b1( B.load(k,j ) );
746  const IntrinsicType b2( B.load(k,j+1UL) );
747  xmm1 = xmm1 + a1 * b1;
748  xmm2 = xmm2 + a1 * b2;
749  xmm3 = xmm3 + a2 * b1;
750  xmm4 = xmm4 + a2 * b2;
751  }
752  (~C)(i ,j ) = sum( xmm1 );
753  (~C)(i ,j+1UL) = sum( xmm2 );
754  (~C)(i+1UL,j ) = sum( xmm3 );
755  (~C)(i+1UL,j+1UL) = sum( xmm4 );
756  }
757  if( j < N ) {
758  IntrinsicType xmm1, xmm2;
759  for( size_t k=0UL; k<K; k+=IT::size ) {
760  const IntrinsicType b1( B.load(k,j) );
761  xmm1 = xmm1 + A.load(i ,k) * b1;
762  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
763  }
764  (~C)(i ,j) = sum( xmm1 );
765  (~C)(i+1UL,j) = sum( xmm2 );
766  }
767  }
768  if( i < M ) {
769  size_t j( 0UL );
770  for( ; (j+2UL) <= N; j+=2UL ) {
771  IntrinsicType xmm1, xmm2;
772  for( size_t k=0UL; k<K; k+=IT::size ) {
773  const IntrinsicType a1( A.load(i,k) );
774  xmm1 = xmm1 + a1 * B.load(k,j );
775  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
776  }
777  (~C)(i,j ) = sum( xmm1 );
778  (~C)(i,j+1UL) = sum( xmm2 );
779  }
780  if( j < N ) {
781  IntrinsicType xmm1, xmm2;
782  for( size_t k=0UL; k<K; k+=IT::size ) {
783  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
784  }
785  (~C)(i,j) = sum( xmm1 );
786  }
787  }
788  }
790  //**********************************************************************************************
791 
792  //**Default assignment to dense matrices********************************************************
//! Fallback of the BLAS kernel selection for the assignment C = A * B.
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Selected when UseDefaultKernel is set (BLAS mode off, or no matching BLAS element type);
//! simply forwards to selectDefaultAssignKernel().
806  template< typename MT3 // Type of the left-hand side target matrix
807  , typename MT4 // Type of the left-hand side matrix operand
808  , typename MT5 > // Type of the right-hand side matrix operand
809  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
810  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
811  {
812  selectDefaultAssignKernel( C, A, B );
813  }
815  //**********************************************************************************************
816 
817  //**BLAS-based assignment to dense matrices (single precision)**********************************
818 #if BLAZE_BLAS_MODE
819 
//! BLAS-based assignment C = A * B for single precision matrices (cblas_sgemm).
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Selected when UseSinglePrecisionKernel is set. Sizes and spacings are converted to int via
//! boost::numeric_cast before they are passed to BLAS.
//! NOTE(review): the embedded listing numbers skip 840-842, so statements at the top of the
//! body may be missing from this listing.
832  template< typename MT3 // Type of the left-hand side target matrix
833  , typename MT4 // Type of the left-hand side matrix operand
834  , typename MT5 > // Type of the right-hand side matrix operand
835  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
836  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
837  {
838  using boost::numeric_cast;
839 
843 
844  const int M ( numeric_cast<int>( A.rows() ) );
845  const int N ( numeric_cast<int>( B.columns() ) );
846  const int K ( numeric_cast<int>( A.columns() ) );
847  const int lda( numeric_cast<int>( A.spacing() ) );
848  const int ldb( numeric_cast<int>( B.spacing() ) );
849  const int ldc( numeric_cast<int>( C.spacing() ) );
850 
// Row-major target: row-major layout, A untransposed, B transposed.
// Otherwise: column-major layout, A transposed, B untransposed.
// The scalars 1.0F and 0.0F make this a plain assignment of the product to C.
851  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
852  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
853  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
854  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
855  }
857 #endif
858  //**********************************************************************************************
859 
860  //**BLAS-based assignment to dense matrices (double precision)**********************************
861 #if BLAZE_BLAS_MODE
862 
//! BLAS-based assignment C = A * B for double precision matrices (cblas_dgemm).
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Selected when UseDoublePrecisionKernel is set. Sizes and spacings are converted to int via
//! boost::numeric_cast before they are passed to BLAS.
//! NOTE(review): the embedded listing numbers skip 883-885, so statements at the top of the
//! body may be missing from this listing.
875  template< typename MT3 // Type of the left-hand side target matrix
876  , typename MT4 // Type of the left-hand side matrix operand
877  , typename MT5 > // Type of the right-hand side matrix operand
878  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
879  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
880  {
881  using boost::numeric_cast;
882 
886 
887  const int M ( numeric_cast<int>( A.rows() ) );
888  const int N ( numeric_cast<int>( B.columns() ) );
889  const int K ( numeric_cast<int>( A.columns() ) );
890  const int lda( numeric_cast<int>( A.spacing() ) );
891  const int ldb( numeric_cast<int>( B.spacing() ) );
892  const int ldc( numeric_cast<int>( C.spacing() ) );
893 
// Row-major target: row-major layout, A untransposed, B transposed.
// Otherwise: column-major layout, A transposed, B untransposed.
// The scalars 1.0 and 0.0 make this a plain assignment of the product to C.
894  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
895  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
896  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
897  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
898  }
900 #endif
901  //**********************************************************************************************
902 
903  //**BLAS-based assignment to dense matrices (single precision complex)**************************
904 #if BLAZE_BLAS_MODE
905 
//! BLAS-based assignment C = A * B for single precision complex matrices (cblas_cgemm).
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Selected when UseSinglePrecisionComplexKernel is set. Sizes and spacings are converted to
//! int via boost::numeric_cast before they are passed to BLAS.
//! NOTE(review): the embedded listing numbers skip 926-928, so statements at the top of the
//! body may be missing from this listing.
918  template< typename MT3 // Type of the left-hand side target matrix
919  , typename MT4 // Type of the left-hand side matrix operand
920  , typename MT5 > // Type of the right-hand side matrix operand
921  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
922  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
923  {
924  using boost::numeric_cast;
925 
// Compile-time checks on the underlying value type of the complex elements
929  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
930  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
931  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
932 
933  const int M ( numeric_cast<int>( A.rows() ) );
934  const int N ( numeric_cast<int>( B.columns() ) );
935  const int K ( numeric_cast<int>( A.columns() ) );
936  const int lda( numeric_cast<int>( A.spacing() ) );
937  const int ldb( numeric_cast<int>( B.spacing() ) );
938  const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scalars (1,0) and (0,0): plain assignment of the product to C; passed by address
939  const complex<float> alpha( 1.0F, 0.0F );
940  const complex<float> beta ( 0.0F, 0.0F );
941 
// Row-major target: row-major layout, A untransposed, B transposed.
// Otherwise: column-major layout, A transposed, B untransposed.
942  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
943  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
944  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
945  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
946  }
948 #endif
949  //**********************************************************************************************
950 
951  //**BLAS-based assignment to dense matrices (double precision complex)**************************
952 #if BLAZE_BLAS_MODE
953 
//! BLAS-based assignment C = A * B for double precision complex matrices (cblas_zgemm).
//!
//! \param C The target dense matrix.
//! \param A The left-hand side multiplication operand.
//! \param B The right-hand side multiplication operand.
//!
//! Selected when UseDoublePrecisionComplexKernel is set. Sizes and spacings are converted to
//! int via boost::numeric_cast before they are passed to BLAS.
//! NOTE(review): the embedded listing numbers skip 974-976, so statements at the top of the
//! body may be missing from this listing.
966  template< typename MT3 // Type of the left-hand side target matrix
967  , typename MT4 // Type of the left-hand side matrix operand
968  , typename MT5 > // Type of the right-hand side matrix operand
969  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
970  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B )
971  {
972  using boost::numeric_cast;
973 
// Compile-time checks on the underlying value type of the complex elements
977  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
978  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
979  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
980 
981  const int M ( numeric_cast<int>( A.rows() ) );
982  const int N ( numeric_cast<int>( B.columns() ) );
983  const int K ( numeric_cast<int>( A.columns() ) );
984  const int lda( numeric_cast<int>( A.spacing() ) );
985  const int ldb( numeric_cast<int>( B.spacing() ) );
986  const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scalars (1,0) and (0,0): plain assignment of the product to C; passed by address
987  const complex<double> alpha( 1.0, 0.0 );
988  const complex<double> beta ( 0.0, 0.0 );
989 
// Row-major target: row-major layout, A untransposed, B transposed.
// Otherwise: column-major layout, A transposed, B untransposed.
990  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
991  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
992  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
993  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
994  }
996 #endif
997  //**********************************************************************************************
998 
999  //**Assignment to sparse matrices***************************************************************
//! Assignment of a dense matrix-transpose dense matrix multiplication to a sparse matrix.
//!
//! \param lhs The target sparse matrix.
//! \param rhs The multiplication expression to be assigned.
//!
//! The expression is first evaluated into a temporary (OppositeType when SO is true,
//! ResultType otherwise), which is then passed to smpAssign().
//! NOTE(review): OppositeType and ResultType are declared outside the visible part of this
//! listing, and the embedded listing numbers skip 1015 and 1019-1024, so statements may be
//! missing from the body.
1011  template< typename MT // Type of the target sparse matrix
1012  , bool SO > // Storage order of the target sparse matrix
1013  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1014  {
1016 
1017  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
1018 
1025 
1026  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1027  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1028 
1029  const TmpType tmp( rhs );
1030  smpAssign( ~lhs, tmp );
1031  }
1033  //**********************************************************************************************
1034 
1035  //**Addition assignment to dense matrices*******************************************************
//! Addition assignment of a dense matrix-transpose dense matrix multiplication to a dense
//! matrix (\a lhs += \a rhs).
//!
//! \param lhs The target dense matrix.
//! \param rhs The multiplication expression to be added.
//!
//! Returns immediately when the target is empty or the inner dimension is zero (nothing to
//! add). Otherwise both operands are bound to (or evaluated into) objects of type LT and RT
//! and handed to selectAddAssignKernel().
//! NOTE(review): LT and RT are declared outside the visible part of this listing, and the
//! embedded listing numbers skip 1052, so a statement at the top of the body may be missing.
1048  template< typename MT // Type of the target dense matrix
1049  , bool SO > // Storage order of the target dense matrix
1050  friend inline void addAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1051  {
1053 
1054  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1055  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1056 
1057  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1058  return;
1059  }
1060 
1061  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1062  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1063 
1064  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1065  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1066  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1067  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1068  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1069  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1070 
1071  DMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1072  }
1074  //**********************************************************************************************
1075 
1076  //**Addition assignment to dense matrices (kernel selection)************************************
/*!\brief Kernel selection for the addition assignment (overload for the non-SMP case).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through DisableIf on UseSMPAssignKernel<MT3,MT4,MT5>. Targets with
// fewer than DMATTDMATMULT_THRESHOLD elements (rows * columns) are handled by
// selectDefaultAddAssignKernel(); all other targets go to selectBlasAddAssignKernel().
*/
1087  template< typename MT3 // Type of the left-hand side target matrix
1088  , typename MT4 // Type of the left-hand side matrix operand
1089  , typename MT5 > // Type of the right-hand side matrix operand
1090  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1091  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1092  {
// The decision is based on the number of elements of the target matrix.
1093  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
1094  DMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1095  else
1096  DMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
1097  }
1099  //**********************************************************************************************
1100 
1101  //**Addition assignment to dense matrices (kernel selection)************************************
/*!\brief Kernel selection for the addition assignment (overload for the SMP case).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseSMPAssignKernel<MT3,MT4,MT5>. It rebuilds
// the product expression from the two operands and hands it to smpAddAssign().
*/
1112  template< typename MT3 // Type of the left-hand side target matrix
1113  , typename MT4 // Type of the left-hand side matrix operand
1114  , typename MT5 > // Type of the right-hand side matrix operand
1115  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1116  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1117  {
1118  smpAddAssign( C, A * B );
1119  }
1121  //**********************************************************************************************
1122 
1123  //**Default addition assignment to dense matrices***********************************************
/*!\brief Default (scalar) addition assignment kernel: C += A * B.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through DisableIf on UseVectorizedDefaultKernel<MT3,MT4,MT5>. It
// is a plain triple loop over rows (i), the inner dimension (k) and columns (j) that accesses
// all three matrices through their function call operator. The column loop is unrolled by a
// factor of two; a single trailing column is handled separately when N is odd.
*/
1137  template< typename MT3 // Type of the left-hand side target matrix
1138  , typename MT4 // Type of the left-hand side matrix operand
1139  , typename MT5 > // Type of the right-hand side matrix operand
1140  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1141  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1142  {
1143  const size_t M( A.rows() );
1144  const size_t N( B.columns() );
1145  const size_t K( A.columns() );
1146 
// 'end' is N rounded down to an even number (lowest bit masked out): the limit of the unrolled loop.
1147  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1148  const size_t end( N & size_t(-2) );
1149 
1150  for( size_t i=0UL; i<M; ++i ) {
1151  for( size_t k=0UL; k<K; ++k ) {
1152  for( size_t j=0UL; j<end; j+=2UL ) {
1153  C(i,j ) += A(i,k) * B(k,j );
1154  C(i,j+1UL) += A(i,k) * B(k,j+1UL);
1155  }
// Remaining last column in case N is odd.
1156  if( end < N ) {
1157  C(i,end) += A(i,k) * B(k,end);
1158  }
1159  }
1160  }
1161  }
1163  //**********************************************************************************************
1164 
1165  //**Vectorized default addition assignment to row-major dense matrices**************************
1179  template< typename MT3 // Type of the left-hand side target matrix
1180  , typename MT4 // Type of the left-hand side matrix operand
1181  , typename MT5 > // Type of the right-hand side matrix operand
1182  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1183  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1184  {
1185  typedef IntrinsicTrait<ElementType> IT;
1186 
1187  const size_t M( A.rows() );
1188  const size_t N( B.columns() );
1189  const size_t K( A.columns() );
1190 
1191  size_t i( 0UL );
1192 
1193  for( ; (i+2UL) <= M; i+=2UL ) {
1194  size_t j( 0UL );
1195  for( ; (j+4UL) <= N; j+=4UL ) {
1196  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1197  for( size_t k=0UL; k<K; k+=IT::size ) {
1198  const IntrinsicType a1( A.load(i ,k) );
1199  const IntrinsicType a2( A.load(i+1UL,k) );
1200  const IntrinsicType b1( B.load(k,j ) );
1201  const IntrinsicType b2( B.load(k,j+1UL) );
1202  const IntrinsicType b3( B.load(k,j+2UL) );
1203  const IntrinsicType b4( B.load(k,j+3UL) );
1204  xmm1 = xmm1 + a1 * b1;
1205  xmm2 = xmm2 + a1 * b2;
1206  xmm3 = xmm3 + a1 * b3;
1207  xmm4 = xmm4 + a1 * b4;
1208  xmm5 = xmm5 + a2 * b1;
1209  xmm6 = xmm6 + a2 * b2;
1210  xmm7 = xmm7 + a2 * b3;
1211  xmm8 = xmm8 + a2 * b4;
1212  }
1213  (~C)(i ,j ) += sum( xmm1 );
1214  (~C)(i ,j+1UL) += sum( xmm2 );
1215  (~C)(i ,j+2UL) += sum( xmm3 );
1216  (~C)(i ,j+3UL) += sum( xmm4 );
1217  (~C)(i+1UL,j ) += sum( xmm5 );
1218  (~C)(i+1UL,j+1UL) += sum( xmm6 );
1219  (~C)(i+1UL,j+2UL) += sum( xmm7 );
1220  (~C)(i+1UL,j+3UL) += sum( xmm8 );
1221  }
1222  for( ; (j+2UL) <= N; j+=2UL ) {
1223  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1224  for( size_t k=0UL; k<K; k+=IT::size ) {
1225  const IntrinsicType a1( A.load(i ,k) );
1226  const IntrinsicType a2( A.load(i+1UL,k) );
1227  const IntrinsicType b1( B.load(k,j ) );
1228  const IntrinsicType b2( B.load(k,j+1UL) );
1229  xmm1 = xmm1 + a1 * b1;
1230  xmm2 = xmm2 + a1 * b2;
1231  xmm3 = xmm3 + a2 * b1;
1232  xmm4 = xmm4 + a2 * b2;
1233  }
1234  (~C)(i ,j ) += sum( xmm1 );
1235  (~C)(i ,j+1UL) += sum( xmm2 );
1236  (~C)(i+1UL,j ) += sum( xmm3 );
1237  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1238  }
1239  if( j < N ) {
1240  IntrinsicType xmm1, xmm2;
1241  for( size_t k=0UL; k<K; k+=IT::size ) {
1242  const IntrinsicType b1( B.load(k,j) );
1243  xmm1 = xmm1 + A.load(i ,k) * b1;
1244  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1245  }
1246  (~C)(i ,j) += sum( xmm1 );
1247  (~C)(i+1UL,j) += sum( xmm2 );
1248  }
1249  }
1250  if( i < M ) {
1251  size_t j( 0UL );
1252  for( ; (j+4UL) <= N; j+=4UL ) {
1253  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1254  for( size_t k=0UL; k<K; k+=IT::size ) {
1255  const IntrinsicType a1( A.load(i,k) );
1256  xmm1 = xmm1 + a1 * B.load(k,j );
1257  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1258  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1259  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1260  }
1261  (~C)(i,j ) += sum( xmm1 );
1262  (~C)(i,j+1UL) += sum( xmm2 );
1263  (~C)(i,j+2UL) += sum( xmm3 );
1264  (~C)(i,j+3UL) += sum( xmm4 );
1265  }
1266  for( ; (j+2UL) <= N; j+=2UL ) {
1267  IntrinsicType xmm1, xmm2;
1268  for( size_t k=0UL; k<K; k+=IT::size ) {
1269  const IntrinsicType a1( A.load(i,k) );
1270  xmm1 = xmm1 + a1 * B.load(k,j );
1271  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1272  }
1273  (~C)(i,j ) += sum( xmm1 );
1274  (~C)(i,j+1UL) += sum( xmm2 );
1275  }
1276  if( j < N ) {
1277  IntrinsicType xmm1, xmm2;
1278  for( size_t k=0UL; k<K; k+=IT::size ) {
1279  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1280  }
1281  (~C)(i,j) += sum( xmm1 );
1282  }
1283  }
1284  }
1286  //**********************************************************************************************
1287 
1288  //**Vectorized default addition assignment to column-major dense matrices***********************
1302  template< typename MT3 // Type of the left-hand side target matrix
1303  , typename MT4 // Type of the left-hand side matrix operand
1304  , typename MT5 > // Type of the right-hand side matrix operand
1305  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1306  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1307  {
1308  typedef IntrinsicTrait<ElementType> IT;
1309 
1310  const size_t M( A.rows() );
1311  const size_t N( B.columns() );
1312  const size_t K( A.columns() );
1313 
1314  size_t i( 0UL );
1315 
1316  for( ; (i+4UL) <= M; i+=4UL ) {
1317  size_t j( 0UL );
1318  for( ; (j+2UL) <= N; j+=2UL ) {
1319  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1320  for( size_t k=0UL; k<K; k+=IT::size ) {
1321  const IntrinsicType a1( A.load(i ,k) );
1322  const IntrinsicType a2( A.load(i+1UL,k) );
1323  const IntrinsicType a3( A.load(i+2UL,k) );
1324  const IntrinsicType a4( A.load(i+3UL,k) );
1325  const IntrinsicType b1( B.load(k,j ) );
1326  const IntrinsicType b2( B.load(k,j+1UL) );
1327  xmm1 = xmm1 + a1 * b1;
1328  xmm2 = xmm2 + a1 * b2;
1329  xmm3 = xmm3 + a2 * b1;
1330  xmm4 = xmm4 + a2 * b2;
1331  xmm5 = xmm5 + a3 * b1;
1332  xmm6 = xmm6 + a3 * b2;
1333  xmm7 = xmm7 + a4 * b1;
1334  xmm8 = xmm8 + a4 * b2;
1335  }
1336  (~C)(i ,j ) += sum( xmm1 );
1337  (~C)(i ,j+1UL) += sum( xmm2 );
1338  (~C)(i+1UL,j ) += sum( xmm3 );
1339  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1340  (~C)(i+2UL,j ) += sum( xmm5 );
1341  (~C)(i+2UL,j+1UL) += sum( xmm6 );
1342  (~C)(i+3UL,j ) += sum( xmm7 );
1343  (~C)(i+3UL,j+1UL) += sum( xmm8 );
1344  }
1345  if( j < N ) {
1346  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1347  for( size_t k=0UL; k<K; k+=IT::size ) {
1348  const IntrinsicType b1( B.load(k,j) );
1349  xmm1 = xmm1 + A.load(i ,k) * b1;
1350  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1351  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1352  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1353  }
1354  (~C)(i ,j) += sum( xmm1 );
1355  (~C)(i+1UL,j) += sum( xmm2 );
1356  (~C)(i+2UL,j) += sum( xmm3 );
1357  (~C)(i+3UL,j) += sum( xmm4 );
1358  }
1359  }
1360  for( ; (i+2UL) <= M; i+=2UL ) {
1361  size_t j( 0UL );
1362  for( ; (j+2UL) <= N; j+=2UL ) {
1363  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1364  for( size_t k=0UL; k<K; k+=IT::size ) {
1365  const IntrinsicType a1( A.load(i ,k) );
1366  const IntrinsicType a2( A.load(i+1UL,k) );
1367  const IntrinsicType b1( B.load(k,j ) );
1368  const IntrinsicType b2( B.load(k,j+1UL) );
1369  xmm1 = xmm1 + a1 * b1;
1370  xmm2 = xmm2 + a1 * b2;
1371  xmm3 = xmm3 + a2 * b1;
1372  xmm4 = xmm4 + a2 * b2;
1373  }
1374  (~C)(i ,j ) += sum( xmm1 );
1375  (~C)(i ,j+1UL) += sum( xmm2 );
1376  (~C)(i+1UL,j ) += sum( xmm3 );
1377  (~C)(i+1UL,j+1UL) += sum( xmm4 );
1378  }
1379  if( j < N ) {
1380  IntrinsicType xmm1, xmm2;
1381  for( size_t k=0UL; k<K; k+=IT::size ) {
1382  const IntrinsicType b1( B.load(k,j) );
1383  xmm1 = xmm1 + A.load(i ,k) * b1;
1384  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1385  }
1386  (~C)(i ,j) += sum( xmm1 );
1387  (~C)(i+1UL,j) += sum( xmm2 );
1388  }
1389  }
1390  if( i < M ) {
1391  size_t j( 0UL );
1392  for( ; (j+2UL) <= N; j+=2UL ) {
1393  IntrinsicType xmm1, xmm2;
1394  for( size_t k=0UL; k<K; k+=IT::size ) {
1395  const IntrinsicType a1( A.load(i,k) );
1396  xmm1 = xmm1 + a1 * B.load(k,j );
1397  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1398  }
1399  (~C)(i,j ) += sum( xmm1 );
1400  (~C)(i,j+1UL) += sum( xmm2 );
1401  }
1402  if( j < N ) {
1403  IntrinsicType xmm1, xmm2;
1404  for( size_t k=0UL; k<K; k+=IT::size ) {
1405  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1406  }
1407  (~C)(i,j) += sum( xmm1 );
1408  }
1409  }
1410  }
1412  //**********************************************************************************************
1413 
1414  //**Default addition assignment to dense matrices***********************************************
/*!\brief Fallback of the BLAS addition assignment selection to the default kernel.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseDefaultKernel<MT3,MT4,MT5> and simply
// forwards all three arguments to selectDefaultAddAssignKernel().
*/
1428  template< typename MT3 // Type of the left-hand side target matrix
1429  , typename MT4 // Type of the left-hand side matrix operand
1430  , typename MT5 > // Type of the right-hand side matrix operand
1431  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1432  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1433  {
1434  selectDefaultAddAssignKernel( C, A, B );
1435  }
1437  //**********************************************************************************************
1438 
1439  //**BLAS-based addition assignment to dense matrices (single precision)*************************
1440 #if BLAZE_BLAS_MODE
1441 
/*!\brief BLAS-based addition assignment kernel for single precision: C += A * B via cblas_sgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseSinglePrecisionKernel<MT3,MT4,MT5>. The
// sizes and the spacing() values of the three matrices are converted to \c int with
// boost::numeric_cast and passed to cblas_sgemm() with both scaling factors set to 1.0F, i.e.
// the product is added to the existing content of \a C.
*/
1454  template< typename MT3 // Type of the left-hand side target matrix
1455  , typename MT4 // Type of the left-hand side matrix operand
1456  , typename MT5 > // Type of the right-hand side matrix operand
1457  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1458  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1459  {
1460  using boost::numeric_cast;
1461 
1465 
1466  const int M ( numeric_cast<int>( A.rows() ) );
1467  const int N ( numeric_cast<int>( B.columns() ) );
1468  const int K ( numeric_cast<int>( A.columns() ) );
1469  const int lda( numeric_cast<int>( A.spacing() ) );
1470  const int ldb( numeric_cast<int>( B.spacing() ) );
1471  const int ldc( numeric_cast<int>( C.spacing() ) );
1472 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
1473  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1474  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1475  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1476  M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
1477  }
1479 #endif
1480  //**********************************************************************************************
1481 
1482  //**BLAS-based addition assignment to dense matrices (double precision)*************************
1483 #if BLAZE_BLAS_MODE
1484 
/*!\brief BLAS-based addition assignment kernel for double precision: C += A * B via cblas_dgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseDoublePrecisionKernel<MT3,MT4,MT5>. The
// sizes and the spacing() values of the three matrices are converted to \c int with
// boost::numeric_cast and passed to cblas_dgemm() with both scaling factors set to 1.0, i.e.
// the product is added to the existing content of \a C.
*/
1497  template< typename MT3 // Type of the left-hand side target matrix
1498  , typename MT4 // Type of the left-hand side matrix operand
1499  , typename MT5 > // Type of the right-hand side matrix operand
1500  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1501  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1502  {
1503  using boost::numeric_cast;
1504 
1508 
1509  const int M ( numeric_cast<int>( A.rows() ) );
1510  const int N ( numeric_cast<int>( B.columns() ) );
1511  const int K ( numeric_cast<int>( A.columns() ) );
1512  const int lda( numeric_cast<int>( A.spacing() ) );
1513  const int ldb( numeric_cast<int>( B.spacing() ) );
1514  const int ldc( numeric_cast<int>( C.spacing() ) );
1515 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
1516  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1517  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1518  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1519  M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
1520  }
1522 #endif
1523  //**********************************************************************************************
1524 
1525  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
1526 #if BLAZE_BLAS_MODE
1527 
/*!\brief BLAS-based addition assignment kernel for single precision complex: C += A * B via
//        cblas_cgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseSinglePrecisionComplexKernel<MT3,MT4,MT5>.
// The value_type of all three element types is constrained to \c float at compile time. The
// sizes and the spacing() values are converted to \c int with boost::numeric_cast and passed
// to cblas_cgemm() together with the complex scaling factors alpha = beta = (1,0), i.e. the
// product is added to the existing content of \a C.
*/
1540  template< typename MT3 // Type of the left-hand side target matrix
1541  , typename MT4 // Type of the left-hand side matrix operand
1542  , typename MT5 > // Type of the right-hand side matrix operand
1543  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1544  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1545  {
1546  using boost::numeric_cast;
1547 
1551  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
1552  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
1553  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
1554 
1555  const int M ( numeric_cast<int>( A.rows() ) );
1556  const int N ( numeric_cast<int>( B.columns() ) );
1557  const int K ( numeric_cast<int>( A.columns() ) );
1558  const int lda( numeric_cast<int>( A.spacing() ) );
1559  const int ldb( numeric_cast<int>( B.spacing() ) );
1560  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the BLAS routine by address.
1561  const complex<float> alpha( 1.0F, 0.0F );
1562  const complex<float> beta ( 1.0F, 0.0F );
1563 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
1564  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1565  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1566  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1567  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1568  }
1570 #endif
1571  //**********************************************************************************************
1572 
1573  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
1574 #if BLAZE_BLAS_MODE
1575 
/*!\brief BLAS-based addition assignment kernel for double precision complex: C += A * B via
//        cblas_zgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseDoublePrecisionComplexKernel<MT3,MT4,MT5>.
// The value_type of all three element types is constrained to \c double at compile time. The
// sizes and the spacing() values are converted to \c int with boost::numeric_cast and passed
// to cblas_zgemm() together with the complex scaling factors alpha = beta = (1,0), i.e. the
// product is added to the existing content of \a C.
*/
1588  template< typename MT3 // Type of the left-hand side target matrix
1589  , typename MT4 // Type of the left-hand side matrix operand
1590  , typename MT5 > // Type of the right-hand side matrix operand
1591  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1592  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B )
1593  {
1594  using boost::numeric_cast;
1595 
1599  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
1600  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
1601  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
1602 
1603  const int M ( numeric_cast<int>( A.rows() ) );
1604  const int N ( numeric_cast<int>( B.columns() ) );
1605  const int K ( numeric_cast<int>( A.columns() ) );
1606  const int lda( numeric_cast<int>( A.spacing() ) );
1607  const int ldb( numeric_cast<int>( B.spacing() ) );
1608  const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the BLAS routine by address.
1609  const complex<double> alpha( 1.0, 0.0 );
1610  const complex<double> beta ( 1.0, 0.0 );
1611 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
1612  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1613  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1614  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1615  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1616  }
1618 #endif
1619  //**********************************************************************************************
1620 
1621  //**Addition assignment to sparse matrices******************************************************
1622  // No special implementation for the addition assignment to sparse matrices.
1623  //**********************************************************************************************
1624 
1625  //**Subtraction assignment to dense matrices****************************************************
/*!\brief Subtraction assignment of the multiplication expression to a dense matrix.
//
// \param lhs The target dense matrix the product is subtracted from.
// \param rhs The multiplication expression to be subtracted.
// \return void
//
// Returns without touching \a lhs if the target has no rows or no columns, or if the left-hand
// operand of the expression has no columns (zero inner dimension). Otherwise the two operands
// are used to construct the local objects \a A (type LT) and \a B (type RT) and the work is
// forwarded to selectSubAssignKernel(). Dimension mismatches are only caught by
// BLAZE_INTERNAL_ASSERT.
// NOTE(review): LT and RT are typedefs declared outside this excerpt; whether constructing
// them copies an operand or merely references it cannot be told from here.
*/
1638  template< typename MT // Type of the target dense matrix
1639  , bool SO > // Storage order of the target dense matrix
1640  friend inline void subAssign( DenseMatrix<MT,SO>& lhs, const DMatTDMatMultExpr& rhs )
1641  {
1643 
1644  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
1645  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
1646 
// Nothing to subtract for an empty target or a zero inner dimension.
1647  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1648  return;
1649  }
1650 
1651  LT A( rhs.lhs_ ); // Evaluation of the left-hand side dense matrix operand
1652  RT B( rhs.rhs_ ); // Evaluation of the right-hand side dense matrix operand
1653 
// The evaluated operands must have the same shape as the originals and fit the target.
1654  BLAZE_INTERNAL_ASSERT( A.rows() == rhs.lhs_.rows() , "Invalid number of rows" );
1655  BLAZE_INTERNAL_ASSERT( A.columns() == rhs.lhs_.columns(), "Invalid number of columns" );
1656  BLAZE_INTERNAL_ASSERT( B.rows() == rhs.rhs_.rows() , "Invalid number of rows" );
1657  BLAZE_INTERNAL_ASSERT( B.columns() == rhs.rhs_.columns(), "Invalid number of columns" );
1658  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
1659  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns() , "Invalid number of columns" );
1660 
1661  DMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1662  }
1664  //**********************************************************************************************
1665 
1666  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Kernel selection for the subtraction assignment (overload for the non-SMP case).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through DisableIf on UseSMPAssignKernel<MT3,MT4,MT5>. Targets with
// fewer than DMATTDMATMULT_THRESHOLD elements (rows * columns) are handled by
// selectDefaultSubAssignKernel(); all other targets go to selectBlasSubAssignKernel().
*/
1677  template< typename MT3 // Type of the left-hand side target matrix
1678  , typename MT4 // Type of the left-hand side matrix operand
1679  , typename MT5 > // Type of the right-hand side matrix operand
1680  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1681  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1682  {
// The decision is based on the number of elements of the target matrix.
1683  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
1684  DMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1685  else
1686  DMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
1687  }
1689  //**********************************************************************************************
1690 
1691  //**Subtraction assignment to dense matrices (kernel selection)*********************************
/*!\brief Kernel selection for the subtraction assignment (overload for the SMP case).
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseSMPAssignKernel<MT3,MT4,MT5>. It rebuilds
// the product expression from the two operands and hands it to smpSubAssign().
*/
1702  template< typename MT3 // Type of the left-hand side target matrix
1703  , typename MT4 // Type of the left-hand side matrix operand
1704  , typename MT5 > // Type of the right-hand side matrix operand
1705  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1706  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1707  {
1708  smpSubAssign( C, A * B );
1709  }
1711  //**********************************************************************************************
1712 
1713  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Default (scalar) subtraction assignment kernel: C -= A * B.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through DisableIf on UseVectorizedDefaultKernel<MT3,MT4,MT5>. It
// is a plain triple loop over rows (i), the inner dimension (k) and columns (j) that accesses
// all three matrices through their function call operator. The column loop is unrolled by a
// factor of two; a single trailing column is handled separately when N is odd.
*/
1727  template< typename MT3 // Type of the left-hand side target matrix
1728  , typename MT4 // Type of the left-hand side matrix operand
1729  , typename MT5 > // Type of the right-hand side matrix operand
1730  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1731  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
1732  {
1733  const size_t M( A.rows() );
1734  const size_t N( B.columns() );
1735  const size_t K( A.columns() );
1736 
// 'end' is N rounded down to an even number (lowest bit masked out): the limit of the unrolled loop.
1737  BLAZE_INTERNAL_ASSERT( ( N - ( N % 2UL ) ) == ( N & size_t(-2) ), "Invalid end calculation" );
1738  const size_t end( N & size_t(-2) );
1739 
1740  for( size_t i=0UL; i<M; ++i ) {
1741  for( size_t k=0UL; k<K; ++k ) {
1742  for( size_t j=0UL; j<end; j+=2UL ) {
1743  C(i,j ) -= A(i,k) * B(k,j );
1744  C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
1745  }
// Remaining last column in case N is odd.
1746  if( end < N ) {
1747  C(i,end) -= A(i,k) * B(k,end);
1748  }
1749  }
1750  }
1751  }
1753  //**********************************************************************************************
1754 
1755  //**Vectorized default subtraction assignment to row-major dense matrices***********************
1769  template< typename MT3 // Type of the left-hand side target matrix
1770  , typename MT4 // Type of the left-hand side matrix operand
1771  , typename MT5 > // Type of the right-hand side matrix operand
1772  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1773  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B )
1774  {
1775  typedef IntrinsicTrait<ElementType> IT;
1776 
1777  const size_t M( A.rows() );
1778  const size_t N( B.columns() );
1779  const size_t K( A.columns() );
1780 
1781  size_t i( 0UL );
1782 
1783  for( ; (i+2UL) <= M; i+=2UL ) {
1784  size_t j( 0UL );
1785  for( ; (j+4UL) <= N; j+=4UL ) {
1786  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1787  for( size_t k=0UL; k<K; k+=IT::size ) {
1788  const IntrinsicType a1( A.load(i ,k) );
1789  const IntrinsicType a2( A.load(i+1UL,k) );
1790  const IntrinsicType b1( B.load(k,j ) );
1791  const IntrinsicType b2( B.load(k,j+1UL) );
1792  const IntrinsicType b3( B.load(k,j+2UL) );
1793  const IntrinsicType b4( B.load(k,j+3UL) );
1794  xmm1 = xmm1 + a1 * b1;
1795  xmm2 = xmm2 + a1 * b2;
1796  xmm3 = xmm3 + a1 * b3;
1797  xmm4 = xmm4 + a1 * b4;
1798  xmm5 = xmm5 + a2 * b1;
1799  xmm6 = xmm6 + a2 * b2;
1800  xmm7 = xmm7 + a2 * b3;
1801  xmm8 = xmm8 + a2 * b4;
1802  }
1803  (~C)(i ,j ) -= sum( xmm1 );
1804  (~C)(i ,j+1UL) -= sum( xmm2 );
1805  (~C)(i ,j+2UL) -= sum( xmm3 );
1806  (~C)(i ,j+3UL) -= sum( xmm4 );
1807  (~C)(i+1UL,j ) -= sum( xmm5 );
1808  (~C)(i+1UL,j+1UL) -= sum( xmm6 );
1809  (~C)(i+1UL,j+2UL) -= sum( xmm7 );
1810  (~C)(i+1UL,j+3UL) -= sum( xmm8 );
1811  }
1812  for( ; (j+2UL) <= N; j+=2UL ) {
1813  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1814  for( size_t k=0UL; k<K; k+=IT::size ) {
1815  const IntrinsicType a1( A.load(i ,k) );
1816  const IntrinsicType a2( A.load(i+1UL,k) );
1817  const IntrinsicType b1( B.load(k,j ) );
1818  const IntrinsicType b2( B.load(k,j+1UL) );
1819  xmm1 = xmm1 + a1 * b1;
1820  xmm2 = xmm2 + a1 * b2;
1821  xmm3 = xmm3 + a2 * b1;
1822  xmm4 = xmm4 + a2 * b2;
1823  }
1824  (~C)(i ,j ) -= sum( xmm1 );
1825  (~C)(i ,j+1UL) -= sum( xmm2 );
1826  (~C)(i+1UL,j ) -= sum( xmm3 );
1827  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1828  }
1829  if( j < N ) {
1830  IntrinsicType xmm1, xmm2;
1831  for( size_t k=0UL; k<K; k+=IT::size ) {
1832  const IntrinsicType b1( B.load(k,j) );
1833  xmm1 = xmm1 + A.load(i ,k) * b1;
1834  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1835  }
1836  (~C)(i ,j) -= sum( xmm1 );
1837  (~C)(i+1UL,j) -= sum( xmm2 );
1838  }
1839  }
1840  if( i < M ) {
1841  size_t j( 0UL );
1842  for( ; (j+4UL) <= N; j+=4UL ) {
1843  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1844  for( size_t k=0UL; k<K; k+=IT::size ) {
1845  const IntrinsicType a1( A.load(i,k) );
1846  xmm1 = xmm1 + a1 * B.load(k,j );
1847  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1848  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1849  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1850  }
1851  (~C)(i,j ) -= sum( xmm1 );
1852  (~C)(i,j+1UL) -= sum( xmm2 );
1853  (~C)(i,j+2UL) -= sum( xmm3 );
1854  (~C)(i,j+3UL) -= sum( xmm4 );
1855  }
1856  for( ; (j+2UL) <= N; j+=2UL ) {
1857  IntrinsicType xmm1, xmm2;
1858  for( size_t k=0UL; k<K; k+=IT::size ) {
1859  const IntrinsicType a1( A.load(i,k) );
1860  xmm1 = xmm1 + a1 * B.load(k,j );
1861  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1862  }
1863  (~C)(i,j ) -= sum( xmm1 );
1864  (~C)(i,j+1UL) -= sum( xmm2 );
1865  }
1866  if( j < N ) {
1867  IntrinsicType xmm1, xmm2;
1868  for( size_t k=0UL; k<K; k+=IT::size ) {
1869  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1870  }
1871  (~C)(i,j) -= sum( xmm1 );
1872  }
1873  }
1874  }
1876  //**********************************************************************************************
1877 
1878  //**Vectorized default subtraction assignment to column-major dense matrices********************
1892  template< typename MT3 // Type of the left-hand side target matrix
1893  , typename MT4 // Type of the left-hand side matrix operand
1894  , typename MT5 > // Type of the right-hand side matrix operand
1895  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1896  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B )
1897  {
1898  typedef IntrinsicTrait<ElementType> IT;
1899 
1900  const size_t M( A.rows() );
1901  const size_t N( B.columns() );
1902  const size_t K( A.columns() );
1903 
1904  size_t i( 0UL );
1905 
1906  for( ; (i+4UL) <= M; i+=4UL ) {
1907  size_t j( 0UL );
1908  for( ; (j+2UL) <= N; j+=2UL ) {
1909  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1910  for( size_t k=0UL; k<K; k+=IT::size ) {
1911  const IntrinsicType a1( A.load(i ,k) );
1912  const IntrinsicType a2( A.load(i+1UL,k) );
1913  const IntrinsicType a3( A.load(i+2UL,k) );
1914  const IntrinsicType a4( A.load(i+3UL,k) );
1915  const IntrinsicType b1( B.load(k,j ) );
1916  const IntrinsicType b2( B.load(k,j+1UL) );
1917  xmm1 = xmm1 + a1 * b1;
1918  xmm2 = xmm2 + a1 * b2;
1919  xmm3 = xmm3 + a2 * b1;
1920  xmm4 = xmm4 + a2 * b2;
1921  xmm5 = xmm5 + a3 * b1;
1922  xmm6 = xmm6 + a3 * b2;
1923  xmm7 = xmm7 + a4 * b1;
1924  xmm8 = xmm8 + a4 * b2;
1925  }
1926  (~C)(i ,j ) -= sum( xmm1 );
1927  (~C)(i ,j+1UL) -= sum( xmm2 );
1928  (~C)(i+1UL,j ) -= sum( xmm3 );
1929  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1930  (~C)(i+2UL,j ) -= sum( xmm5 );
1931  (~C)(i+2UL,j+1UL) -= sum( xmm6 );
1932  (~C)(i+3UL,j ) -= sum( xmm7 );
1933  (~C)(i+3UL,j+1UL) -= sum( xmm8 );
1934  }
1935  if( j < N ) {
1936  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1937  for( size_t k=0UL; k<K; k+=IT::size ) {
1938  const IntrinsicType b1( B.load(k,j) );
1939  xmm1 = xmm1 + A.load(i ,k) * b1;
1940  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1941  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1942  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1943  }
1944  (~C)(i ,j) -= sum( xmm1 );
1945  (~C)(i+1UL,j) -= sum( xmm2 );
1946  (~C)(i+2UL,j) -= sum( xmm3 );
1947  (~C)(i+3UL,j) -= sum( xmm4 );
1948  }
1949  }
1950  for( ; (i+2UL) <= M; i+=2UL ) {
1951  size_t j( 0UL );
1952  for( ; (j+2UL) <= N; j+=2UL ) {
1953  IntrinsicType xmm1, xmm2, xmm3, xmm4;
1954  for( size_t k=0UL; k<K; k+=IT::size ) {
1955  const IntrinsicType a1( A.load(i ,k) );
1956  const IntrinsicType a2( A.load(i+1UL,k) );
1957  const IntrinsicType b1( B.load(k,j ) );
1958  const IntrinsicType b2( B.load(k,j+1UL) );
1959  xmm1 = xmm1 + a1 * b1;
1960  xmm2 = xmm2 + a1 * b2;
1961  xmm3 = xmm3 + a2 * b1;
1962  xmm4 = xmm4 + a2 * b2;
1963  }
1964  (~C)(i ,j ) -= sum( xmm1 );
1965  (~C)(i ,j+1UL) -= sum( xmm2 );
1966  (~C)(i+1UL,j ) -= sum( xmm3 );
1967  (~C)(i+1UL,j+1UL) -= sum( xmm4 );
1968  }
1969  if( j < N ) {
1970  IntrinsicType xmm1, xmm2;
1971  for( size_t k=0UL; k<K; k+=IT::size ) {
1972  const IntrinsicType b1( B.load(k,j) );
1973  xmm1 = xmm1 + A.load(i ,k) * b1;
1974  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1975  }
1976  (~C)(i ,j) -= sum( xmm1 );
1977  (~C)(i+1UL,j) -= sum( xmm2 );
1978  }
1979  }
1980  if( i < M ) {
1981  size_t j( 0UL );
1982  for( ; (j+2UL) <= N; j+=2UL ) {
1983  IntrinsicType xmm1, xmm2;
1984  for( size_t k=0UL; k<K; k+=IT::size ) {
1985  const IntrinsicType a1( A.load(i,k) );
1986  xmm1 = xmm1 + a1 * B.load(k,j );
1987  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1988  }
1989  (~C)(i,j ) -= sum( xmm1 );
1990  (~C)(i,j+1UL) -= sum( xmm2 );
1991  }
1992  if( j < N ) {
1993  IntrinsicType xmm1, xmm2;
1994  for( size_t k=0UL; k<K; k+=IT::size ) {
1995  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1996  }
1997  (~C)(i,j) -= sum( xmm1 );
1998  }
1999  }
2000  }
2002  //**********************************************************************************************
2003 
2004  //**Default subtraction assignment to dense matrices********************************************
/*!\brief Fallback of the BLAS subtraction assignment selection to the default kernel.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseDefaultKernel<MT3,MT4,MT5> and simply
// forwards all three arguments to selectDefaultSubAssignKernel().
*/
2018  template< typename MT3 // Type of the left-hand side target matrix
2019  , typename MT4 // Type of the left-hand side matrix operand
2020  , typename MT5 > // Type of the right-hand side matrix operand
2021  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2022  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2023  {
2024  selectDefaultSubAssignKernel( C, A, B );
2025  }
2027  //**********************************************************************************************
2028 
2029  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
2030 #if BLAZE_BLAS_MODE
2031 
/*!\brief BLAS-based subtraction assignment kernel for single precision: C -= A * B via
//        cblas_sgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseSinglePrecisionKernel<MT3,MT4,MT5>. The
// sizes and the spacing() values of the three matrices are converted to \c int with
// boost::numeric_cast and passed to cblas_sgemm() with the product scaled by -1.0F and the
// target scaled by 1.0F, i.e. the product is subtracted from the existing content of \a C.
*/
2044  template< typename MT3 // Type of the left-hand side target matrix
2045  , typename MT4 // Type of the left-hand side matrix operand
2046  , typename MT5 > // Type of the right-hand side matrix operand
2047  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2048  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2049  {
2050  using boost::numeric_cast;
2051 
2055 
2056  const int M ( numeric_cast<int>( A.rows() ) );
2057  const int N ( numeric_cast<int>( B.columns() ) );
2058  const int K ( numeric_cast<int>( A.columns() ) );
2059  const int lda( numeric_cast<int>( A.spacing() ) );
2060  const int ldb( numeric_cast<int>( B.spacing() ) );
2061  const int ldc( numeric_cast<int>( C.spacing() ) );
2062 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
2063  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2064  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2065  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2066  M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
2067  }
2069 #endif
2070  //**********************************************************************************************
2071 
2072  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
2073 #if BLAZE_BLAS_MODE
2074 
/*!\brief BLAS-based subtraction assignment kernel for double precision: C -= A * B via
//        cblas_dgemm.
//
// \param C The target left-hand side dense matrix.
// \param A The left-hand side multiplication operand.
// \param B The right-hand side multiplication operand.
// \return void
//
// This overload is selected through EnableIf on UseDoublePrecisionKernel<MT3,MT4,MT5>. The
// sizes and the spacing() values of the three matrices are converted to \c int with
// boost::numeric_cast and passed to cblas_dgemm() with the product scaled by -1.0 and the
// target scaled by 1.0, i.e. the product is subtracted from the existing content of \a C.
*/
2087  template< typename MT3 // Type of the left-hand side target matrix
2088  , typename MT4 // Type of the left-hand side matrix operand
2089  , typename MT5 > // Type of the right-hand side matrix operand
2090  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2091  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2092  {
2093  using boost::numeric_cast;
2094 
2098 
2099  const int M ( numeric_cast<int>( A.rows() ) );
2100  const int N ( numeric_cast<int>( B.columns() ) );
2101  const int K ( numeric_cast<int>( A.columns() ) );
2102  const int lda( numeric_cast<int>( A.spacing() ) );
2103  const int ldb( numeric_cast<int>( B.spacing() ) );
2104  const int ldc( numeric_cast<int>( C.spacing() ) );
2105 
// The layout follows the storage order of the target: for a row-major C the operand A is passed
// untransposed and B transposed, for a column-major C it is the other way round.
// NOTE(review): this presumably reflects A being stored row-major and B column-major - the
// operand types are not visible in this excerpt, confirm.
2106  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2107  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2108  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2109  M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
2110  }
2112 #endif
2113  //**********************************************************************************************
2114 
2115  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
2116 #if BLAZE_BLAS_MODE
2117 
// BLAS kernel for the subtraction assignment of a single-precision complex product
// (C -= A * B). Enabled only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
// Calls cblas_cgemm with alpha = (-1,0) and beta = (1,0); both factors are passed by address.
// Storage order and transposition flags are derived from the storage order of MT3.
2130  template< typename MT3 // Target matrix type (left-hand side of the assignment)
2131  , typename MT4 // Type of the left multiplication operand
2132  , typename MT5 > // Type of the right multiplication operand
2133  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2134  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2135  {
2136  using boost::numeric_cast;
2137 
2141  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
2142  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
2143  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
2144  const bool rowMajor = IsRowMajorMatrix<MT3>::value;
2145  const int m = numeric_cast<int>( A.rows() );
2146  const int n = numeric_cast<int>( B.columns() );
2147  const int k = numeric_cast<int>( A.columns() );
2148  const int strideA = numeric_cast<int>( A.spacing() );
2149  const int strideB = numeric_cast<int>( B.spacing() );
2150  const int strideC = numeric_cast<int>( C.spacing() );
2151  const complex<float> minusOne( -1.0F, 0.0F );
2152  const complex<float> one( 1.0F, 0.0F );
2153 
2154  cblas_cgemm( rowMajor ? CblasRowMajor : CblasColMajor,
2155  rowMajor ? CblasNoTrans : CblasTrans,
2156  rowMajor ? CblasTrans : CblasNoTrans,
2157  m, n, k, &minusOne, A.data(), strideA, B.data(), strideB, &one, C.data(), strideC );
2158  }
2160 #endif
2161  //**********************************************************************************************
2162 
2163  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
2164 #if BLAZE_BLAS_MODE
2165 
// BLAS kernel for the subtraction assignment of a double-precision complex product
// (C -= A * B). Enabled only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
// Calls cblas_zgemm with alpha = (-1,0) and beta = (1,0); both factors are passed by address.
// Storage order and transposition flags are derived from the storage order of MT3.
2178  template< typename MT3 // Target matrix type (left-hand side of the assignment)
2179  , typename MT4 // Type of the left multiplication operand
2180  , typename MT5 > // Type of the right multiplication operand
2181  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2182  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B )
2183  {
2184  using boost::numeric_cast;
2185 
2189  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
2190  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
2191  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
2192  const bool rowMajor = IsRowMajorMatrix<MT3>::value;
2193  const int m = numeric_cast<int>( A.rows() );
2194  const int n = numeric_cast<int>( B.columns() );
2195  const int k = numeric_cast<int>( A.columns() );
2196  const int strideA = numeric_cast<int>( A.spacing() );
2197  const int strideB = numeric_cast<int>( B.spacing() );
2198  const int strideC = numeric_cast<int>( C.spacing() );
2199  const complex<double> minusOne( -1.0, 0.0 );
2200  const complex<double> one( 1.0, 0.0 );
2201 
2202  cblas_zgemm( rowMajor ? CblasRowMajor : CblasColMajor,
2203  rowMajor ? CblasNoTrans : CblasTrans,
2204  rowMajor ? CblasTrans : CblasNoTrans,
2205  m, n, k, &minusOne, A.data(), strideA, B.data(), strideB, &one, C.data(), strideC );
2206  }
2208 #endif
2209  //**********************************************************************************************
2210 
2211  //**Subtraction assignment to sparse matrices***************************************************
2212  // No special implementation for the subtraction assignment to sparse matrices.
2213  //**********************************************************************************************
2214 
2215  //**Multiplication assignment to dense matrices*************************************************
2216  // No special implementation for the multiplication assignment to dense matrices.
2217  //**********************************************************************************************
2218 
2219  //**Multiplication assignment to sparse matrices************************************************
2220  // No special implementation for the multiplication assignment to sparse matrices.
2221  //**********************************************************************************************
2222 
2223  //**Compile time checks*************************************************************************
2230  //**********************************************************************************************
2231 };
2232 //*************************************************************************************************
2233 
2234 
2235 
2236 
2237 //=================================================================================================
2238 //
2239 // DMATSCALARMULTEXPR SPECIALIZATION
2240 //
2241 //=================================================================================================
2242 
2243 //*************************************************************************************************
2251 template< typename MT1 // Type of the left-hand side dense matrix
2252  , typename MT2 // Type of the right-hand side dense matrix
2253  , typename ST > // Type of the right-hand side scalar value
2254 class DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >
2255  : public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2256  , private MatScalarMultExpr
2257  , private Computation
2258 {
2259  private:
2260  //**Type definitions****************************************************************************
2261  typedef DMatTDMatMultExpr<MT1,MT2> MMM; // Type of the wrapped matrix multiplication expression
2262  typedef typename MMM::ResultType RES; // Result type of the wrapped multiplication expression
2263  typedef typename MT1::ResultType RT1; // Result type of the left-hand side matrix operand
2264  typedef typename MT2::ResultType RT2; // Result type of the right-hand side matrix operand
2265  typedef typename RT1::ElementType ET1; // Element type of the left-hand side result type
2266  typedef typename RT2::ElementType ET2; // Element type of the right-hand side result type
2267  typedef typename MT1::CompositeType CT1; // Composite type of the left-hand side matrix operand
2268  typedef typename MT2::CompositeType CT2; // Composite type of the right-hand side matrix operand
2269  //**********************************************************************************************
2270 
2271  //**********************************************************************************************
// Compile-time flag: non-zero when the left-hand side operand type MT1 is a computation or
// requires evaluation. It selects between const RT1 and CT1 for the operand type LT.
2273  enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2274  //**********************************************************************************************
2275 
2276  //**********************************************************************************************
// Compile-time flag: non-zero when the right-hand side operand type MT2 is a computation or
// requires evaluation. It selects between const RT2 and CT2 for the operand type RT.
2278  enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
2279  //**********************************************************************************************
2280 
2281  //**********************************************************************************************
2283 
// Helper trait for the selection of the assignment strategy. Its value is non-zero when at
// least one of the two matrix operands needs an intermediate evaluation (evaluateLeft or
// evaluateRight). The template arguments are not used by the definition itself; the trait
// serves as the EnableIf/DisableIf condition of the selectAssignKernel() overloads.
2285  template< typename T1, typename T2, typename T3, typename T4 >
2286  struct UseSMPAssignKernel {
2287  enum { value = evaluateLeft || evaluateRight };
2288  };
2289  //**********************************************************************************************
2290 
2291  //**********************************************************************************************
2293 
// Helper trait enabling the single-precision BLAS kernel (the cblas_sgemm overload of
// selectBlasAssignKernel()). Its value is non-zero when IsFloat holds for the element types
// of T1, T2 and T3 and the scalar type T4 is not a complex type.
2296  template< typename T1, typename T2, typename T3, typename T4 >
2297  struct UseSinglePrecisionKernel {
2298  enum { value = IsFloat<typename T1::ElementType>::value &&
2299  IsFloat<typename T2::ElementType>::value &&
2300  IsFloat<typename T3::ElementType>::value &&
2301  !IsComplex<T4>::value };
2302  };
2303  //**********************************************************************************************
2304 
2305  //**********************************************************************************************
2307 
// Helper trait enabling the double-precision BLAS kernel (the cblas_dgemm overload of
// selectBlasAssignKernel()). Its value is non-zero when IsDouble holds for the element types
// of T1, T2 and T3 and the scalar type T4 is not a complex type.
2310  template< typename T1, typename T2, typename T3, typename T4 >
2311  struct UseDoublePrecisionKernel {
2312  enum { value = IsDouble<typename T1::ElementType>::value &&
2313  IsDouble<typename T2::ElementType>::value &&
2314  IsDouble<typename T3::ElementType>::value &&
2315  !IsComplex<T4>::value };
2316  };
2317  //**********************************************************************************************
2318 
2319  //**********************************************************************************************
2321 
// Helper trait enabling the single-precision complex BLAS kernel (the cblas_cgemm overload
// of selectBlasAssignKernel()). Its value is non-zero when the element types of T1, T2 and
// T3 are all exactly complex<float>. The scalar type is not part of the condition.
2324  template< typename T1, typename T2, typename T3 >
2325  struct UseSinglePrecisionComplexKernel {
2326  typedef complex<float> Type;
2327  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2328  IsSame<typename T2::ElementType,Type>::value &&
2329  IsSame<typename T3::ElementType,Type>::value };
2330  };
2331  //**********************************************************************************************
2332 
2333  //**********************************************************************************************
2335 
// Helper trait enabling the double-precision complex overload of selectBlasAssignKernel().
// Its value is non-zero when the element types of T1, T2 and T3 are all exactly
// complex<double>. The scalar type is not part of the condition.
2338  template< typename T1, typename T2, typename T3 >
2339  struct UseDoublePrecisionComplexKernel {
2340  typedef complex<double> Type;
2341  enum { value = IsSame<typename T1::ElementType,Type>::value &&
2342  IsSame<typename T2::ElementType,Type>::value &&
2343  IsSame<typename T3::ElementType,Type>::value };
2344  };
2345  //**********************************************************************************************
2346 
2347  //**********************************************************************************************
2349 
// Helper trait selecting the fallback overload of selectBlasAssignKernel(). Its value is
// non-zero when BLAS mode is switched off (BLAZE_BLAS_MODE evaluates to zero) or when none
// of the four BLAS kernel traits applies to the given type combination.
2351  template< typename T1, typename T2, typename T3, typename T4 >
2352  struct UseDefaultKernel {
2353  enum { value = !BLAZE_BLAS_MODE || !( UseSinglePrecisionKernel<T1,T2,T3,T4>::value ||
2354  UseDoublePrecisionKernel<T1,T2,T3,T4>::value ||
2355  UseSinglePrecisionComplexKernel<T1,T2,T3>::value ||
2356  UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
2357  };
2358  //**********************************************************************************************
2359 
2360  //**********************************************************************************************
2362 
// Helper trait selecting between the scalar and the vectorized default kernels. Its value is
// non-zero when T1, T2 and T3 are all vectorizable, the element types of T1, T2 and T3 and
// the scalar type T4 are all the same type, and IntrinsicTrait reports both addition and
// multiplication for that element type.
2364  template< typename T1, typename T2, typename T3, typename T4 >
2365  struct UseVectorizedDefaultKernel {
2366  enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2367  IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2368  IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2369  IsSame<typename T1::ElementType,T4>::value &&
2370  IntrinsicTrait<typename T1::ElementType>::addition &&
2371  IntrinsicTrait<typename T1::ElementType>::multiplication };
2372  };
2373  //**********************************************************************************************
2374 
2375  public:
2376  //**Type definitions****************************************************************************
2377  typedef DMatScalarMultExpr<MMM,ST,false> This; // Type of this DMatScalarMultExpr specialization
2378  typedef typename MultTrait<RES,ST>::Type ResultType; // Result type of scaling RES by ST, as given by MultTrait
2379  typedef typename ResultType::OppositeType OppositeType; // OppositeType of the result type
2380  typedef typename ResultType::TransposeType TransposeType; // TransposeType of the result type
2381  typedef typename ResultType::ElementType ElementType; // Element type of the result type
2382  typedef typename IntrinsicTrait<ElementType>::Type IntrinsicType; // Intrinsic type belonging to ElementType
2383  typedef const ElementType ReturnType; // Return type of the element access operator
2384  typedef const ResultType CompositeType; // Composite type of this expression (a const result type)
2385 
      // Type of the left-hand side operand: the wrapped matrix multiplication expression.
2387  typedef const DMatTDMatMultExpr<MT1,MT2> LeftOperand;
2388 
      // Type of the right-hand side operand: the scalar.
2390  typedef ST RightOperand;
2391 
      // Type used for the left matrix operand inside the kernels, chosen by evaluateLeft.
2393  typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type LT;
2394 
      // Type used for the right matrix operand inside the kernels, chosen by evaluateRight.
2396  typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type RT;
2397  //**********************************************************************************************
2398 
2399  //**Compilation flags***************************************************************************
// Compilation flag for vectorization: non-zero when both matrix operand types are
// vectorizable, both element types and the scalar type are the same type, and IntrinsicTrait
// reports addition and multiplication for that element type.
2401  enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2402  IsSame<ET1,ET2>::value &&
2403  IsSame<ET1,ST>::value &&
2404  IntrinsicTrait<ET1>::addition &&
2405  IntrinsicTrait<ET1>::multiplication };
2406 
      // Compilation flag for SMP assignment: non-zero only when neither matrix operand needs
      // an intermediate evaluation.
2408  enum { smpAssignable = !evaluateLeft && !evaluateRight };
2409  //**********************************************************************************************
2410 
2411  //**Constructor*********************************************************************************
// Constructor: stores the given matrix multiplication expression and the scalar factor.
//   matrix  The left-hand side matrix multiplication expression.
//   scalar  The right-hand side scalar of the scaling operation.
2417  explicit inline DMatScalarMultExpr( const MMM& matrix, ST scalar )
2418  : matrix_( matrix ) // Left-hand side dense matrix of the multiplication expression
2419  , scalar_( scalar ) // Right-hand side scalar of the multiplication expression
2420  {}
2421  //**********************************************************************************************
2422 
2423  //**Access operator*****************************************************************************
// 2D element access: returns element (i,j) of the wrapped multiplication expression
// multiplied by the stored scalar. Both indices are range-checked with BLAZE_INTERNAL_ASSERT.
//   i  Row index, expected to be smaller than the number of rows.
//   j  Column index, expected to be smaller than the number of columns.
2430  inline ReturnType operator()( size_t i, size_t j ) const {
2431  BLAZE_INTERNAL_ASSERT( i < matrix_.rows() , "Invalid row access index" );
2432  BLAZE_INTERNAL_ASSERT( j < matrix_.columns(), "Invalid column access index" );
2433  return matrix_(i,j) * scalar_;
2434  }
2435  //**********************************************************************************************
2436 
2437  //**Rows function*******************************************************************************
// Returns the number of rows, which is the number of rows of the wrapped expression.
2442  inline size_t rows() const {
2443  return matrix_.rows();
2444  }
2445  //**********************************************************************************************
2446 
2447  //**Columns function****************************************************************************
// Returns the number of columns, which is the number of columns of the wrapped expression.
2452  inline size_t columns() const {
2453  return matrix_.columns();
2454  }
2455  //**********************************************************************************************
2456 
2457  //**Left operand access*************************************************************************
// Returns the left-hand side operand, i.e. the stored matrix multiplication expression.
// LeftOperand is a non-reference type, so the expression object is returned by value.
2462  inline LeftOperand leftOperand() const {
2463  return matrix_;
2464  }
2465  //**********************************************************************************************
2466 
2467  //**Right operand access************************************************************************
// Returns the right-hand side operand, i.e. the stored scalar factor.
2472  inline RightOperand rightOperand() const {
2473  return scalar_;
2474  }
2475  //**********************************************************************************************
2476 
2477  //**********************************************************************************************
// Reports whether the expression can alias with the given address. The query is forwarded
// unchanged to the wrapped multiplication expression.
//   alias  The address to be checked.
2483  template< typename T >
2484  inline bool canAlias( const T* alias ) const {
2485  return matrix_.canAlias( alias );
2486  }
2487  //**********************************************************************************************
2488 
2489  //**********************************************************************************************
// Reports whether the expression is aliased with the given address. The query is forwarded
// unchanged to the wrapped multiplication expression.
//   alias  The address to be checked.
2495  template< typename T >
2496  inline bool isAliased( const T* alias ) const {
2497  return matrix_.isAliased( alias );
2498  }
2499  //**********************************************************************************************
2500 
2501  //**********************************************************************************************
// Reports the alignment state of the expression by forwarding to the wrapped multiplication
// expression.
2506  inline bool isAligned() const {
2507  return matrix_.isAligned();
2508  }
2509  //**********************************************************************************************
2510 
2511  //**********************************************************************************************
// Reports whether the expression can be used in an SMP assignment. Two conditions must hold:
// the left matrix operand of the wrapped product has more than SMP_DMATTDMATMULT_THRESHOLD
// rows, and either BLAZE_BLAS_IS_PARALLEL evaluates to zero or the number of result
// elements is below DMATTDMATMULT_THRESHOLD.
2516  inline bool canSMPAssign() const {
2517  typename MMM::LeftOperand lhsMatrix( matrix_.leftOperand() );
2518  return ( lhsMatrix.rows() > SMP_DMATTDMATMULT_THRESHOLD ) &&
2519  ( !BLAZE_BLAS_IS_PARALLEL ||
2520  ( rows() * columns() < DMATTDMATMULT_THRESHOLD ) );
2521  }
2522  //**********************************************************************************************
2523 
2524  private:
2525  //**Member variables****************************************************************************
2526  LeftOperand matrix_; // Left-hand side operand: the wrapped matrix multiplication expression
2527  RightOperand scalar_; // Right-hand side operand: the scalar factor
2528  //**********************************************************************************************
2529 
2530  //**Assignment to dense matrices****************************************************************
// Assignment of a scaled matrix multiplication (lhs = scalar * A * B) to a dense matrix of
// either storage order. An empty target returns immediately; a product with inner dimension
// zero resets the target. Otherwise both matrix operands are converted to LT/RT and the
// work is passed to selectAssignKernel() together with the scalar.
//   lhs  The target dense matrix; its size must already match the expression.
//   rhs  The scaled multiplication expression to be assigned.
2539  template< typename MT3 // Type of the target dense matrix
2540  , bool SO > // Storage order of the target dense matrix
2541  friend inline void assign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
2542  {
2544 
2545  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
2546  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
2547 
2548  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2549  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2550 
      // Nothing to compute for an empty target. With an inner dimension of zero no product
      // terms exist, so the target is reset instead.
2551  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL ) {
2552  return;
2553  }
2554  else if( left.columns() == 0UL ) {
2555  reset( ~lhs );
2556  return;
2557  }
2558 
2559  LT A( left ); // Evaluation of the left-hand side dense matrix operand
2560  RT B( right ); // Evaluation of the right-hand side dense matrix operand
2561 
2562  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
2563  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
2564  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
2565  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
2566  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
2567  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
2568 
2569  DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2570  }
2571  //**********************************************************************************************
2572 
2573  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment when no SMP assignment is used, i.e. when
// UseSMPAssignKernel is not set. Targets with at least DMATTDMATMULT_THRESHOLD elements go
// to the BLAS kernel selection; smaller targets use the default kernels.
//   C       The target matrix.
//   A       The left multiplication operand.
//   B       The right multiplication operand.
//   scalar  The scaling factor.
2584  template< typename MT3 // Target matrix type
2585  , typename MT4 // Type of the left multiplication operand
2586  , typename MT5 // Type of the right multiplication operand
2587  , typename ST2 > // Type of the scaling factor
2588  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2589  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2590  {
2591  if( C.rows() * C.columns() >= DMATTDMATMULT_THRESHOLD )
2592  DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2593  else
2594  DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2595  }
2596  //**********************************************************************************************
2597 
2598  //**Assignment to dense matrices (kernel selection)*********************************************
// Kernel selection for the assignment when UseSMPAssignKernel is set, i.e. when at least one
// operand had to be evaluated. The scaled product of the two operands is rebuilt as an
// expression and handed to smpAssign().
//   C       The target matrix.
//   A       The left multiplication operand.
//   B       The right multiplication operand.
//   scalar  The scaling factor.
2609  template< typename MT3 // Type of the left-hand side target matrix
2610  , typename MT4 // Type of the left-hand side matrix operand
2611  , typename MT5 // Type of the right-hand side matrix operand
2612  , typename ST2 > // Type of the scalar value
2613  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2614  selectAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2615  {
2616  smpAssign( C, A * B * scalar );
2617  }
2618  //**********************************************************************************************
2619 
2620  //**Default assignment to dense matrices********************************************************
// Scalar (non-vectorized) default kernel for C = scalar * A * B, used when
// UseVectorizedDefaultKernel does not apply. Each row of C is initialised from the first
// term of the product, the remaining terms are accumulated, and the finished row is scaled.
// The first term reads A(row,0) and B(0,col), so the inner dimension must be non-zero; the
// calling assign() function handles the zero case before any kernel is selected.
2634  template< typename MT3 // Target matrix type
2635  , typename MT4 // Type of the left multiplication operand
2636  , typename MT5 // Type of the right multiplication operand
2637  , typename ST2 > // Type of the scaling factor
2638  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2639  selectDefaultAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2640  {
2641  for( size_t row=0UL; row<A.rows(); ++row ) {
2642  for( size_t col=0UL; col<B.columns(); ++col ) {
2643  C(row,col) = A(row,0UL) * B(0UL,col);
2644  }
2645  for( size_t inner=1UL; inner<A.columns(); ++inner ) {
2646  for( size_t col=0UL; col<B.columns(); ++col ) {
2647  C(row,col) += A(row,inner) * B(inner,col);
2648  }
2649  }
2650  for( size_t col=0UL; col<B.columns(); ++col ) {
2651  C(row,col) *= scalar;
2652  }
2653  }
2654  }
2655  //**********************************************************************************************
2656 
2657  //**Vectorized default assignment to row-major dense matrices***********************************
2671  template< typename MT3 // Type of the left-hand side target matrix
2672  , typename MT4 // Type of the left-hand side matrix operand
2673  , typename MT5 // Type of the right-hand side matrix operand
2674  , typename ST2 > // Type of the scalar value
2675  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2676  selectDefaultAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
2677  {
2678  typedef IntrinsicTrait<ElementType> IT;
2679 
2680  const size_t M( A.rows() );
2681  const size_t N( B.columns() );
2682  const size_t K( A.columns() );
2683 
2684  size_t i( 0UL );
2685 
2686  for( ; (i+2UL) <= M; i+=2UL ) {
2687  size_t j( 0UL );
2688  for( ; (j+4UL) <= N; j+=4UL ) {
2689  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2690  for( size_t k=0UL; k<K; k+=IT::size ) {
2691  const IntrinsicType a1( A.load(i ,k) );
2692  const IntrinsicType a2( A.load(i+1UL,k) );
2693  const IntrinsicType b1( B.load(k,j ) );
2694  const IntrinsicType b2( B.load(k,j+1UL) );
2695  const IntrinsicType b3( B.load(k,j+2UL) );
2696  const IntrinsicType b4( B.load(k,j+3UL) );
2697  xmm1 = xmm1 + a1 * b1;
2698  xmm2 = xmm2 + a1 * b2;
2699  xmm3 = xmm3 + a1 * b3;
2700  xmm4 = xmm4 + a1 * b4;
2701  xmm5 = xmm5 + a2 * b1;
2702  xmm6 = xmm6 + a2 * b2;
2703  xmm7 = xmm7 + a2 * b3;
2704  xmm8 = xmm8 + a2 * b4;
2705  }
2706  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2707  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2708  (~C)(i ,j+2UL) = sum( xmm3 ) * scalar;
2709  (~C)(i ,j+3UL) = sum( xmm4 ) * scalar;
2710  (~C)(i+1UL,j ) = sum( xmm5 ) * scalar;
2711  (~C)(i+1UL,j+1UL) = sum( xmm6 ) * scalar;
2712  (~C)(i+1UL,j+2UL) = sum( xmm7 ) * scalar;
2713  (~C)(i+1UL,j+3UL) = sum( xmm8 ) * scalar;
2714  }
2715  for( ; (j+2UL) <= N; j+=2UL ) {
2716  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2717  for( size_t k=0UL; k<K; k+=IT::size ) {
2718  const IntrinsicType a1( A.load(i ,k) );
2719  const IntrinsicType a2( A.load(i+1UL,k) );
2720  const IntrinsicType b1( B.load(k,j ) );
2721  const IntrinsicType b2( B.load(k,j+1UL) );
2722  xmm1 = xmm1 + a1 * b1;
2723  xmm2 = xmm2 + a1 * b2;
2724  xmm3 = xmm3 + a2 * b1;
2725  xmm4 = xmm4 + a2 * b2;
2726  }
2727  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2728  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2729  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2730  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2731  }
2732  if( j < N ) {
2733  IntrinsicType xmm1, xmm2;
2734  for( size_t k=0UL; k<K; k+=IT::size ) {
2735  const IntrinsicType b1( B.load(k,j) );
2736  xmm1 = xmm1 + A.load(i ,k) * b1;
2737  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2738  }
2739  (~C)(i ,j) = sum( xmm1 ) * scalar;
2740  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2741  }
2742  }
2743  if( i < M ) {
2744  size_t j( 0UL );
2745  for( ; (j+4UL) <= N; j+=4UL ) {
2746  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2747  for( size_t k=0UL; k<K; k+=IT::size ) {
2748  const IntrinsicType a1( A.load(i,k) );
2749  xmm1 = xmm1 + a1 * B.load(k,j );
2750  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2751  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2752  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2753  }
2754  (~C)(i,j ) = sum( xmm1 ) * scalar;
2755  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2756  (~C)(i,j+2UL) = sum( xmm3 ) * scalar;
2757  (~C)(i,j+3UL) = sum( xmm4 ) * scalar;
2758  }
2759  for( ; (j+2UL) <= N; j+=2UL ) {
2760  IntrinsicType xmm1, xmm2;
2761  for( size_t k=0UL; k<K; k+=IT::size ) {
2762  const IntrinsicType a1( A.load(i,k) );
2763  xmm1 = xmm1 + a1 * B.load(k,j );
2764  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2765  }
2766  (~C)(i,j ) = sum( xmm1 ) * scalar;
2767  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2768  }
2769  if( j < N ) {
2770  IntrinsicType xmm1, xmm2;
2771  for( size_t k=0UL; k<K; k+=IT::size ) {
2772  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2773  }
2774  (~C)(i,j) = sum( xmm1 ) * scalar;
2775  }
2776  }
2777  }
2778  //**********************************************************************************************
2779 
2780  //**Vectorized default assignment to column-major dense matrices********************************
2794  template< typename MT3 // Type of the left-hand side target matrix
2795  , typename MT4 // Type of the left-hand side matrix operand
2796  , typename MT5 // Type of the right-hand side matrix operand
2797  , typename ST2 > // Type of the scalar value
2798  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2799  selectDefaultAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
2800  {
2801  typedef IntrinsicTrait<ElementType> IT;
2802 
2803  const size_t M( A.rows() );
2804  const size_t N( B.columns() );
2805  const size_t K( A.columns() );
2806 
2807  size_t i( 0UL );
2808 
2809  for( ; (i+4UL) <= M; i+=4UL ) {
2810  size_t j( 0UL );
2811  for( ; (j+2UL) <= N; j+=2UL ) {
2812  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2813  for( size_t k=0UL; k<K; k+=IT::size ) {
2814  const IntrinsicType a1( A.load(i ,k) );
2815  const IntrinsicType a2( A.load(i+1UL,k) );
2816  const IntrinsicType a3( A.load(i+2UL,k) );
2817  const IntrinsicType a4( A.load(i+3UL,k) );
2818  const IntrinsicType b1( B.load(k,j ) );
2819  const IntrinsicType b2( B.load(k,j+1UL) );
2820  xmm1 = xmm1 + a1 * b1;
2821  xmm2 = xmm2 + a1 * b2;
2822  xmm3 = xmm3 + a2 * b1;
2823  xmm4 = xmm4 + a2 * b2;
2824  xmm5 = xmm5 + a3 * b1;
2825  xmm6 = xmm6 + a3 * b2;
2826  xmm7 = xmm7 + a4 * b1;
2827  xmm8 = xmm8 + a4 * b2;
2828  }
2829  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2830  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2831  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2832  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2833  (~C)(i+2UL,j ) = sum( xmm5 ) * scalar;
2834  (~C)(i+2UL,j+1UL) = sum( xmm6 ) * scalar;
2835  (~C)(i+3UL,j ) = sum( xmm7 ) * scalar;
2836  (~C)(i+3UL,j+1UL) = sum( xmm8 ) * scalar;
2837  }
2838  if( j < N ) {
2839  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2840  for( size_t k=0UL; k<K; k+=IT::size ) {
2841  const IntrinsicType b1( B.load(k,j) );
2842  xmm1 = xmm1 + A.load(i ,k) * b1;
2843  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2844  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2845  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2846  }
2847  (~C)(i ,j) = sum( xmm1 ) * scalar;
2848  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2849  (~C)(i+2UL,j) = sum( xmm3 ) * scalar;
2850  (~C)(i+3UL,j) = sum( xmm4 ) * scalar;
2851  }
2852  }
2853  for( ; (i+2UL) <= M; i+=2UL ) {
2854  size_t j( 0UL );
2855  for( ; (j+2UL) <= N; j+=2UL ) {
2856  IntrinsicType xmm1, xmm2, xmm3, xmm4;
2857  for( size_t k=0UL; k<K; k+=IT::size ) {
2858  const IntrinsicType a1( A.load(i ,k) );
2859  const IntrinsicType a2( A.load(i+1UL,k) );
2860  const IntrinsicType b1( B.load(k,j ) );
2861  const IntrinsicType b2( B.load(k,j+1UL) );
2862  xmm1 = xmm1 + a1 * b1;
2863  xmm2 = xmm2 + a1 * b2;
2864  xmm3 = xmm3 + a2 * b1;
2865  xmm4 = xmm4 + a2 * b2;
2866  }
2867  (~C)(i ,j ) = sum( xmm1 ) * scalar;
2868  (~C)(i ,j+1UL) = sum( xmm2 ) * scalar;
2869  (~C)(i+1UL,j ) = sum( xmm3 ) * scalar;
2870  (~C)(i+1UL,j+1UL) = sum( xmm4 ) * scalar;
2871  }
2872  if( j < N ) {
2873  IntrinsicType xmm1, xmm2;
2874  for( size_t k=0UL; k<K; k+=IT::size ) {
2875  const IntrinsicType b1( B.load(k,j) );
2876  xmm1 = xmm1 + A.load(i ,k) * b1;
2877  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2878  }
2879  (~C)(i ,j) = sum( xmm1 ) * scalar;
2880  (~C)(i+1UL,j) = sum( xmm2 ) * scalar;
2881  }
2882  }
2883  if( i < M ) {
2884  size_t j( 0UL );
2885  for( ; (j+2UL) <= N; j+=2UL ) {
2886  IntrinsicType xmm1, xmm2;
2887  for( size_t k=0UL; k<K; k+=IT::size ) {
2888  const IntrinsicType a1( A.load(i,k) );
2889  xmm1 = xmm1 + a1 * B.load(k,j );
2890  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2891  }
2892  (~C)(i,j ) = sum( xmm1 ) * scalar;
2893  (~C)(i,j+1UL) = sum( xmm2 ) * scalar;
2894  }
2895  if( j < N ) {
2896  IntrinsicType xmm1, xmm2;
2897  for( size_t k=0UL; k<K; k+=IT::size ) {
2898  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2899  }
2900  (~C)(i,j) = sum( xmm1 ) * scalar;
2901  }
2902  }
2903  }
2904  //**********************************************************************************************
2905 
2906  //**BLAS-based assignment to dense matrices (default)*******************************************
// Fallback overload of the BLAS kernel selection, enabled when UseDefaultKernel applies
// (BLAS mode switched off or no BLAS kernel matching the types). It forwards all arguments
// to selectDefaultAssignKernel().
//   C       The target matrix.
//   A       The left multiplication operand.
//   B       The right multiplication operand.
//   scalar  The scaling factor.
2920  template< typename MT3 // Type of the left-hand side target matrix
2921  , typename MT4 // Type of the left-hand side matrix operand
2922  , typename MT5 // Type of the right-hand side matrix operand
2923  , typename ST2 > // Type of the scalar value
2924  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2925  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2926  {
2927  selectDefaultAssignKernel( C, A, B, scalar );
2928  }
2929  //**********************************************************************************************
2930 
2931  //**BLAS-based assignment to dense matrices (single precision)**********************************
2932 #if BLAZE_BLAS_MODE
2933 
// BLAS kernel for the assignment of a scaled single-precision product (C = scalar * A * B).
// Enabled only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. The matrix dimensions
// and spacings are converted to int via boost::numeric_cast and handed to cblas_sgemm with
// alpha = scalar and beta = 0.0F, so the previous content of C does not enter the result.
// Storage order and transposition flags are derived from the storage order of MT3.
2946  template< typename MT3 // Target matrix type
2947  , typename MT4 // Type of the left multiplication operand
2948  , typename MT5 // Type of the right multiplication operand
2949  , typename ST2 > // Type of the scaling factor
2950  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2951  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2952  {
2953  using boost::numeric_cast;
2954 
2958  const bool rowMajor = IsRowMajorMatrix<MT3>::value;
2959  const int m = numeric_cast<int>( A.rows() );
2960  const int n = numeric_cast<int>( B.columns() );
2961  const int k = numeric_cast<int>( A.columns() );
2962  const int strideA = numeric_cast<int>( A.spacing() );
2963  const int strideB = numeric_cast<int>( B.spacing() );
2964  const int strideC = numeric_cast<int>( C.spacing() );
2965 
2966  cblas_sgemm( rowMajor ? CblasRowMajor : CblasColMajor,
2967  rowMajor ? CblasNoTrans : CblasTrans,
2968  rowMajor ? CblasTrans : CblasNoTrans,
2969  m, n, k, scalar, A.data(), strideA, B.data(), strideB, 0.0F, C.data(), strideC );
2970  }
2971 #endif
2972  //**********************************************************************************************
2973 
2974  //**BLAS-based assignment to dense matrices (double precision)**********************************
2975 #if BLAZE_BLAS_MODE
2976 
// BLAS kernel for the assignment of a scaled double-precision product (C = scalar * A * B).
// Enabled only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. The matrix dimensions
// and spacings are converted to int via boost::numeric_cast and handed to cblas_dgemm with
// alpha = scalar and beta = 0.0, so the previous content of C does not enter the result.
// Storage order and transposition flags are derived from the storage order of MT3.
2989  template< typename MT3 // Target matrix type
2990  , typename MT4 // Type of the left multiplication operand
2991  , typename MT5 // Type of the right multiplication operand
2992  , typename ST2 > // Type of the scaling factor
2993  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2994  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
2995  {
2996  using boost::numeric_cast;
2997 
3001  const bool rowMajor = IsRowMajorMatrix<MT3>::value;
3002  const int m = numeric_cast<int>( A.rows() );
3003  const int n = numeric_cast<int>( B.columns() );
3004  const int k = numeric_cast<int>( A.columns() );
3005  const int strideA = numeric_cast<int>( A.spacing() );
3006  const int strideB = numeric_cast<int>( B.spacing() );
3007  const int strideC = numeric_cast<int>( C.spacing() );
3008 
3009  cblas_dgemm( rowMajor ? CblasRowMajor : CblasColMajor,
3010  rowMajor ? CblasNoTrans : CblasTrans,
3011  rowMajor ? CblasTrans : CblasNoTrans,
3012  m, n, k, scalar, A.data(), strideA, B.data(), strideB, 0.0, C.data(), strideC );
3013  }
3014 #endif
3015  //**********************************************************************************************
3016 
3017  //**BLAS-based assignment to dense matrices (single precision complex)**************************
3018 #if BLAZE_BLAS_MODE
3019 
// BLAS kernel for the assignment of a scaled single-precision complex product
// (C = scalar * A * B). Enabled only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. The scalar is converted to complex<float> and passed by address as alpha to
// cblas_cgemm; beta is (0,0), so the previous content of C does not enter the result.
// Storage order and transposition flags are derived from the storage order of MT3.
3032  template< typename MT3 // Target matrix type
3033  , typename MT4 // Type of the left multiplication operand
3034  , typename MT5 // Type of the right multiplication operand
3035  , typename ST2 > // Type of the scaling factor
3036  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3037  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3038  {
3039  using boost::numeric_cast;
3040 
3044  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3045  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3046  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3047  const bool rowMajor = IsRowMajorMatrix<MT3>::value;
3048  const int m = numeric_cast<int>( A.rows() );
3049  const int n = numeric_cast<int>( B.columns() );
3050  const int k = numeric_cast<int>( A.columns() );
3051  const int strideA = numeric_cast<int>( A.spacing() );
3052  const int strideB = numeric_cast<int>( B.spacing() );
3053  const int strideC = numeric_cast<int>( C.spacing() );
3054  const complex<float> factor( scalar );
3055  const complex<float> zero( 0.0F, 0.0F );
3056 
3057  cblas_cgemm( rowMajor ? CblasRowMajor : CblasColMajor,
3058  rowMajor ? CblasNoTrans : CblasTrans,
3059  rowMajor ? CblasTrans : CblasNoTrans,
3060  m, n, k, &factor, A.data(), strideA, B.data(), strideB, &zero, C.data(), strideC );
3061  }
3062 #endif
3063  //**********************************************************************************************
3064 
3065  //**BLAS-based assignment to dense matrices (double precision complex)**************************
3066 #if BLAZE_BLAS_MODE
3067 
// BLAS-based assignment kernel for a scaled matrix product (double precision complex).
//
// Selected only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is satisfied (EnableIf).
// Forwards the operands to cblas_zgemm with alpha = scalar and beta = (0,0), so that the
// previous contents of C do not contribute to the result (per the BLAS gemm contract).
//
// C      - target matrix that receives the result
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor, converted to complex<double> and handed to zgemm as alpha
3080  template< typename MT3 // Type of the left-hand side target matrix
3081  , typename MT4 // Type of the left-hand side matrix operand
3082  , typename MT5 // Type of the right-hand side matrix operand
3083  , typename ST2 > // Type of the scalar value
3084  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3085  selectBlasAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3086  {
3087  using boost::numeric_cast;
3088 
      // Compile-time checks: the value_type of every element type must be double.
3092  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3093  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3094  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3095 
      // The CBLAS interface takes int dimensions; numeric_cast converts the size_t values
      // (range-checked according to the Boost documentation).
3096  const int M ( numeric_cast<int>( A.rows() ) );
3097  const int N ( numeric_cast<int>( B.columns() ) );
3098  const int K ( numeric_cast<int>( A.columns() ) );
      // The leading dimensions are taken from the spacing() of each matrix.
3099  const int lda( numeric_cast<int>( A.spacing() ) );
3100  const int ldb( numeric_cast<int>( B.spacing() ) );
3101  const int ldc( numeric_cast<int>( C.spacing() ) );
      // zgemm receives alpha and beta by address, hence the named complex locals.
3102  const complex<double> alpha( scalar );
3103  const complex<double> beta ( 0.0, 0.0 );
3104 
      // Layout and transposition flags follow the storage order of the target C:
      //   row-major C    -> CblasRowMajor, A not transposed, B transposed
      //   column-major C -> CblasColMajor, A transposed,     B not transposed
      // NOTE(review): presumably because A is stored row-major and B column-major in this
      // expression - confirm against the operand types.
3105  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3106  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3107  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3108  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3109  }
3110 #endif
3111  //**********************************************************************************************
3112 
3113  //**Assignment to sparse matrices***************************************************************
// Assignment of a scaled matrix product to a sparse matrix.
//
// The expression rhs is first evaluated into a temporary of type TmpType, which is then
// handed to smpAssign together with the target.
//
// lhs - target sparse matrix
// rhs - scaled multiplication expression to be assigned
3125  template< typename MT // Type of the target sparse matrix
3126  , bool SO > // Storage order of the target sparse matrix
3127  friend inline void assign( SparseMatrix<MT,SO>& lhs, const DMatScalarMultExpr& rhs )
3128  {
3130 
      // TmpType is picked by SelectType from OppositeType / ResultType depending on SO.
      // NOTE(review): presumably so that the temporary has the storage order of the
      // target - confirm against SelectType's definition.
3131  typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
3132 
3139 
3140  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3141  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3142 
      // Evaluate the expression once, then assign the evaluated result to the target.
3143  const TmpType tmp( rhs );
3144  smpAssign( ~lhs, tmp );
3145  }
3146  //**********************************************************************************************
3147 
3148  //**Addition assignment to dense matrices*******************************************************
// Addition assignment of a scaled matrix product to a dense matrix.
//
// Extracts the two operands of the product from rhs.matrix_, returns early for empty
// problems, evaluates both operands (as LT and RT) and dispatches to
// selectAddAssignKernel together with the scalar rhs.scalar_.
//
// lhs - target dense matrix
// rhs - scaled multiplication expression to be added
3160  template< typename MT3 // Type of the target dense matrix
3161  , bool SO > // Storage order of the target dense matrix
3162  friend inline void addAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3163  {
3165 
3166  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3167  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3168 
3169  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3170  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3171 
      // Nothing to add if the target is empty or the inner dimension of the product is zero.
3172  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3173  return;
3174  }
3175 
3176  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3177  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3178 
      // Sanity checks: evaluation must preserve the operand sizes, and they must fit the target.
3179  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3180  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3181  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3182  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3183  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3184  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3185 
3186  DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3187  }
3188  //**********************************************************************************************
3189 
3190  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment of a scaled matrix product.
//
// This overload participates only when UseSMPAssignKernel<MT3,MT4,MT5,ST2> is NOT
// satisfied (DisableIf). It picks the default kernel when the number of elements of C is
// below DMATTDMATMULT_THRESHOLD and the BLAS kernel otherwise.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3201  template< typename MT3 // Type of the left-hand side target matrix
3202  , typename MT4 // Type of the left-hand side matrix operand
3203  , typename MT5 // Type of the right-hand side matrix operand
3204  , typename ST2 > // Type of the scalar value
3205  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3206  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3207  {
      // The decision is based on the size of the target (rows * columns) only.
3208  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
3209  DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3210  else
3211  DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3212  }
3213  //**********************************************************************************************
3214 
3215  //**Addition assignment to dense matrices (kernel selection)************************************
// Kernel selection for the addition assignment of a scaled matrix product (SMP case).
//
// This overload participates only when UseSMPAssignKernel<MT3,MT4,MT5,ST2> is satisfied
// (EnableIf). It rebuilds the expression A * B * scalar from the evaluated operands and
// passes it to smpAddAssign.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3226  template< typename MT3 // Type of the left-hand side target matrix
3227  , typename MT4 // Type of the left-hand side matrix operand
3228  , typename MT5 // Type of the right-hand side matrix operand
3229  , typename ST2 > // Type of the scalar value
3230  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3231  selectAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3232  {
3233  smpAddAssign( C, A * B * scalar );
3234  }
3235  //**********************************************************************************************
3236 
3237  //**Default addition assignment to dense matrices***********************************************
// Default (non-vectorized) kernel for the addition assignment of a scaled matrix product.
//
// This overload participates only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> is NOT
// satisfied (DisableIf). The product A * B * scalar is evaluated into a temporary of type
// ResultType, which is then added to C via addAssign.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3251  template< typename MT3 // Type of the left-hand side target matrix
3252  , typename MT4 // Type of the left-hand side matrix operand
3253  , typename MT5 // Type of the right-hand side matrix operand
3254  , typename ST2 > // Type of the scalar value
3255  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3256  selectDefaultAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3257  {
3258  const ResultType tmp( A * B * scalar );
3259  addAssign( C, tmp );
3260  }
3261  //**********************************************************************************************
3262 
3263  //**Vectorized default addition assignment to row-major dense matrices**************************
3277  template< typename MT3 // Type of the left-hand side target matrix
3278  , typename MT4 // Type of the left-hand side matrix operand
3279  , typename MT5 // Type of the right-hand side matrix operand
3280  , typename ST2 > // Type of the scalar value
3281  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3282  selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3283  {
3284  typedef IntrinsicTrait<ElementType> IT;
3285 
3286  const size_t M( A.rows() );
3287  const size_t N( B.columns() );
3288  const size_t K( A.columns() );
3289 
3290  size_t i( 0UL );
3291 
3292  for( ; (i+2UL) <= M; i+=2UL ) {
3293  size_t j( 0UL );
3294  for( ; (j+4UL) <= N; j+=4UL ) {
3295  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3296  for( size_t k=0UL; k<K; k+=IT::size ) {
3297  const IntrinsicType a1( A.load(i ,k) );
3298  const IntrinsicType a2( A.load(i+1UL,k) );
3299  const IntrinsicType b1( B.load(k,j ) );
3300  const IntrinsicType b2( B.load(k,j+1UL) );
3301  const IntrinsicType b3( B.load(k,j+2UL) );
3302  const IntrinsicType b4( B.load(k,j+3UL) );
3303  xmm1 = xmm1 + a1 * b1;
3304  xmm2 = xmm2 + a1 * b2;
3305  xmm3 = xmm3 + a1 * b3;
3306  xmm4 = xmm4 + a1 * b4;
3307  xmm5 = xmm5 + a2 * b1;
3308  xmm6 = xmm6 + a2 * b2;
3309  xmm7 = xmm7 + a2 * b3;
3310  xmm8 = xmm8 + a2 * b4;
3311  }
3312  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3313  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3314  (~C)(i ,j+2UL) += sum( xmm3 ) * scalar;
3315  (~C)(i ,j+3UL) += sum( xmm4 ) * scalar;
3316  (~C)(i+1UL,j ) += sum( xmm5 ) * scalar;
3317  (~C)(i+1UL,j+1UL) += sum( xmm6 ) * scalar;
3318  (~C)(i+1UL,j+2UL) += sum( xmm7 ) * scalar;
3319  (~C)(i+1UL,j+3UL) += sum( xmm8 ) * scalar;
3320  }
3321  for( ; (j+2UL) <= N; j+=2UL ) {
3322  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3323  for( size_t k=0UL; k<K; k+=IT::size ) {
3324  const IntrinsicType a1( A.load(i ,k) );
3325  const IntrinsicType a2( A.load(i+1UL,k) );
3326  const IntrinsicType b1( B.load(k,j ) );
3327  const IntrinsicType b2( B.load(k,j+1UL) );
3328  xmm1 = xmm1 + a1 * b1;
3329  xmm2 = xmm2 + a1 * b2;
3330  xmm3 = xmm3 + a2 * b1;
3331  xmm4 = xmm4 + a2 * b2;
3332  }
3333  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3334  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3335  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3336  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3337  }
3338  if( j < N ) {
3339  IntrinsicType xmm1, xmm2;
3340  for( size_t k=0UL; k<K; k+=IT::size ) {
3341  const IntrinsicType b1( B.load(k,j) );
3342  xmm1 = xmm1 + A.load(i ,k) * b1;
3343  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3344  }
3345  (~C)(i ,j) += sum( xmm1 ) * scalar;
3346  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3347  }
3348  }
3349  if( i < M ) {
3350  size_t j( 0UL );
3351  for( ; (j+4UL) <= N; j+=4UL ) {
3352  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3353  for( size_t k=0UL; k<K; k+=IT::size ) {
3354  const IntrinsicType a1( A.load(i,k) );
3355  xmm1 = xmm1 + a1 * B.load(k,j );
3356  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3357  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3358  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3359  }
3360  (~C)(i,j ) += sum( xmm1 ) * scalar;
3361  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3362  (~C)(i,j+2UL) += sum( xmm3 ) * scalar;
3363  (~C)(i,j+3UL) += sum( xmm4 ) * scalar;
3364  }
3365  for( ; (j+2UL) <= N; j+=2UL ) {
3366  IntrinsicType xmm1, xmm2;
3367  for( size_t k=0UL; k<K; k+=IT::size ) {
3368  const IntrinsicType a1( A.load(i,k) );
3369  xmm1 = xmm1 + a1 * B.load(k,j );
3370  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3371  }
3372  (~C)(i,j ) += sum( xmm1 ) * scalar;
3373  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3374  }
3375  if( j < N ) {
3376  IntrinsicType xmm1, xmm2;
3377  for( size_t k=0UL; k<K; k+=IT::size ) {
3378  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3379  }
3380  (~C)(i,j) += sum( xmm1 ) * scalar;
3381  }
3382  }
3383  }
3384  //**********************************************************************************************
3385 
3386  //**Vectorized default addition assignment to column-major dense matrices***********************
3400  template< typename MT3 // Type of the left-hand side target matrix
3401  , typename MT4 // Type of the left-hand side matrix operand
3402  , typename MT5 // Type of the right-hand side matrix operand
3403  , typename ST2 > // Type of the scalar value
3404  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3405  selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3406  {
3407  typedef IntrinsicTrait<ElementType> IT;
3408 
3409  const size_t M( A.rows() );
3410  const size_t N( B.columns() );
3411  const size_t K( A.columns() );
3412 
3413  size_t i( 0UL );
3414 
3415  for( ; (i+4UL) <= M; i+=4UL ) {
3416  size_t j( 0UL );
3417  for( ; (j+2UL) <= N; j+=2UL ) {
3418  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3419  for( size_t k=0UL; k<K; k+=IT::size ) {
3420  const IntrinsicType a1( A.load(i ,k) );
3421  const IntrinsicType a2( A.load(i+1UL,k) );
3422  const IntrinsicType a3( A.load(i+2UL,k) );
3423  const IntrinsicType a4( A.load(i+3UL,k) );
3424  const IntrinsicType b1( B.load(k,j ) );
3425  const IntrinsicType b2( B.load(k,j+1UL) );
3426  xmm1 = xmm1 + a1 * b1;
3427  xmm2 = xmm2 + a1 * b2;
3428  xmm3 = xmm3 + a2 * b1;
3429  xmm4 = xmm4 + a2 * b2;
3430  xmm5 = xmm5 + a3 * b1;
3431  xmm6 = xmm6 + a3 * b2;
3432  xmm7 = xmm7 + a4 * b1;
3433  xmm8 = xmm8 + a4 * b2;
3434  }
3435  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3436  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3437  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3438  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3439  (~C)(i+2UL,j ) += sum( xmm5 ) * scalar;
3440  (~C)(i+2UL,j+1UL) += sum( xmm6 ) * scalar;
3441  (~C)(i+3UL,j ) += sum( xmm7 ) * scalar;
3442  (~C)(i+3UL,j+1UL) += sum( xmm8 ) * scalar;
3443  }
3444  if( j < N ) {
3445  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3446  for( size_t k=0UL; k<K; k+=IT::size ) {
3447  const IntrinsicType b1( B.load(k,j) );
3448  xmm1 = xmm1 + A.load(i ,k) * b1;
3449  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3450  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3451  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3452  }
3453  (~C)(i ,j) += sum( xmm1 ) * scalar;
3454  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3455  (~C)(i+2UL,j) += sum( xmm3 ) * scalar;
3456  (~C)(i+3UL,j) += sum( xmm4 ) * scalar;
3457  }
3458  }
3459  for( ; (i+2UL) <= M; i+=2UL ) {
3460  size_t j( 0UL );
3461  for( ; (j+2UL) <= N; j+=2UL ) {
3462  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3463  for( size_t k=0UL; k<K; k+=IT::size ) {
3464  const IntrinsicType a1( A.load(i ,k) );
3465  const IntrinsicType a2( A.load(i+1UL,k) );
3466  const IntrinsicType b1( B.load(k,j ) );
3467  const IntrinsicType b2( B.load(k,j+1UL) );
3468  xmm1 = xmm1 + a1 * b1;
3469  xmm2 = xmm2 + a1 * b2;
3470  xmm3 = xmm3 + a2 * b1;
3471  xmm4 = xmm4 + a2 * b2;
3472  }
3473  (~C)(i ,j ) += sum( xmm1 ) * scalar;
3474  (~C)(i ,j+1UL) += sum( xmm2 ) * scalar;
3475  (~C)(i+1UL,j ) += sum( xmm3 ) * scalar;
3476  (~C)(i+1UL,j+1UL) += sum( xmm4 ) * scalar;
3477  }
3478  if( j < N ) {
3479  IntrinsicType xmm1, xmm2;
3480  for( size_t k=0UL; k<K; k+=IT::size ) {
3481  const IntrinsicType b1( B.load(k,j) );
3482  xmm1 = xmm1 + A.load(i ,k) * b1;
3483  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3484  }
3485  (~C)(i ,j) += sum( xmm1 ) * scalar;
3486  (~C)(i+1UL,j) += sum( xmm2 ) * scalar;
3487  }
3488  }
3489  if( i < M ) {
3490  size_t j( 0UL );
3491  for( ; (j+2UL) <= N; j+=2UL ) {
3492  IntrinsicType xmm1, xmm2;
3493  for( size_t k=0UL; k<K; k+=IT::size ) {
3494  const IntrinsicType a1( A.load(i,k) );
3495  xmm1 = xmm1 + a1 * B.load(k,j );
3496  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3497  }
3498  (~C)(i,j ) += sum( xmm1 ) * scalar;
3499  (~C)(i,j+1UL) += sum( xmm2 ) * scalar;
3500  }
3501  if( j < N ) {
3502  IntrinsicType xmm1, xmm2;
3503  for( size_t k=0UL; k<K; k+=IT::size ) {
3504  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3505  }
3506  (~C)(i,j) += sum( xmm1 ) * scalar;
3507  }
3508  }
3509  }
3510  //**********************************************************************************************
3511 
3512  //**BLAS-based addition assignment to dense matrices (default)**********************************
// BLAS-based addition assignment of a scaled matrix product (default case).
//
// Selected only when UseDefaultKernel<MT3,MT4,MT5,ST2> is satisfied (EnableIf). It does not
// call BLAS at all but forwards to selectDefaultAddAssignKernel.
// NOTE(review): presumably the fallback for element types without a matching BLAS
// routine - confirm against the definition of UseDefaultKernel.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3526  template< typename MT3 // Type of the left-hand side target matrix
3527  , typename MT4 // Type of the left-hand side matrix operand
3528  , typename MT5 // Type of the right-hand side matrix operand
3529  , typename ST2 > // Type of the scalar value
3530  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3531  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3532  {
3533  selectDefaultAddAssignKernel( C, A, B, scalar );
3534  }
3535  //**********************************************************************************************
3536 
3537  //**BLAS-based addition assignment to dense matrices (single precision)*************************
3538 #if BLAZE_BLAS_MODE
3539 
// BLAS-based addition assignment kernel for a scaled matrix product (single precision).
//
// Selected only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied (EnableIf).
// Forwards the operands to cblas_sgemm with alpha = scalar and beta = 1.0F, so that the
// scaled product is accumulated onto the existing contents of C (per the BLAS gemm contract).
//
// C      - target matrix that is accumulated into
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor, handed to sgemm as alpha
3552  template< typename MT3 // Type of the left-hand side target matrix
3553  , typename MT4 // Type of the left-hand side matrix operand
3554  , typename MT5 // Type of the right-hand side matrix operand
3555  , typename ST2 > // Type of the scalar value
3556  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3557  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3558  {
3559  using boost::numeric_cast;
3560 
3564 
      // The CBLAS interface takes int dimensions; numeric_cast converts the size_t values
      // (range-checked according to the Boost documentation).
3565  const int M ( numeric_cast<int>( A.rows() ) );
3566  const int N ( numeric_cast<int>( B.columns() ) );
3567  const int K ( numeric_cast<int>( A.columns() ) );
      // The leading dimensions are taken from the spacing() of each matrix.
3568  const int lda( numeric_cast<int>( A.spacing() ) );
3569  const int ldb( numeric_cast<int>( B.spacing() ) );
3570  const int ldc( numeric_cast<int>( C.spacing() ) );
3571 
      // Layout and transposition flags follow the storage order of the target C:
      //   row-major C    -> CblasRowMajor, A not transposed, B transposed
      //   column-major C -> CblasColMajor, A transposed,     B not transposed
      // NOTE(review): presumably because A is stored row-major and B column-major in this
      // expression - confirm against the operand types.
3572  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3573  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3574  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3575  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
3576  }
3577 #endif
3578  //**********************************************************************************************
3579 
3580  //**BLAS-based addition assignment to dense matrices (double precision)*************************
3581 #if BLAZE_BLAS_MODE
3582 
// BLAS-based addition assignment kernel for a scaled matrix product (double precision).
//
// Selected only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> is satisfied (EnableIf).
// Forwards the operands to cblas_dgemm with alpha = scalar and beta = 1.0, so that the
// scaled product is accumulated onto the existing contents of C (per the BLAS gemm contract).
//
// C      - target matrix that is accumulated into
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor, handed to dgemm as alpha
3595  template< typename MT3 // Type of the left-hand side target matrix
3596  , typename MT4 // Type of the left-hand side matrix operand
3597  , typename MT5 // Type of the right-hand side matrix operand
3598  , typename ST2 > // Type of the scalar value
3599  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3600  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3601  {
3602  using boost::numeric_cast;
3603 
3607 
      // The CBLAS interface takes int dimensions; numeric_cast converts the size_t values
      // (range-checked according to the Boost documentation).
3608  const int M ( numeric_cast<int>( A.rows() ) );
3609  const int N ( numeric_cast<int>( B.columns() ) );
3610  const int K ( numeric_cast<int>( A.columns() ) );
      // The leading dimensions are taken from the spacing() of each matrix.
3611  const int lda( numeric_cast<int>( A.spacing() ) );
3612  const int ldb( numeric_cast<int>( B.spacing() ) );
3613  const int ldc( numeric_cast<int>( C.spacing() ) );
3614 
      // Layout and transposition flags follow the storage order of the target C:
      //   row-major C    -> CblasRowMajor, A not transposed, B transposed
      //   column-major C -> CblasColMajor, A transposed,     B not transposed
      // NOTE(review): presumably because A is stored row-major and B column-major in this
      // expression - confirm against the operand types.
3615  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3616  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3617  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3618  M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
3619  }
3620 #endif
3621  //**********************************************************************************************
3622 
3623  //**BLAS-based addition assignment to dense matrices (single precision complex)*****************
3624 #if BLAZE_BLAS_MODE
3625 
// BLAS-based addition assignment kernel for a scaled matrix product (single precision
// complex).
//
// Selected only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> is satisfied (EnableIf).
// Forwards the operands to cblas_cgemm with alpha = scalar and beta = (1,0), so that the
// scaled product is accumulated onto the existing contents of C (per the BLAS gemm contract).
//
// C      - target matrix that is accumulated into
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor, converted to complex<float> and handed to cgemm as alpha
3638  template< typename MT3 // Type of the left-hand side target matrix
3639  , typename MT4 // Type of the left-hand side matrix operand
3640  , typename MT5 // Type of the right-hand side matrix operand
3641  , typename ST2 > // Type of the scalar value
3642  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3643  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3644  {
3645  using boost::numeric_cast;
3646 
      // Compile-time checks: the value_type of every element type must be float.
3650  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
3651  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
3652  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
3653 
      // The CBLAS interface takes int dimensions; numeric_cast converts the size_t values
      // (range-checked according to the Boost documentation).
3654  const int M ( numeric_cast<int>( A.rows() ) );
3655  const int N ( numeric_cast<int>( B.columns() ) );
3656  const int K ( numeric_cast<int>( A.columns() ) );
      // The leading dimensions are taken from the spacing() of each matrix.
3657  const int lda( numeric_cast<int>( A.spacing() ) );
3658  const int ldb( numeric_cast<int>( B.spacing() ) );
3659  const int ldc( numeric_cast<int>( C.spacing() ) );
      // cgemm receives alpha and beta by address, hence the named complex locals.
3660  const complex<float> alpha( scalar );
3661  const complex<float> beta ( 1.0F, 0.0F );
3662 
      // Layout and transposition flags follow the storage order of the target C:
      //   row-major C    -> CblasRowMajor, A not transposed, B transposed
      //   column-major C -> CblasColMajor, A transposed,     B not transposed
      // NOTE(review): presumably because A is stored row-major and B column-major in this
      // expression - confirm against the operand types.
3663  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3664  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3665  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3666  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3667  }
3668 #endif
3669  //**********************************************************************************************
3670 
3671  //**BLAS-based addition assignment to dense matrices (double precision complex)*****************
3672 #if BLAZE_BLAS_MODE
3673 
// BLAS-based addition assignment kernel for a scaled matrix product (double precision
// complex).
//
// Selected only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> is satisfied (EnableIf).
// Forwards the operands to cblas_zgemm with alpha = scalar and beta = (1,0), so that the
// scaled product is accumulated onto the existing contents of C (per the BLAS gemm contract).
//
// C      - target matrix that is accumulated into
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor, converted to complex<double> and handed to zgemm as alpha
3686  template< typename MT3 // Type of the left-hand side target matrix
3687  , typename MT4 // Type of the left-hand side matrix operand
3688  , typename MT5 // Type of the right-hand side matrix operand
3689  , typename ST2 > // Type of the scalar value
3690  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3691  selectBlasAddAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3692  {
3693  using boost::numeric_cast;
3694 
      // Compile-time checks: the value_type of every element type must be double.
3698  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
3699  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
3700  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
3701 
      // The CBLAS interface takes int dimensions; numeric_cast converts the size_t values
      // (range-checked according to the Boost documentation).
3702  const int M ( numeric_cast<int>( A.rows() ) );
3703  const int N ( numeric_cast<int>( B.columns() ) );
3704  const int K ( numeric_cast<int>( A.columns() ) );
      // The leading dimensions are taken from the spacing() of each matrix.
3705  const int lda( numeric_cast<int>( A.spacing() ) );
3706  const int ldb( numeric_cast<int>( B.spacing() ) );
3707  const int ldc( numeric_cast<int>( C.spacing() ) );
      // zgemm receives alpha and beta by address, hence the named complex locals.
3708  const complex<double> alpha( scalar );
3709  const complex<double> beta ( 1.0, 0.0 );
3710 
      // Layout and transposition flags follow the storage order of the target C:
      //   row-major C    -> CblasRowMajor, A not transposed, B transposed
      //   column-major C -> CblasColMajor, A transposed,     B not transposed
      // NOTE(review): presumably because A is stored row-major and B column-major in this
      // expression - confirm against the operand types.
3711  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3712  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3713  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3714  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3715  }
3716 #endif
3717  //**********************************************************************************************
3718 
3719  //**Addition assignment to sparse matrices******************************************************
3720  // No special implementation for the addition assignment to sparse matrices.
3721  //**********************************************************************************************
3722 
3723  //**Subtraction assignment to dense matrices****************************************************
// Subtraction assignment of a scaled matrix product to a dense matrix.
//
// Extracts the two operands of the product from rhs.matrix_, returns early for empty
// problems, evaluates both operands (as LT and RT) and dispatches to
// selectSubAssignKernel together with the scalar rhs.scalar_.
//
// lhs - target dense matrix
// rhs - scaled multiplication expression to be subtracted
3735  template< typename MT3 // Type of the target dense matrix
3736  , bool SO > // Storage order of the target dense matrix
3737  friend inline void subAssign( DenseMatrix<MT3,SO>& lhs, const DMatScalarMultExpr& rhs )
3738  {
3740 
3741  BLAZE_INTERNAL_ASSERT( (~lhs).rows() == rhs.rows() , "Invalid number of rows" );
3742  BLAZE_INTERNAL_ASSERT( (~lhs).columns() == rhs.columns(), "Invalid number of columns" );
3743 
3744  typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3745  typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3746 
      // Nothing to subtract if the target is empty or the inner dimension of the product is zero.
3747  if( (~lhs).rows() == 0UL || (~lhs).columns() == 0UL || left.columns() == 0UL ) {
3748  return;
3749  }
3750 
3751  LT A( left ); // Evaluation of the left-hand side dense matrix operand
3752  RT B( right ); // Evaluation of the right-hand side dense matrix operand
3753 
      // Sanity checks: evaluation must preserve the operand sizes, and they must fit the target.
3754  BLAZE_INTERNAL_ASSERT( A.rows() == left.rows() , "Invalid number of rows" );
3755  BLAZE_INTERNAL_ASSERT( A.columns() == left.columns() , "Invalid number of columns" );
3756  BLAZE_INTERNAL_ASSERT( B.rows() == right.rows() , "Invalid number of rows" );
3757  BLAZE_INTERNAL_ASSERT( B.columns() == right.columns() , "Invalid number of columns" );
3758  BLAZE_INTERNAL_ASSERT( A.rows() == (~lhs).rows() , "Invalid number of rows" );
3759  BLAZE_INTERNAL_ASSERT( B.columns() == (~lhs).columns(), "Invalid number of columns" );
3760 
3761  DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3762  }
3763  //**********************************************************************************************
3764 
3765  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment of a scaled matrix product.
//
// This overload participates only when UseSMPAssignKernel<MT3,MT4,MT5,ST2> is NOT
// satisfied (DisableIf). It picks the default kernel when the number of elements of C is
// below DMATTDMATMULT_THRESHOLD and the BLAS kernel otherwise.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3776  template< typename MT3 // Type of the left-hand side target matrix
3777  , typename MT4 // Type of the left-hand side matrix operand
3778  , typename MT5 // Type of the right-hand side matrix operand
3779  , typename ST2 > // Type of the scalar value
3780  static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3781  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3782  {
      // The decision is based on the size of the target (rows * columns) only.
3783  if( C.rows() * C.columns() < DMATTDMATMULT_THRESHOLD )
3784  DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3785  else
3786  DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3787  }
3788  //**********************************************************************************************
3789 
3790  //**Subtraction assignment to dense matrices (kernel selection)*********************************
// Kernel selection for the subtraction assignment of a scaled matrix product (SMP case).
//
// This overload participates only when UseSMPAssignKernel<MT3,MT4,MT5,ST2> is satisfied
// (EnableIf). It rebuilds the expression A * B * scalar from the evaluated operands and
// passes it to smpSubAssign.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3801  template< typename MT3 // Type of the left-hand side target matrix
3802  , typename MT4 // Type of the left-hand side matrix operand
3803  , typename MT5 // Type of the right-hand side matrix operand
3804  , typename ST2 > // Type of the scalar value
3805  static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3806  selectSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3807  {
3808  smpSubAssign( C, A * B * scalar );
3809  }
3810  //**********************************************************************************************
3811 
3812  //**Default subtraction assignment to dense matrices********************************************
// Default (non-vectorized) kernel for the subtraction assignment of a scaled matrix product.
//
// This overload participates only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> is NOT
// satisfied (DisableIf). The product A * B * scalar is evaluated into a temporary of type
// ResultType, which is then subtracted from C via subAssign.
//
// C      - target matrix
// A      - left-hand side operand of the product
// B      - right-hand side operand of the product
// scalar - scaling factor
3826  template< typename MT3 // Type of the left-hand side target matrix
3827  , typename MT4 // Type of the left-hand side matrix operand
3828  , typename MT5 // Type of the right-hand side matrix operand
3829  , typename ST2 > // Type of the scalar value
3830  static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3831  selectDefaultSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
3832  {
3833  const ResultType tmp( A * B * scalar );
3834  subAssign( C, tmp );
3835  }
3836  //**********************************************************************************************
3837 
3838  //**Vectorized default subtraction assignment to row-major dense matrices***********************
3852  template< typename MT3 // Type of the left-hand side target matrix
3853  , typename MT4 // Type of the left-hand side matrix operand
3854  , typename MT5 // Type of the right-hand side matrix operand
3855  , typename ST2 > // Type of the scalar value
3856  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3857  selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C, const MT4& A, const MT5& B, ST2 scalar )
3858  {
3859  typedef IntrinsicTrait<ElementType> IT;
3860 
3861  const size_t M( A.rows() );
3862  const size_t N( B.columns() );
3863  const size_t K( A.columns() );
3864 
3865  size_t i( 0UL );
3866 
3867  for( ; (i+2UL) <= M; i+=2UL ) {
3868  size_t j( 0UL );
3869  for( ; (j+4UL) <= N; j+=4UL ) {
3870  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3871  for( size_t k=0UL; k<K; k+=IT::size ) {
3872  const IntrinsicType a1( A.load(i ,k) );
3873  const IntrinsicType a2( A.load(i+1UL,k) );
3874  const IntrinsicType b1( B.load(k,j ) );
3875  const IntrinsicType b2( B.load(k,j+1UL) );
3876  const IntrinsicType b3( B.load(k,j+2UL) );
3877  const IntrinsicType b4( B.load(k,j+3UL) );
3878  xmm1 = xmm1 + a1 * b1;
3879  xmm2 = xmm2 + a1 * b2;
3880  xmm3 = xmm3 + a1 * b3;
3881  xmm4 = xmm4 + a1 * b4;
3882  xmm5 = xmm5 + a2 * b1;
3883  xmm6 = xmm6 + a2 * b2;
3884  xmm7 = xmm7 + a2 * b3;
3885  xmm8 = xmm8 + a2 * b4;
3886  }
3887  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3888  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3889  (~C)(i ,j+2UL) -= sum( xmm3 ) * scalar;
3890  (~C)(i ,j+3UL) -= sum( xmm4 ) * scalar;
3891  (~C)(i+1UL,j ) -= sum( xmm5 ) * scalar;
3892  (~C)(i+1UL,j+1UL) -= sum( xmm6 ) * scalar;
3893  (~C)(i+1UL,j+2UL) -= sum( xmm7 ) * scalar;
3894  (~C)(i+1UL,j+3UL) -= sum( xmm8 ) * scalar;
3895  }
3896  for( ; (j+2UL) <= N; j+=2UL ) {
3897  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3898  for( size_t k=0UL; k<K; k+=IT::size ) {
3899  const IntrinsicType a1( A.load(i ,k) );
3900  const IntrinsicType a2( A.load(i+1UL,k) );
3901  const IntrinsicType b1( B.load(k,j ) );
3902  const IntrinsicType b2( B.load(k,j+1UL) );
3903  xmm1 = xmm1 + a1 * b1;
3904  xmm2 = xmm2 + a1 * b2;
3905  xmm3 = xmm3 + a2 * b1;
3906  xmm4 = xmm4 + a2 * b2;
3907  }
3908  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
3909  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
3910  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
3911  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
3912  }
3913  if( j < N ) {
3914  IntrinsicType xmm1, xmm2;
3915  for( size_t k=0UL; k<K; k+=IT::size ) {
3916  const IntrinsicType b1( B.load(k,j) );
3917  xmm1 = xmm1 + A.load(i ,k) * b1;
3918  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3919  }
3920  (~C)(i ,j) -= sum( xmm1 ) * scalar;
3921  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
3922  }
3923  }
3924  if( i < M ) {
3925  size_t j( 0UL );
3926  for( ; (j+4UL) <= N; j+=4UL ) {
3927  IntrinsicType xmm1, xmm2, xmm3, xmm4;
3928  for( size_t k=0UL; k<K; k+=IT::size ) {
3929  const IntrinsicType a1( A.load(i,k) );
3930  xmm1 = xmm1 + a1 * B.load(k,j );
3931  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3932  xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3933  xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3934  }
3935  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3936  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3937  (~C)(i,j+2UL) -= sum( xmm3 ) * scalar;
3938  (~C)(i,j+3UL) -= sum( xmm4 ) * scalar;
3939  }
3940  for( ; (j+2UL) <= N; j+=2UL ) {
3941  IntrinsicType xmm1, xmm2;
3942  for( size_t k=0UL; k<K; k+=IT::size ) {
3943  const IntrinsicType a1( A.load(i,k) );
3944  xmm1 = xmm1 + a1 * B.load(k,j );
3945  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3946  }
3947  (~C)(i,j ) -= sum( xmm1 ) * scalar;
3948  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
3949  }
3950  if( j < N ) {
3951  IntrinsicType xmm1, xmm2;
3952  for( size_t k=0UL; k<K; k+=IT::size ) {
3953  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3954  }
3955  (~C)(i,j) -= sum( xmm1 ) * scalar;
3956  }
3957  }
3958  }
3959  //**********************************************************************************************
3960 
3961  //**Vectorized default subtraction assignment to column-major dense matrices********************
3975  template< typename MT3 // Type of the left-hand side target matrix
3976  , typename MT4 // Type of the left-hand side matrix operand
3977  , typename MT5 // Type of the right-hand side matrix operand
3978  , typename ST2 > // Type of the scalar value
3979  static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3980  selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C, const MT4& A, const MT5& B, ST2 scalar )
3981  {
3982  typedef IntrinsicTrait<ElementType> IT;
3983 
3984  const size_t M( A.rows() );
3985  const size_t N( B.columns() );
3986  const size_t K( A.columns() );
3987 
3988  size_t i( 0UL );
3989 
3990  for( ; (i+4UL) <= M; i+=4UL ) {
3991  size_t j( 0UL );
3992  for( ; (j+2UL) <= N; j+=2UL ) {
3993  IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3994  for( size_t k=0UL; k<K; k+=IT::size ) {
3995  const IntrinsicType a1( A.load(i ,k) );
3996  const IntrinsicType a2( A.load(i+1UL,k) );
3997  const IntrinsicType a3( A.load(i+2UL,k) );
3998  const IntrinsicType a4( A.load(i+3UL,k) );
3999  const IntrinsicType b1( B.load(k,j ) );
4000  const IntrinsicType b2( B.load(k,j+1UL) );
4001  xmm1 = xmm1 + a1 * b1;
4002  xmm2 = xmm2 + a1 * b2;
4003  xmm3 = xmm3 + a2 * b1;
4004  xmm4 = xmm4 + a2 * b2;
4005  xmm5 = xmm5 + a3 * b1;
4006  xmm6 = xmm6 + a3 * b2;
4007  xmm7 = xmm7 + a4 * b1;
4008  xmm8 = xmm8 + a4 * b2;
4009  }
4010  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4011  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4012  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4013  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4014  (~C)(i+2UL,j ) -= sum( xmm5 ) * scalar;
4015  (~C)(i+2UL,j+1UL) -= sum( xmm6 ) * scalar;
4016  (~C)(i+3UL,j ) -= sum( xmm7 ) * scalar;
4017  (~C)(i+3UL,j+1UL) -= sum( xmm8 ) * scalar;
4018  }
4019  if( j < N ) {
4020  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4021  for( size_t k=0UL; k<K; k+=IT::size ) {
4022  const IntrinsicType b1( B.load(k,j) );
4023  xmm1 = xmm1 + A.load(i ,k) * b1;
4024  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4025  xmm3 = xmm3 + A.load(i+2UL,k) * b1;
4026  xmm4 = xmm4 + A.load(i+3UL,k) * b1;
4027  }
4028  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4029  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4030  (~C)(i+2UL,j) -= sum( xmm3 ) * scalar;
4031  (~C)(i+3UL,j) -= sum( xmm4 ) * scalar;
4032  }
4033  }
4034  for( ; (i+2UL) <= M; i+=2UL ) {
4035  size_t j( 0UL );
4036  for( ; (j+2UL) <= N; j+=2UL ) {
4037  IntrinsicType xmm1, xmm2, xmm3, xmm4;
4038  for( size_t k=0UL; k<K; k+=IT::size ) {
4039  const IntrinsicType a1( A.load(i ,k) );
4040  const IntrinsicType a2( A.load(i+1UL,k) );
4041  const IntrinsicType b1( B.load(k,j ) );
4042  const IntrinsicType b2( B.load(k,j+1UL) );
4043  xmm1 = xmm1 + a1 * b1;
4044  xmm2 = xmm2 + a1 * b2;
4045  xmm3 = xmm3 + a2 * b1;
4046  xmm4 = xmm4 + a2 * b2;
4047  }
4048  (~C)(i ,j ) -= sum( xmm1 ) * scalar;
4049  (~C)(i ,j+1UL) -= sum( xmm2 ) * scalar;
4050  (~C)(i+1UL,j ) -= sum( xmm3 ) * scalar;
4051  (~C)(i+1UL,j+1UL) -= sum( xmm4 ) * scalar;
4052  }
4053  if( j < N ) {
4054  IntrinsicType xmm1, xmm2;
4055  for( size_t k=0UL; k<K; k+=IT::size ) {
4056  const IntrinsicType b1( B.load(k,j) );
4057  xmm1 = xmm1 + A.load(i ,k) * b1;
4058  xmm2 = xmm2 + A.load(i+1UL,k) * b1;
4059  }
4060  (~C)(i ,j) -= sum( xmm1 ) * scalar;
4061  (~C)(i+1UL,j) -= sum( xmm2 ) * scalar;
4062  }
4063  }
4064  if( i < M ) {
4065  size_t j( 0UL );
4066  for( ; (j+2UL) <= N; j+=2UL ) {
4067  IntrinsicType xmm1, xmm2;
4068  for( size_t k=0UL; k<K; k+=IT::size ) {
4069  const IntrinsicType a1( A.load(i,k) );
4070  xmm1 = xmm1 + a1 * B.load(k,j );
4071  xmm2 = xmm2 + a1 * B.load(k,j+1UL);
4072  }
4073  (~C)(i,j ) -= sum( xmm1 ) * scalar;
4074  (~C)(i,j+1UL) -= sum( xmm2 ) * scalar;
4075  }
4076  if( j < N ) {
4077  IntrinsicType xmm1, xmm2;
4078  for( size_t k=0UL; k<K; k+=IT::size ) {
4079  xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
4080  }
4081  (~C)(i,j) -= sum( xmm1 ) * scalar;
4082  }
4083  }
4084  }
4085  //**********************************************************************************************
4086 
4087  //**BLAS-based subtraction assignment to dense matrices (default)*******************************
// Fallback overload of the BLAS-based subtraction assignment kernel. Selected via EnableIf
// when UseDefaultKernel<MT3,MT4,MT5,ST2> holds. It issues no BLAS call itself and forwards
// all four arguments unchanged to selectDefaultSubAssignKernel().
4101  template< typename MT3 // Type of the left-hand side target matrix
4102  , typename MT4 // Type of the left-hand side matrix operand
4103  , typename MT5 // Type of the right-hand side matrix operand
4104  , typename ST2 > // Type of the scalar value
4105  static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4106  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4107  {
4108  selectDefaultSubAssignKernel( C, A, B, scalar );
4109  }
4110  //**********************************************************************************************
4111 
4112  //**BLAS-based subtraction assignment to dense matrices (single precision)**********************
4113 #if BLAZE_BLAS_MODE
4114
// Single precision BLAS kernel for the scaled subtraction assignment. Only compiled when
// BLAZE_BLAS_MODE is non-zero and selected via EnableIf when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds. Issues a single cblas_sgemm call with
// alpha = -scalar and beta = 1.0F, so the scaled product of A and B is subtracted from the
// current contents of C.
// NOTE(review): the embedded listing numbers jump from 4135 to 4139, so statements may be
// missing from this extract -- check against the original header.
4127  template< typename MT3 // Type of the left-hand side target matrix
4128  , typename MT4 // Type of the left-hand side matrix operand
4129  , typename MT5 // Type of the right-hand side matrix operand
4130  , typename ST2 > // Type of the scalar value
4131  static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4132  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4133  {
4134  using boost::numeric_cast;
4135
4139
      // The CBLAS interface takes int arguments; numeric_cast converts the size_t values.
4140  const int M ( numeric_cast<int>( A.rows() ) );
4141  const int N ( numeric_cast<int>( B.columns() ) );
4142  const int K ( numeric_cast<int>( A.columns() ) );
4143  const int lda( numeric_cast<int>( A.spacing() ) );
4144  const int ldb( numeric_cast<int>( B.spacing() ) );
4145  const int ldc( numeric_cast<int>( C.spacing() ) );
4146
      // The layout follows the storage order of the target C. For a row-major C, A is passed
      // untransposed and B transposed; for a column-major C the two flags are swapped.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
4147  cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4148  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4149  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4150  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
4151  }
4152 #endif
4153  //**********************************************************************************************
4154 
4155  //**BLAS-based subtraction assignment to dense matrices (double precision)**********************
4156 #if BLAZE_BLAS_MODE
4157
// Double precision BLAS kernel for the scaled subtraction assignment. Only compiled when
// BLAZE_BLAS_MODE is non-zero and selected via EnableIf when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds. Issues a single cblas_dgemm call with
// alpha = -scalar and beta = 1.0, so the scaled product of A and B is subtracted from the
// current contents of C.
// NOTE(review): the embedded listing numbers jump from 4178 to 4182, so statements may be
// missing from this extract -- check against the original header.
4170  template< typename MT3 // Type of the left-hand side target matrix
4171  , typename MT4 // Type of the left-hand side matrix operand
4172  , typename MT5 // Type of the right-hand side matrix operand
4173  , typename ST2 > // Type of the scalar value
4174  static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4175  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4176  {
4177  using boost::numeric_cast;
4178
4182
      // The CBLAS interface takes int arguments; numeric_cast converts the size_t values.
4183  const int M ( numeric_cast<int>( A.rows() ) );
4184  const int N ( numeric_cast<int>( B.columns() ) );
4185  const int K ( numeric_cast<int>( A.columns() ) );
4186  const int lda( numeric_cast<int>( A.spacing() ) );
4187  const int ldb( numeric_cast<int>( B.spacing() ) );
4188  const int ldc( numeric_cast<int>( C.spacing() ) );
4189
      // The layout follows the storage order of the target C. For a row-major C, A is passed
      // untransposed and B transposed; for a column-major C the two flags are swapped.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
4190  cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4191  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4192  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4193  M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
4194  }
4195 #endif
4196  //**********************************************************************************************
4197 
4198  //**BLAS-based subtraction assignment to dense matrices (single precision complex)**************
4199 #if BLAZE_BLAS_MODE
4200
// Single precision complex BLAS kernel for the scaled subtraction assignment. Only compiled
// when BLAZE_BLAS_MODE is non-zero and selected via EnableIf when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 does not take part
// in the selection). Issues a single cblas_cgemm call with alpha = -scalar and
// beta = (1,0), so the scaled product of A and B is subtracted from the current contents of C.
// NOTE(review): the embedded listing numbers jump from 4221 to 4225, so statements may be
// missing from this extract -- check against the original header.
4213  template< typename MT3 // Type of the left-hand side target matrix
4214  , typename MT4 // Type of the left-hand side matrix operand
4215  , typename MT5 // Type of the right-hand side matrix operand
4216  , typename ST2 > // Type of the scalar value
4217  static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4218  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4219  {
4220  using boost::numeric_cast;
4221
      // Compile time checks: the value_type of every element type must be float.
4225  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT3::ElementType::value_type );
4226  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT4::ElementType::value_type );
4227  BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE ( typename MT5::ElementType::value_type );
4228
      // The CBLAS interface takes int arguments; numeric_cast converts the size_t values.
4229  const int M ( numeric_cast<int>( A.rows() ) );
4230  const int N ( numeric_cast<int>( B.columns() ) );
4231  const int K ( numeric_cast<int>( A.columns() ) );
4232  const int lda( numeric_cast<int>( A.spacing() ) );
4233  const int ldb( numeric_cast<int>( B.spacing() ) );
4234  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_cgemm by address, hence the named locals.
4235  const complex<float> alpha( -scalar );
4236  const complex<float> beta ( 1.0F, 0.0F );
4237
      // The layout follows the storage order of the target C. For a row-major C, A is passed
      // untransposed and B transposed; for a column-major C the two flags are swapped.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
4238  cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4239  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4240  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4241  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4242  }
4243 #endif
4244  //**********************************************************************************************
4245 
4246  //**BLAS-based subtraction assignment to dense matrices (double precision complex)**************
4247 #if BLAZE_BLAS_MODE
4248
// Double precision complex BLAS kernel for the scaled subtraction assignment. Only compiled
// when BLAZE_BLAS_MODE is non-zero and selected via EnableIf when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds (the scalar type ST2 does not take part
// in the selection). Issues a single cblas_zgemm call with alpha = -scalar and
// beta = (1,0), so the scaled product of A and B is subtracted from the current contents of C.
// NOTE(review): the embedded listing numbers jump from 4269 to 4273, so statements may be
// missing from this extract -- check against the original header.
4261  template< typename MT3 // Type of the left-hand side target matrix
4262  , typename MT4 // Type of the left-hand side matrix operand
4263  , typename MT5 // Type of the right-hand side matrix operand
4264  , typename ST2 > // Type of the scalar value
4265  static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4266  selectBlasSubAssignKernel( MT3& C, const MT4& A, const MT5& B, ST2 scalar )
4267  {
4268  using boost::numeric_cast;
4269
      // Compile time checks: the value_type of every element type must be double.
4273  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT3::ElementType::value_type );
4274  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT4::ElementType::value_type );
4275  BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE ( typename MT5::ElementType::value_type );
4276
      // The CBLAS interface takes int arguments; numeric_cast converts the size_t values.
4277  const int M ( numeric_cast<int>( A.rows() ) );
4278  const int N ( numeric_cast<int>( B.columns() ) );
4279  const int K ( numeric_cast<int>( A.columns() ) );
4280  const int lda( numeric_cast<int>( A.spacing() ) );
4281  const int ldb( numeric_cast<int>( B.spacing() ) );
4282  const int ldc( numeric_cast<int>( C.spacing() ) );
      // The complex scaling factors are handed to cblas_zgemm by address, hence the named locals.
4283  const complex<double> alpha( -scalar );
4284  const complex<double> beta ( 1.0, 0.0 );
4285
      // The layout follows the storage order of the target C. For a row-major C, A is passed
      // untransposed and B transposed; for a column-major C the two flags are swapped.
      // NOTE(review): presumably because A is stored row-major and B column-major -- confirm.
4286  cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4287  ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4288  ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4289  M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4290  }
4291 #endif
4292  //**********************************************************************************************
4293 
4294  //**Subtraction assignment to sparse matrices***************************************************
4295  // No special implementation for the subtraction assignment to sparse matrices.
4296  //**********************************************************************************************
4297 
4298  //**Multiplication assignment to dense matrices*************************************************
4299  // No special implementation for the multiplication assignment to dense matrices.
4300  //**********************************************************************************************
4301 
4302  //**Multiplication assignment to sparse matrices************************************************
4303  // No special implementation for the multiplication assignment to sparse matrices.
4304  //**********************************************************************************************
4305 
4306  //**Compile time checks*************************************************************************
4315  //**********************************************************************************************
4316 };
4318 //*************************************************************************************************
4319 
4320 
4321 
4322 
4323 //=================================================================================================
4324 //
4325 // GLOBAL BINARY ARITHMETIC OPERATORS
4326 //
4327 //=================================================================================================
4328 
4329 //*************************************************************************************************
// Creates the expression object for the product of two dense matrix operands.
// NOTE(review): the declarator line (listing number 4361) and listing line 4363 are missing
// from this extract. Judging by the body, the function takes the two operands lhs and rhs;
// confirm name and parameter types against the original header.
//
// Returns a DMatTDMatMultExpr<T1,T2> constructed from ~lhs and ~rhs.
// Throws std::invalid_argument when the number of columns of lhs differs from the number of
// rows of rhs.
4358 template< typename T1 // Type of the left-hand side dense matrix
4359  , typename T2 > // Type of the right-hand side dense matrix
4360 inline const DMatTDMatMultExpr<T1,T2>
4362 {
4364
      // The inner dimensions must agree for the product to be defined.
4365  if( (~lhs).columns() != (~rhs).rows() )
4366  throw std::invalid_argument( "Matrix sizes do not match" );
4367
4368  return DMatTDMatMultExpr<T1,T2>( ~lhs, ~rhs );
4369 }
4370 //*************************************************************************************************
4371 
4372 
4373 
4374 
4375 //=================================================================================================
4376 //
4377 // EXPRESSION TRAIT SPECIALIZATIONS
4378 //
4379 //=================================================================================================
4380 
4381 //*************************************************************************************************
// Specialization of DMatDVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> multiplied by a
// vector VT. If MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT a
// dense column vector, Type is the DMatDVecMultExprTrait of MT1 and the result type of
// TDMatDVecMultExprTrait<MT2,VT>, i.e. the product is regrouped as MT1 * ( MT2 * VT ).
// Otherwise Type is INVALID_TYPE.
4383 template< typename MT1, typename MT2, typename VT >
4384 struct DMatDVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4385 {
4386  public:
4387  //**********************************************************************************************
4388  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4389  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4390  IsDenseVector<VT>::value && IsColumnVector<VT>::value
4391  , typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4392  , INVALID_TYPE >::Type Type;
4393  //**********************************************************************************************
4394 };
4396 //*************************************************************************************************
4397 
4398 
4399 //*************************************************************************************************
// Specialization of DMatSVecMultExprTrait for a DMatTDMatMultExpr<MT1,MT2> multiplied by a
// vector VT. If MT1 is a row-major dense matrix, MT2 a column-major dense matrix and VT a
// sparse column vector, Type is the DMatDVecMultExprTrait of MT1 and the result type of
// TDMatSVecMultExprTrait<MT2,VT>, i.e. the product is regrouped as MT1 * ( MT2 * VT ).
// Otherwise Type is INVALID_TYPE.
4401 template< typename MT1, typename MT2, typename VT >
4402 struct DMatSVecMultExprTrait< DMatTDMatMultExpr<MT1,MT2>, VT >
4403 {
4404  public:
4405  //**********************************************************************************************
4406  typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4407  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4408  IsSparseVector<VT>::value && IsColumnVector<VT>::value
4409  , typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4410  , INVALID_TYPE >::Type Type;
4411  //**********************************************************************************************
4412 };
4414 //*************************************************************************************************
4415 
4416 
4417 //*************************************************************************************************
// Specialization of TDVecDMatMultExprTrait for a vector VT multiplied by a
// DMatTDMatMultExpr<MT1,MT2>. If VT is a dense row vector, MT1 a row-major dense matrix and
// MT2 a column-major dense matrix, Type is the TDVecTDMatMultExprTrait of the result type of
// TDVecDMatMultExprTrait<VT,MT1> and MT2, i.e. the product is regrouped as ( VT * MT1 ) * MT2.
// Otherwise Type is INVALID_TYPE.
4419 template< typename VT, typename MT1, typename MT2 >
4420 struct TDVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4421 {
4422  public:
4423  //**********************************************************************************************
4424  typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4425  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4426  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4427  , typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4428  , INVALID_TYPE >::Type Type;
4429  //**********************************************************************************************
4430 };
4432 //*************************************************************************************************
4433 
4434 
4435 //*************************************************************************************************
// Specialization of TSVecDMatMultExprTrait for a vector VT multiplied by a
// DMatTDMatMultExpr<MT1,MT2>. If VT is a sparse row vector, MT1 a row-major dense matrix and
// MT2 a column-major dense matrix, Type is the TDVecTDMatMultExprTrait of the result type of
// TSVecDMatMultExprTrait<VT,MT1> and MT2, i.e. the product is regrouped as ( VT * MT1 ) * MT2.
// Otherwise Type is INVALID_TYPE.
4437 template< typename VT, typename MT1, typename MT2 >
4438 struct TSVecDMatMultExprTrait< VT, DMatTDMatMultExpr<MT1,MT2> >
4439 {
4440  public:
4441  //**********************************************************************************************
4442  typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4443  IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4444  IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4445  , typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4446  , INVALID_TYPE >::Type Type;
4447  //**********************************************************************************************
4448 };
4450 //*************************************************************************************************
4451 
4452 
4453 //*************************************************************************************************
// Specialization of SubmatrixExprTrait for DMatTDMatMultExpr<MT1,MT2>. Type is the
// MultExprTrait of the submatrix expression types of const MT1 and const MT2, both obtained
// with the same AF template argument. No validity check is performed here.
4455 template< typename MT1, typename MT2, bool AF >
4456 struct SubmatrixExprTrait< DMatTDMatMultExpr<MT1,MT2>, AF >
4457 {
4458  public:
4459  //**********************************************************************************************
4460  typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4461  , typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4462  //**********************************************************************************************
4463 };
4465 //*************************************************************************************************
4466 
4467 
4468 //*************************************************************************************************
// Specialization of RowExprTrait for DMatTDMatMultExpr<MT1,MT2>. Type is the MultExprTrait
// of the row expression type of const MT1 and the unmodified MT2, i.e. a row of the product
// is expressed as ( row of MT1 ) * MT2.
4470 template< typename MT1, typename MT2 >
4471 struct RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4472 {
4473  public:
4474  //**********************************************************************************************
4475  typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4476  //**********************************************************************************************
4477 };
4479 //*************************************************************************************************
4480 
4481 
4482 //*************************************************************************************************
// Specialization of ColumnExprTrait for DMatTDMatMultExpr<MT1,MT2>. Type is the MultExprTrait
// of the unmodified MT1 and the column expression type of const MT2, i.e. a column of the
// product is expressed as MT1 * ( column of MT2 ).
4484 template< typename MT1, typename MT2 >
4485 struct ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >
4486 {
4487  public:
4488  //**********************************************************************************************
4489  typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
4490  //**********************************************************************************************
4491 };
4493 //*************************************************************************************************
4494 
4495 } // namespace blaze
4496 
4497 #endif
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatTDMatMultExpr.h:403
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:247
Data type constraint.
Compile time check whether the given type is a computational expression template.This type trait clas...
Definition: IsComputation.h:89
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:255
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode.This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4075
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:249
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:299
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatTDMatMultExpr.h:393
Header file for the sparse matrix SMP implementation.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:245
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:247
Header file for the TDVecSMatMultExprTrait class template.
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:124
Header file for the IsFloat type trait.
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:126
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:284
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:339
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:252
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TDMatSVecMultExprTrait class template.
Header file for the dense matrix SMP implementation.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:383
Header file for the DenseMatrix base class.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:123
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint.In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode.This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:127
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
const size_t SMP_DMATTDMATMULT_THRESHOLD
SMP row-major dense matrix/column-major dense matrix multiplication threshold.This threshold represen...
Definition: Thresholds.h:446
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:250
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:329
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates.The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types.The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:246
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type.In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold.This setting specifies the ...
Definition: Thresholds.h:142
Substitution Failure Is Not An Error (SFINAE) class.The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:371
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:125
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type.In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:349
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Base class for all compute expression templates.The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:251
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro.This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:412
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications.The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:116
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:122
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:413
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:359
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:248
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:261
Constraint on the data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:264
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:258
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.