35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
118 template<
typename MT1
120 class TDMatTDMatMultExpr :
public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
121 ,
private MatMatMultExpr
122 ,
private Computation
// Compile-time predicate: `value` is nonzero when T1 is a row-major matrix type
// and at least one of T2 / T3 is a symmetric matrix type.
// Elsewhere in this file it is used with EnableIf / DisableIf to choose between
// alternative friend overloads for the same target/operand type combination.
152 template<
typename T1,
typename T2,
typename T3 >
153 struct CanExploitSymmetry {
154 enum { value = IsRowMajorMatrix<T1>::value &&
155 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
// Compile-time predicate: `value` is nonzero when at least one operand has to be
// evaluated (evaluateLeft || evaluateRight) AND the symmetry shortcut described
// by CanExploitSymmetry does NOT apply to <T1,T2,T3>.
//
// The negation is required: overloads in this class are guarded by
// EnableIf< IsEvaluationRequired<...> > on one side and
// EnableIf< CanExploitSymmetry<...> > on the other, so the two predicates must
// never be true for the same type triple. This also matches the definition of
// the same trait in the scaled-multiplication specialization further down.
166 template<
typename T1,
typename T2,
typename T3 >
167 struct IsEvaluationRequired {
168 enum { value = ( evaluateLeft || evaluateRight ) &&
169 !CanExploitSymmetry<T1,T2,T3>::value };
// Compile-time predicate guarding the overloads that forward to sgemm().
// The visible part of the condition requires all three matrix types to be
// vectorizable and their ElementType to satisfy IsFloat.
// NOTE(review): the line that opens the enum (before the first visible term) is
// not part of this excerpt — confirm whether it contributes a leading condition.
179 template<
typename T1,
typename T2,
typename T3 >
180 struct UseSinglePrecisionKernel {
182 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
183 IsFloat<typename T1::ElementType>::value &&
184 IsFloat<typename T2::ElementType>::value &&
185 IsFloat<typename T3::ElementType>::value };
// Compile-time predicate guarding the overloads that forward to dgemm().
// The visible part of the condition requires all three matrix types to be
// vectorizable and their ElementType to satisfy IsDouble.
// NOTE(review): the line that opens the enum is not part of this excerpt —
// confirm whether it contributes a leading condition.
195 template<
typename T1,
typename T2,
typename T3 >
196 struct UseDoublePrecisionKernel {
198 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
199 IsDouble<typename T1::ElementType>::value &&
200 IsDouble<typename T2::ElementType>::value &&
201 IsDouble<typename T3::ElementType>::value };
// Compile-time predicate guarding the overloads that forward to cgemm().
// The visible part of the condition requires all three matrix types to be
// vectorizable and to have exactly complex<float> as ElementType.
// NOTE(review): the line that opens the enum is not part of this excerpt —
// confirm whether it contributes a leading condition.
212 template<
typename T1,
typename T2,
typename T3 >
213 struct UseSinglePrecisionComplexKernel {
// Element type every operand must match exactly.
214 typedef complex<float> Type;
216 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
217 IsSame<typename T1::ElementType,Type>::value &&
218 IsSame<typename T2::ElementType,Type>::value &&
219 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate guarding the overloads that forward to zgemm().
// The visible part of the condition requires all three matrix types to be
// vectorizable and to have exactly complex<double> as ElementType.
// NOTE(review): the line that opens the enum is not part of this excerpt —
// confirm whether it contributes a leading condition.
230 template<
typename T1,
typename T2,
typename T3 >
231 struct UseDoublePrecisionComplexKernel {
// Element type every operand must match exactly.
232 typedef complex<double> Type;
234 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235 IsSame<typename T1::ElementType,Type>::value &&
236 IsSame<typename T2::ElementType,Type>::value &&
237 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback: `value` is nonzero
// when BLAZE_BLAS_MODE is off, or when none of the four BLAS kernel predicates
// (single, double, complex single, complex double) holds for <T1,T2,T3>.
247 template<
typename T1,
typename T2,
typename T3 >
248 struct UseDefaultKernel {
249 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
250 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
251 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
252 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsic-based default kernels:
// all three matrix types must be vectorizable, share one ElementType, and that
// element type must report addition, subtraction and multiplication support in
// IntrinsicTrait.
262 template<
typename T1,
typename T2,
typename T3 >
263 struct UseVectorizedDefaultKernel {
264 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
265 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
266 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
267 IntrinsicTrait<typename T1::ElementType>::addition &&
268 IntrinsicTrait<typename T1::ElementType>::subtraction &&
269 IntrinsicTrait<typename T1::ElementType>::multiplication };
300 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
306 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
307 !evaluateRight && MT2::smpAssignable };
337 if(
lhs_.columns() != 0UL ) {
338 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
340 for(
size_t k=1UL; k<
end; k+=2UL ) {
342 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
344 if( end <
lhs_.columns() ) {
372 return rhs_.columns();
402 template<
typename T >
404 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
414 template<
typename T >
416 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
426 return lhs_.isAligned() &&
rhs_.isAligned();
// Thin wrapper that hands a single-precision matrix product to CBLAS.
// C is the target, A and B are the operands, alpha and beta are passed through
// unchanged as the CBLAS scaling factors.
// NOTE(review): the branch conditions around the three CBLAS calls are elided in
// this excerpt; the sibling dgemm() shows the symmetric/column-major checks.
465 template<
typename MT3
468 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
470 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast converts the size values to int.
476 const int M ( numeric_cast<int>( A.rows() ) );
477 const int N ( numeric_cast<int>( B.columns() ) );
478 const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
479 const int lda( numeric_cast<int>( A.spacing() ) );
480 const int ldb( numeric_cast<int>( B.spacing() ) );
481 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric-multiply call with A passed as the symmetric operand on the left.
484 cblas_ssymm( CblasColMajor, CblasLeft, CblasUpper,
485 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric-multiply call with B passed as the symmetric operand on the right.
488 cblas_ssymm( CblasColMajor, CblasRight, CblasUpper,
489 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// Tail of the general (gemm) call; its leading arguments are not shown here.
495 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Thin wrapper that hands a double-precision matrix product to CBLAS.
// C is the target, A and B are the operands, alpha and beta are passed through
// unchanged as the CBLAS scaling factors.
// NOTE(review): some lines of the body (including the first `if`) are elided in
// this excerpt.
519 template<
typename MT3
522 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
524 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast converts the size values to int.
530 const int M ( numeric_cast<int>( A.rows() ) );
531 const int N ( numeric_cast<int>( B.columns() ) );
532 const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
533 const int lda( numeric_cast<int>( A.spacing() ) );
534 const int ldb( numeric_cast<int>( B.spacing() ) );
535 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric-multiply call with A passed as the symmetric operand on the left.
538 cblas_dsymm( CblasColMajor, CblasLeft, CblasUpper,
539 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// B symmetric and C column-major: symmetric multiply with B on the right.
541 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
542 cblas_dsymm( CblasColMajor, CblasRight, CblasUpper,
543 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General case: storage order follows C; both operands are flagged as
// transposed exactly when C is row-major.
546 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
547 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
548 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
549 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Thin wrapper that hands a complex<float> matrix product to CBLAS.
// C is the target, A and B are the operands; alpha and beta are taken by value
// and their addresses are passed to the CBLAS routines.
// NOTE(review): some lines of the body are elided in this excerpt.
574 template<
typename MT3
577 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
578 complex<float> alpha, complex<float> beta )
580 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast converts the size values to int.
589 const int M ( numeric_cast<int>( A.rows() ) );
590 const int N ( numeric_cast<int>( B.columns() ) );
591 const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
592 const int lda( numeric_cast<int>( A.spacing() ) );
593 const int ldb( numeric_cast<int>( B.spacing() ) );
594 const int ldc( numeric_cast<int>( C.spacing() ) );
// A symmetric and C column-major: symmetric multiply with A on the left.
596 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
597 cblas_csymm( CblasColMajor, CblasLeft, CblasUpper,
598 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// B symmetric and C column-major: symmetric multiply with B on the right.
600 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
601 cblas_csymm( CblasColMajor, CblasRight, CblasUpper,
602 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General case: storage order follows C; both operands are flagged as
// transposed exactly when C is row-major.
605 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
606 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
607 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
608 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Thin wrapper that hands a complex<double> matrix product to CBLAS.
// C is the target, A and B are the operands; alpha and beta are taken by value
// and their addresses are passed to the CBLAS routines.
// NOTE(review): some lines of the body are elided in this excerpt.
633 template<
typename MT3
636 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
637 complex<double> alpha, complex<double> beta )
639 using boost::numeric_cast;
// CBLAS takes int dimensions; numeric_cast converts the size values to int.
648 const int M ( numeric_cast<int>( A.rows() ) );
649 const int N ( numeric_cast<int>( B.columns() ) );
650 const int K ( numeric_cast<int>( A.columns() ) );
// Leading dimensions are taken from each matrix's spacing().
651 const int lda( numeric_cast<int>( A.spacing() ) );
652 const int ldb( numeric_cast<int>( B.spacing() ) );
653 const int ldc( numeric_cast<int>( C.spacing() ) );
// A symmetric and C column-major: symmetric multiply with A on the left.
655 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
656 cblas_zsymm( CblasColMajor, CblasLeft, CblasUpper,
657 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// B symmetric and C column-major: symmetric multiply with B on the right.
659 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
660 cblas_zsymm( CblasColMajor, CblasRight, CblasUpper,
661 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General case: storage order follows C; both operands are flagged as
// transposed exactly when C is row-major.
664 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
665 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
666 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
667 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
687 template<
typename MT
689 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
697 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
700 else if( rhs.lhs_.columns() == 0UL ) {
715 TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Dispatcher for the plain assignment C = A * B: forwards either to the default
// kernel or to the BLAS kernel selection.
// NOTE(review): the condition choosing between the two calls is elided in this
// excerpt — confirm the threshold/criterion in the full file.
731 template<
typename MT3
734 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
737 TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
739 TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Scalar (non-vectorized) default kernel for C = A * B; this overload is
// disabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// For each row i, the k == 0 term initializes C(i,j) and the remaining terms
// k = 1 .. K-1 are accumulated onto it.
// NOTE(review): reads A(i,0) / B(0,j) unconditionally, so it presumably relies
// on the caller having handled K == 0 beforehand — confirm against the caller.
758 template<
typename MT3
761 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
762 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// M x K times K x N product dimensions.
764 const size_t M( A.rows() );
765 const size_t N( B.columns() );
766 const size_t K( A.columns() );
768 for(
size_t i=0UL; i<M; ++i ) {
// First pass over row i: overwrite C with the k == 0 contribution.
769 for(
size_t j=0UL; j<N; ++j ) {
770 C(i,j) = A(i,0UL) * B(0UL,j);
// Remaining contributions start at k == 1 because k == 0 is already stored.
772 for(
size_t k=1UL; k<K; ++k ) {
773 for(
size_t j=0UL; j<N; ++j ) {
774 C(i,j) += A(i,k) * B(k,j);
796 template<
typename MT3
799 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
800 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
805 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
809 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
813 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
839 template<
typename MT3
842 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
843 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
845 typedef IntrinsicTrait<ElementType> IT;
847 const size_t M( A.rows() );
848 const size_t N( B.columns() );
849 const size_t K( A.columns() );
854 for(
size_t j=0UL; j<N; ++j ) {
855 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
856 for(
size_t k=0UL; k<K; ++k ) {
858 xmm1 = xmm1 + A.load(i ,k) * b1;
859 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
860 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
861 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
862 xmm5 = xmm5 + A.load(i+
IT::size*4UL,k) * b1;
863 xmm6 = xmm6 + A.load(i+
IT::size*5UL,k) * b1;
864 xmm7 = xmm7 + A.load(i+
IT::size*6UL,k) * b1;
865 xmm8 = xmm8 + A.load(i+
IT::size*7UL,k) * b1;
867 (~C).
store( i , j, xmm1 );
879 for( ; (j+2UL) <= N; j+=2UL ) {
880 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
881 for(
size_t k=0UL; k<K; ++k ) {
888 xmm1 = xmm1 + a1 * b1;
889 xmm2 = xmm2 + a2 * b1;
890 xmm3 = xmm3 + a3 * b1;
891 xmm4 = xmm4 + a4 * b1;
892 xmm5 = xmm5 + a1 * b2;
893 xmm6 = xmm6 + a2 * b2;
894 xmm7 = xmm7 + a3 * b2;
895 xmm8 = xmm8 + a4 * b2;
897 (~C).
store( i , j , xmm1 );
901 (~C).
store( i , j+1UL, xmm5 );
908 for(
size_t k=0UL; k<K; ++k ) {
910 xmm1 = xmm1 + A.load(i ,k) * b1;
911 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
912 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
913 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
915 (~C).
store( i , j, xmm1 );
923 for( ; (j+2UL) <= N; j+=2UL ) {
925 for(
size_t k=0UL; k<K; ++k ) {
930 xmm1 = xmm1 + a1 * b1;
931 xmm2 = xmm2 + a2 * b1;
932 xmm3 = xmm3 + a1 * b2;
933 xmm4 = xmm4 + a2 * b2;
935 (~C).
store( i , j , xmm1 );
937 (~C).
store( i , j+1UL, xmm3 );
942 for(
size_t k=0UL; k<K; ++k ) {
944 xmm1 = xmm1 + A.load(i ,k) * b1;
945 xmm2 = xmm2 + A.load(i+
IT::size,k) * b1;
947 (~C).
store( i , j, xmm1 );
953 for( ; (j+2UL) <= N; j+=2UL ) {
955 for(
size_t k=0UL; k<K; ++k ) {
957 xmm1 = xmm1 + a1 *
set( B(k,j ) );
958 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
960 (~C).
store( i, j , xmm1 );
961 (~C).
store( i, j+1UL, xmm2 );
965 for(
size_t k=0UL; k<K; ++k ) {
966 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
968 (~C).
store( i, j, xmm1 );
989 template<
typename MT3
992 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
993 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
995 selectDefaultAssignKernel( C, A, B );
1015 template<
typename MT3
1018 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1019 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1021 sgemm( C, A, B, 1.0F, 0.0F );
1042 template<
typename MT3
1045 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1046 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1048 dgemm( C, A, B, 1.0, 0.0 );
1069 template<
typename MT3
1072 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1073 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1075 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
1096 template<
typename MT3
1099 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1100 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1102 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
1121 template<
typename MT
1123 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1128 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
1140 const TmpType tmp(
serial( rhs ) );
1161 template<
typename MT >
1162 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1172 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1174 else if( IsSymmetric<MT1>::value )
1195 template<
typename MT
1197 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1205 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1219 TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
1235 template<
typename MT3
1238 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1241 TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1243 TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Scalar (non-vectorized) default kernel for C += A * B; this overload is
// disabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The innermost loop over the columns of C is unrolled by two.
1262 template<
typename MT3
1265 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1266 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// M x K times K x N product dimensions.
1268 const size_t M( A.rows() );
1269 const size_t N( B.columns() );
1270 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even value <= N.
1273 const size_t end( N &
size_t(-2) );
1275 for(
size_t i=0UL; i<M; ++i ) {
1276 for(
size_t k=0UL; k<K; ++k ) {
// Two columns per iteration over the even-sized prefix [0, end).
1277 for(
size_t j=0UL; j<
end; j+=2UL ) {
1278 C(i,j ) += A(i,k) * B(k,j );
1279 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update for column `end`, the one left over when N is odd.
// NOTE(review): the guard around this statement is elided in this excerpt.
1282 C(i,end) += A(i,k) * B(k,end);
1304 template<
typename MT3
1307 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1308 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1313 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1317 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1321 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1347 template<
typename MT3
1350 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1351 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1353 typedef IntrinsicTrait<ElementType> IT;
1355 const size_t M( A.rows() );
1356 const size_t N( B.columns() );
1357 const size_t K( A.columns() );
1362 for(
size_t j=0UL; j<N; ++j ) {
1371 for(
size_t k=0UL; k<K; ++k ) {
1373 xmm1 = xmm1 + A.load(i ,k) * b1;
1374 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
1375 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
1376 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
1377 xmm5 = xmm5 + A.load(i+
IT::size*4UL,k) * b1;
1378 xmm6 = xmm6 + A.load(i+
IT::size*5UL,k) * b1;
1379 xmm7 = xmm7 + A.load(i+
IT::size*6UL,k) * b1;
1380 xmm8 = xmm8 + A.load(i+
IT::size*7UL,k) * b1;
1382 (~C).
store( i , j, xmm1 );
1394 for( ; (j+2UL) <= N; j+=2UL ) {
1403 for(
size_t k=0UL; k<K; ++k ) {
1410 xmm1 = xmm1 + a1 * b1;
1411 xmm2 = xmm2 + a2 * b1;
1412 xmm3 = xmm3 + a3 * b1;
1413 xmm4 = xmm4 + a4 * b1;
1414 xmm5 = xmm5 + a1 * b2;
1415 xmm6 = xmm6 + a2 * b2;
1416 xmm7 = xmm7 + a3 * b2;
1417 xmm8 = xmm8 + a4 * b2;
1419 (~C).
store( i , j , xmm1 );
1423 (~C).
store( i , j+1UL, xmm5 );
1433 for(
size_t k=0UL; k<K; ++k ) {
1435 xmm1 = xmm1 + A.load(i ,k) * b1;
1436 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
1437 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
1438 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
1440 (~C).
store( i , j, xmm1 );
1448 for( ; (j+2UL) <= N; j+=2UL ) {
1453 for(
size_t k=0UL; k<K; ++k ) {
1458 xmm1 = xmm1 + a1 * b1;
1459 xmm2 = xmm2 + a2 * b1;
1460 xmm3 = xmm3 + a1 * b2;
1461 xmm4 = xmm4 + a2 * b2;
1463 (~C).
store( i , j , xmm1 );
1465 (~C).
store( i , j+1UL, xmm3 );
1471 for(
size_t k=0UL; k<K; ++k ) {
1473 xmm1 = xmm1 + A.load(i ,k) * b1;
1474 xmm2 = xmm2 + A.load(i+
IT::size,k) * b1;
1476 (~C).
store( i , j, xmm1 );
1482 for( ; (j+2UL) <= N; j+=2UL ) {
1485 for(
size_t k=0UL; k<K; ++k ) {
1487 xmm1 = xmm1 + a1 *
set( B(k,j ) );
1488 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
1490 (~C).
store( i, j , xmm1 );
1491 (~C).
store( i, j+1UL, xmm2 );
1495 for(
size_t k=0UL; k<K; ++k ) {
1496 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
1498 (~C).
store( i, j, xmm1 );
1519 template<
typename MT3
1522 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1523 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1525 selectDefaultAddAssignKernel( C, A, B );
1545 template<
typename MT3
1548 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1549 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1551 sgemm( C, A, B, 1.0F, 1.0F );
1572 template<
typename MT3
1575 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1576 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1578 dgemm( C, A, B, 1.0, 1.0 );
1599 template<
typename MT3
1602 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1603 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1605 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
1626 template<
typename MT3
1629 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1630 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1632 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
1653 template<
typename MT >
1654 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1664 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1666 else if( IsSymmetric<MT1>::value )
1691 template<
typename MT
1693 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1701 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1715 TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
1731 template<
typename MT3
1734 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1737 TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1739 TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Scalar (non-vectorized) default kernel for C -= A * B; this overload is
// disabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The innermost loop over the columns of C is unrolled by two.
1758 template<
typename MT3
1761 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1762 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// M x K times K x N product dimensions.
1764 const size_t M( A.rows() );
1765 const size_t N( B.columns() );
1766 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even value <= N.
1769 const size_t end( N &
size_t(-2) );
1771 for(
size_t i=0UL; i<M; ++i ) {
1772 for(
size_t k=0UL; k<K; ++k ) {
// Two columns per iteration over the even-sized prefix [0, end).
1773 for(
size_t j=0UL; j<
end; j+=2UL ) {
1774 C(i,j ) -= A(i,k) * B(k,j );
1775 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update for column `end`, the one left over when N is odd.
// NOTE(review): the guard around this statement is elided in this excerpt.
1778 C(i,end) -= A(i,k) * B(k,end);
1800 template<
typename MT3
1803 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1804 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1809 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1813 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1817 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1843 template<
typename MT3
1846 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1847 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1849 typedef IntrinsicTrait<ElementType> IT;
1851 const size_t M( A.rows() );
1852 const size_t N( B.columns() );
1853 const size_t K( A.columns() );
1858 for(
size_t j=0UL; j<N; ++j ) {
1867 for(
size_t k=0UL; k<K; ++k ) {
1869 xmm1 = xmm1 - A.load(i ,k) * b1;
1870 xmm2 = xmm2 - A.load(i+
IT::size ,k) * b1;
1871 xmm3 = xmm3 - A.load(i+
IT::size*2UL,k) * b1;
1872 xmm4 = xmm4 - A.load(i+
IT::size*3UL,k) * b1;
1873 xmm5 = xmm5 - A.load(i+
IT::size*4UL,k) * b1;
1874 xmm6 = xmm6 - A.load(i+
IT::size*5UL,k) * b1;
1875 xmm7 = xmm7 - A.load(i+
IT::size*6UL,k) * b1;
1876 xmm8 = xmm8 - A.load(i+
IT::size*7UL,k) * b1;
1878 (~C).
store( i , j, xmm1 );
1890 for( ; (j+2UL) <= N; j+=2UL ) {
1899 for(
size_t k=0UL; k<K; ++k ) {
1906 xmm1 = xmm1 - a1 * b1;
1907 xmm2 = xmm2 - a2 * b1;
1908 xmm3 = xmm3 - a3 * b1;
1909 xmm4 = xmm4 - a4 * b1;
1910 xmm5 = xmm5 - a1 * b2;
1911 xmm6 = xmm6 - a2 * b2;
1912 xmm7 = xmm7 - a3 * b2;
1913 xmm8 = xmm8 - a4 * b2;
1915 (~C).
store( i , j , xmm1 );
1919 (~C).
store( i , j+1UL, xmm5 );
1929 for(
size_t k=0UL; k<K; ++k ) {
1931 xmm1 = xmm1 - A.load(i ,k) * b1;
1932 xmm2 = xmm2 - A.load(i+
IT::size ,k) * b1;
1933 xmm3 = xmm3 - A.load(i+
IT::size*2UL,k) * b1;
1934 xmm4 = xmm4 - A.load(i+
IT::size*3UL,k) * b1;
1936 (~C).
store( i , j, xmm1 );
1944 for( ; (j+2UL) <= N; j+=2UL ) {
1949 for(
size_t k=0UL; k<K; ++k ) {
1954 xmm1 = xmm1 - a1 * b1;
1955 xmm2 = xmm2 - a2 * b1;
1956 xmm3 = xmm3 - a1 * b2;
1957 xmm4 = xmm4 - a2 * b2;
1959 (~C).
store( i , j , xmm1 );
1961 (~C).
store( i , j+1UL, xmm3 );
1967 for(
size_t k=0UL; k<K; ++k ) {
1969 xmm1 = xmm1 - A.load(i ,k) * b1;
1970 xmm2 = xmm2 - A.load(i+
IT::size,k) * b1;
1972 (~C).
store( i , j, xmm1 );
1978 for( ; (j+2UL) <= N; j+=2UL ) {
1981 for(
size_t k=0UL; k<K; ++k ) {
1983 xmm1 = xmm1 - a1 *
set( B(k,j ) );
1984 xmm2 = xmm2 - a1 *
set( B(k,j+1UL) );
1986 (~C).
store( i, j , xmm1 );
1987 (~C).
store( i, j+1UL, xmm2 );
1991 for(
size_t k=0UL; k<K; ++k ) {
1992 xmm1 = xmm1 - A.load(i,k) *
set( B(k,j) );
1994 (~C).
store( i, j, xmm1 );
2015 template<
typename MT3
2018 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2019 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2021 selectDefaultSubAssignKernel( C, A, B );
2041 template<
typename MT3
2044 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2045 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2047 sgemm( C, A, B, -1.0F, 1.0F );
2068 template<
typename MT3
2071 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2072 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2074 dgemm( C, A, B, -1.0, 1.0 );
2095 template<
typename MT3
2098 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2099 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2101 cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
2122 template<
typename MT3
2125 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2126 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2128 zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
2150 template<
typename MT >
2151 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2161 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2163 else if( IsSymmetric<MT1>::value )
2199 template<
typename MT
2201 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2209 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2212 else if( rhs.lhs_.columns() == 0UL ) {
2248 template<
typename MT
2250 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2255 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2267 const TmpType tmp( rhs );
2288 template<
typename MT >
2289 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2299 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2301 else if( IsSymmetric<MT1>::value )
2325 template<
typename MT
2327 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2335 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2370 template<
typename MT >
2371 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2381 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2383 else if( IsSymmetric<MT1>::value )
2411 template<
typename MT
2413 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
2421 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2456 template<
typename MT >
2457 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2467 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2469 else if( IsSymmetric<MT1>::value )
2518 template<
typename MT1
2522 :
public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2523 ,
private MatScalarMultExpr
2524 ,
private Computation
2528 typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
2540 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2545 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time predicate (scaled-multiplication specialization): `value` is
// nonzero when T1 is a row-major matrix type and at least one of T2 / T3 is a
// symmetric matrix type.
2555 template<
typename T1,
typename T2,
typename T3 >
2556 struct CanExploitSymmetry {
2557 enum { value = IsRowMajorMatrix<T1>::value &&
2558 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
// Compile-time predicate (scaled-multiplication specialization): `value` is
// nonzero when at least one operand has to be evaluated
// (evaluateLeft || evaluateRight) and CanExploitSymmetry does not hold for
// <T1,T2,T3>, which keeps the two predicates mutually exclusive.
2567 template<
typename T1,
typename T2,
typename T3 >
2568 struct IsEvaluationRequired {
2569 enum { value = ( evaluateLeft || evaluateRight ) &&
2570 !CanExploitSymmetry<T1,T2,T3>::value };
// Compile-time predicate for the single-precision BLAS path of the scaled
// multiplication. The visible part of the condition requires all three matrix
// types to be vectorizable with an ElementType satisfying IsFloat, and the
// fourth type T4 not to satisfy IsComplex.
// NOTE(review): T4 is presumably the scalar type of the expression, and the
// line that opens the enum is not part of this excerpt — confirm both.
2579 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2580 struct UseSinglePrecisionKernel {
2582 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2583 IsFloat<typename T1::ElementType>::value &&
2584 IsFloat<typename T2::ElementType>::value &&
2585 IsFloat<typename T3::ElementType>::value &&
2586 !IsComplex<T4>::value };
// Compile-time predicate for the double-precision BLAS path of the scaled
// multiplication. The visible part of the condition requires all three matrix
// types to be vectorizable with an ElementType satisfying IsDouble, and the
// fourth type T4 not to satisfy IsComplex.
// NOTE(review): T4 is presumably the scalar type of the expression, and the
// line that opens the enum is not part of this excerpt — confirm both.
2595 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2596 struct UseDoublePrecisionKernel {
2598 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2599 IsDouble<typename T1::ElementType>::value &&
2600 IsDouble<typename T2::ElementType>::value &&
2601 IsDouble<typename T3::ElementType>::value &&
2602 !IsComplex<T4>::value };
2611 template<
typename T1,
typename T2,
typename T3 >
2612 struct UseSinglePrecisionComplexKernel {
2613 typedef complex<float> Type;
2615 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2616 IsSame<typename T1::ElementType,Type>::value &&
2617 IsSame<typename T2::ElementType,Type>::value &&
2618 IsSame<typename T3::ElementType,Type>::value };
2627 template<
typename T1,
typename T2,
typename T3 >
2628 struct UseDoublePrecisionComplexKernel {
2629 typedef complex<double> Type;
2631 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2632 IsSame<typename T1::ElementType,Type>::value &&
2633 IsSame<typename T2::ElementType,Type>::value &&
2634 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback for the scaled
// multiplication: `value` is nonzero when BLAZE_BLAS_MODE is off, or when none
// of the four BLAS kernel predicates holds. Only the two real-valued predicates
// take the fourth type T4; the complex ones are checked on <T1,T2,T3>.
2642 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2643 struct UseDefaultKernel {
2644 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2645 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2646 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2647 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsic-based default kernels for the
// scaled multiplication: all three matrix types must be vectorizable and share
// one ElementType, T4 must be that same type, and the element type must report
// addition, subtraction and multiplication support in IntrinsicTrait.
2655 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2656 struct UseVectorizedDefaultKernel {
2657 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2658 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2659 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2660 IsSame<typename T1::ElementType,T4>::value &&
2661 IntrinsicTrait<typename T1::ElementType>::addition &&
2662 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2663 IntrinsicTrait<typename T1::ElementType>::multiplication };
2669 typedef DMatScalarMultExpr<MMM,ST,true>
This;
2670 typedef typename MultTrait<RES,ST>::Type
ResultType;
2674 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2679 typedef const TDMatTDMatMultExpr<MT1,MT2>
LeftOperand;
2685 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2688 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
2693 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2694 IsSame<ET1,ET2>::value &&
2695 IsSame<ET1,ST>::value &&
2696 IntrinsicTrait<ET1>::addition &&
2697 IntrinsicTrait<ET1>::multiplication };
2700 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2701 !evaluateRight && MT2::smpAssignable };
2710 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
2726 return matrix_(i,j) * scalar_;
2735 inline size_t rows()
const {
2736 return matrix_.rows();
2745 inline size_t columns()
const {
2746 return matrix_.columns();
2776 template<
typename T >
2777 inline bool canAlias(
const T* alias )
const {
2778 return matrix_.canAlias( alias );
2788 template<
typename T >
2789 inline bool isAliased(
const T* alias )
const {
2790 return matrix_.isAliased( alias );
2800 return matrix_.isAligned();
2810 typename MMM::RightOperand B( matrix_.rightOperand() );
2839 template<
typename MT3
2842 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
2844 using boost::numeric_cast;
2850 const int M ( numeric_cast<int>( A.rows() ) );
2851 const int N ( numeric_cast<int>( B.columns() ) );
2852 const int K ( numeric_cast<int>( A.columns() ) );
2853 const int lda( numeric_cast<int>( A.spacing() ) );
2854 const int ldb( numeric_cast<int>( B.spacing() ) );
2855 const int ldc( numeric_cast<int>( C.spacing() ) );
2857 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2858 cblas_ssymm( CblasColMajor, CblasLeft, CblasUpper,
2859 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2861 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2862 cblas_ssymm( CblasColMajor, CblasRight, CblasUpper,
2863 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2866 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2867 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2868 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2869 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2891 template<
typename MT3
2894 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
2896 using boost::numeric_cast;
2902 const int M ( numeric_cast<int>( A.rows() ) );
2903 const int N ( numeric_cast<int>( B.columns() ) );
2904 const int K ( numeric_cast<int>( A.columns() ) );
2905 const int lda( numeric_cast<int>( A.spacing() ) );
2906 const int ldb( numeric_cast<int>( B.spacing() ) );
2907 const int ldc( numeric_cast<int>( C.spacing() ) );
2909 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2910 cblas_dsymm( CblasColMajor, CblasLeft, CblasUpper,
2911 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
2913 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2914 cblas_dsymm( CblasColMajor, CblasRight, CblasUpper,
2915 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
2918 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2919 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2920 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2921 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// BLAS-backed kernel for complex<float> scaling factors: passes A, B, the target C and the
// addresses of alpha/beta to cblas_csymm() or cblas_cgemm().
// NOTE(review): this listing omits lines of the definition (remaining template parameters,
// braces and presumably a final 'else'); only the visible statements are described.
2944 template<
typename MT3
2947 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
2948 complex<float> alpha, complex<float> beta )
2950 using boost::numeric_cast;
// Problem sizes and matrix spacings, converted to int for the CBLAS calls below.
2959 const int M ( numeric_cast<int>( A.rows() ) );
2960 const int N ( numeric_cast<int>( B.columns() ) );
2961 const int K ( numeric_cast<int>( A.columns() ) );
2962 const int lda( numeric_cast<int>( A.spacing() ) );
2963 const int ldb( numeric_cast<int>( B.spacing() ) );
2964 const int ldc( numeric_cast<int>( C.spacing() ) );
// A's type is symmetric and C is column-major: symmetric multiply with A on the left.
// Unlike the real-valued kernels, alpha and beta are passed by address.
2966 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
2967 cblas_csymm( CblasColMajor, CblasLeft, CblasUpper,
2968 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// B's type is symmetric and C is column-major: symmetric multiply with B on the right,
// so B/ldb occupy the first matrix slot and A/lda the second.
2970 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
2971 cblas_csymm( CblasColMajor, CblasRight, CblasUpper,
2972 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General multiply: row-major C selects CblasRowMajor with both operands marked transposed;
// otherwise CblasColMajor with no transposition.
2975 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2976 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2977 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2978 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS-backed kernel for complex<double> scaling factors: passes A, B, the target C and the
// addresses of alpha/beta to cblas_zsymm() or cblas_zgemm().
// NOTE(review): this listing omits lines of the definition (remaining template parameters,
// braces and presumably a final 'else'); only the visible statements are described.
3001 template<
typename MT3
3004 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
3005 complex<double> alpha, complex<double> beta )
3007 using boost::numeric_cast;
// Problem sizes and matrix spacings, converted to int for the CBLAS calls below.
3016 const int M ( numeric_cast<int>( A.rows() ) );
3017 const int N ( numeric_cast<int>( B.columns() ) );
3018 const int K ( numeric_cast<int>( A.columns() ) );
3019 const int lda( numeric_cast<int>( A.spacing() ) );
3020 const int ldb( numeric_cast<int>( B.spacing() ) );
3021 const int ldc( numeric_cast<int>( C.spacing() ) );
// A's type is symmetric and C is column-major: symmetric multiply with A on the left.
// alpha and beta are passed by address.
3023 if( IsSymmetric<MT4>::value && IsColumnMajorMatrix<MT3>::value ) {
3024 cblas_zsymm( CblasColMajor, CblasLeft, CblasUpper,
3025 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// B's type is symmetric and C is column-major: symmetric multiply with B on the right,
// so B/ldb occupy the first matrix slot and A/lda the second.
3027 else if( IsSymmetric<MT5>::value && IsColumnMajorMatrix<MT3>::value ) {
3028 cblas_zsymm( CblasColMajor, CblasRight, CblasUpper,
3029 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General multiply: row-major C selects CblasRowMajor with both operands marked transposed;
// otherwise CblasColMajor with no transposition.
3032 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3033 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3034 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3035 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of a scaled matrix product (rhs) to a dense matrix (lhs). The overload is
// removed from overload resolution (DisableIf) when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the listing elides the bodies of the two early-exit branches and the
// definitions of A and B used in the final call.
3053 template<
typename MT
3055 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3056 assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Operands of the wrapped multiplication expression.
3063 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3064 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case 1: the target has no rows or no columns.
3066 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case 2: the inner dimension of the product is zero.
3069 else if( left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection together with the scalar factor.
3084 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the assignment C = A * B * scalar: dispatches either to the default
// kernel or to the BLAS kernel.
// NOTE(review): the condition choosing between the two calls is on a line not shown in
// this listing (presumably a size threshold — TODO confirm against the full source).
3099 template<
typename MT3
3103 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3106 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
3108 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
// Non-vectorized default assignment kernel; selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// NOTE(review): the body of the last loop and the closing braces are elided in this listing.
3126 template<
typename MT3
3130 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3131 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// One pass per row i of A.
3133 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialize row i of C from the first term (j = 0) of each dot product, which avoids a
// separate reset of C.
3134 for(
size_t k=0UL; k<B.columns(); ++k ) {
3135 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the remaining terms j = 1 .. A.columns()-1.
3137 for(
size_t j=1UL; j<A.columns(); ++j ) {
3138 for(
size_t k=0UL; k<B.columns(); ++k ) {
3139 C(i,k) += A(i,j) * B(j,k);
// Final pass over row i; its body is not visible here (presumably applies 'scalar').
3142 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default assignment kernel for a row-major target (DenseMatrix<MT3,false>).
// Instead of computing the product directly, it builds a temporary 'tmp' from one operand
// and re-dispatches through assign() with a mixed product expression.
// NOTE(review): the declarations of 'tmp' and the final 'else' are elided in this listing;
// from the calls, tmp replaces B in the 'A * tmp' branches and A in the 'tmp * B' branches.
3163 template<
typename MT3
3167 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3168 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Only A's type is resizable: tmp stands in for B.
3173 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3175 assign( ~C, A * tmp * scalar );
// Only B's type is resizable: tmp stands in for A.
3177 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3179 assign( ~C, tmp * B * scalar );
// Otherwise decide by element count: B is not larger than A, so tmp stands in for B.
3181 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3183 assign( ~C, A * tmp * scalar );
// Remaining case: tmp stands in for A.
3187 assign( ~C, tmp * B * scalar );
// Vectorized default assignment kernel for a column-major target (DenseMatrix<MT3,true>).
// Accumulates products of SIMD loads from A with values taken from B in IntrinsicType
// registers and stores each accumulator, multiplied by 'factor', into C.
// NOTE(review): this listing elides the loops over i, the declarations of i, j (outside the
// first loop), factor, a1..a4, b1 and b2, most of the stores and all closing braces; the
// comments below describe only what the visible lines show.
3206 template<
typename MT3
3210 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3211 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// IT::size is the row step between neighbouring SIMD loads below.
3213 typedef IntrinsicTrait<ElementType> IT;
3215 const size_t M( A.rows() );
3216 const size_t N( B.columns() );
3217 const size_t K( A.columns() );
// Tile of 8 row slices (i, i+IT::size, ..., i+7*IT::size) by one column j.
3224 for(
size_t j=0UL; j<N; ++j ) {
3225 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3226 for(
size_t k=0UL; k<K; ++k ) {
3228 xmm1 = xmm1 + A.load(i ,k) * b1;
3229 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
3230 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
3231 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
3232 xmm5 = xmm5 + A.load(i+
IT::size*4UL,k) * b1;
3233 xmm6 = xmm6 + A.load(i+
IT::size*5UL,k) * b1;
3234 xmm7 = xmm7 + A.load(i+
IT::size*6UL,k) * b1;
3235 xmm8 = xmm8 + A.load(i+
IT::size*7UL,k) * b1;
// Write-back of the scaled accumulators (only the first store is visible here).
3237 (~C).
store( i , j, xmm1 * factor );
// Tile of 4 row slices by 2 columns: xmm1-4 accumulate with b1 (column j), xmm5-8 with b2
// (column j+1).
3249 for( ; (j+2UL) <= N; j+=2UL ) {
3250 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3251 for(
size_t k=0UL; k<K; ++k ) {
3258 xmm1 = xmm1 + a1 * b1;
3259 xmm2 = xmm2 + a2 * b1;
3260 xmm3 = xmm3 + a3 * b1;
3261 xmm4 = xmm4 + a4 * b1;
3262 xmm5 = xmm5 + a1 * b2;
3263 xmm6 = xmm6 + a2 * b2;
3264 xmm7 = xmm7 + a3 * b2;
3265 xmm8 = xmm8 + a4 * b2;
3267 (~C).
store( i , j , xmm1 * factor );
3271 (~C).
store( i , j+1UL, xmm5 * factor );
// 4 row slices by a single column j (presumably the leftover column of the 4x2 tiling).
3278 for(
size_t k=0UL; k<K; ++k ) {
3280 xmm1 = xmm1 + A.load(i ,k) * b1;
3281 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
3282 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
3283 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
3285 (~C).
store( i , j, xmm1 * factor );
// Tile of 2 row slices by 2 columns: xmm1-2 go to column j, xmm3-4 to column j+1.
3293 for( ; (j+2UL) <= N; j+=2UL ) {
3295 for(
size_t k=0UL; k<K; ++k ) {
3300 xmm1 = xmm1 + a1 * b1;
3301 xmm2 = xmm2 + a2 * b1;
3302 xmm3 = xmm3 + a1 * b2;
3303 xmm4 = xmm4 + a2 * b2;
3305 (~C).
store( i , j , xmm1 * factor );
3307 (~C).
store( i , j+1UL, xmm3 * factor );
// 2 row slices by a single column j.
3312 for(
size_t k=0UL; k<K; ++k ) {
3314 xmm1 = xmm1 + A.load(i ,k) * b1;
3315 xmm2 = xmm2 + A.load(i+
IT::size,k) * b1;
3317 (~C).
store( i , j, xmm1 * factor );
// One row slice by 2 columns; the B elements are passed through set() before multiplying.
3323 for( ; (j+2UL) <= N; j+=2UL ) {
3325 for(
size_t k=0UL; k<K; ++k ) {
3327 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3328 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3330 (~C).
store( i, j , xmm1 * factor );
3331 (~C).
store( i, j+1UL, xmm2 * factor );
// One row slice by a single column j.
3335 for(
size_t k=0UL; k<K; ++k ) {
3336 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3338 (~C).
store( i, j, xmm1 * factor );
// BLAS assignment kernel overload enabled when UseDefaultKernel<MT3,MT4,MT5,ST2> holds:
// it performs no BLAS call and simply forwards to the default assignment kernel.
3358 template<
typename MT3
3362 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3363 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3365 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assignment kernel overload enabled by UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>.
// Calls sgemm() with alpha = scalar and beta = 0, i.e. the previous content of C does not
// contribute to the result.
3384 template<
typename MT3
3388 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3389 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3391 sgemm( C, A, B, scalar, 0.0F );
// BLAS assignment kernel overload enabled by UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>.
// Calls dgemm() with alpha = scalar and beta = 0, i.e. the previous content of C does not
// contribute to the result.
3411 template<
typename MT3
3415 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3416 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3418 dgemm( C, A, B, scalar, 0.0 );
// BLAS assignment kernel overload enabled by UseSinglePrecisionComplexKernel<MT3,MT4,MT5>.
// The scalar becomes the real part of alpha (imaginary part 0); beta is complex zero, so
// the previous content of C does not contribute to the result.
3438 template<
typename MT3
3442 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3443 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3445 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// BLAS assignment kernel overload enabled by UseDoublePrecisionComplexKernel<MT3,MT4,MT5>.
// The scalar becomes the real part of alpha (imaginary part 0); beta is complex zero, so
// the previous content of C does not contribute to the result.
3465 template<
typename MT3
3469 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3470 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3472 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of a scaled matrix product to a sparse matrix; disabled (DisableIf) when
// CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the statement that transfers 'tmp' into lhs is elided in this listing.
3489 template<
typename MT
3491 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3492 assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type selected by the storage-order flag SO of the target: ResultType or
// OppositeType.
3496 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
// Evaluate the expression into the temporary; rhs is wrapped in serial() first.
3508 const TmpType tmp(
serial( rhs ) );
// Assignment overload for row-major targets (Matrix<MT,false>) that is enabled when
// CanExploitSymmetry<MT,MT1,MT2> holds. It re-expresses the product with trans() applied
// to the operand(s) whose type is symmetric and assigns that expression instead.
// NOTE(review): the statement of the first branch (both operands symmetric) and the final
// 'else' are elided in this listing.
3527 template<
typename MT >
3528 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3529 assign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
3538 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3539 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric (branch body not visible).
3541 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric: apply trans() to the left operand.
3543 else if( IsSymmetric<MT1>::value )
3544 assign( ~lhs,
trans( left ) * right * rhs.scalar_ );
// Remaining case: apply trans() to the right operand.
3546 assign( ~lhs, left *
trans( right ) * rhs.scalar_ );
// Addition assignment of a scaled matrix product (rhs) to a dense matrix (lhs).
// NOTE(review): the body of the early-exit branch and the definitions of A and B used in
// the final call are elided in this listing.
3562 template<
typename MT
3564 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
3571 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3572 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
3574 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection together with the scalar factor.
3588 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the addition assignment C += A * B * scalar: dispatches either to
// the default kernel or to the BLAS kernel.
// NOTE(review): the condition choosing between the two calls is on a line not shown in
// this listing (presumably a size threshold — TODO confirm against the full source).
3603 template<
typename MT3
3607 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3610 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3612 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
// Non-vectorized default addition assignment kernel; selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// NOTE(review): only the signature is visible in this listing; the body is elided.
3630 template<
typename MT3
3634 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3635 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default addition assignment kernel for a row-major target
// (DenseMatrix<MT3,false>).
// NOTE(review): only the branch conditions are visible; the branch bodies are elided. The
// conditions are the same as in the row-major assign kernel of this file: resizability of
// the operand types first, then a comparison of the operands' element counts.
3656 template<
typename MT3
3660 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3661 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Only A's type is resizable.
3666 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Only B's type is resizable.
3670 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Neither case above applies and B has no more elements than A.
3674 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default addition assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). Accumulates products of SIMD loads from A with values taken
// from B, then stores (current C value + accumulator * factor) back into C.
// NOTE(review): this listing elides the loops over i, the declarations of i, j (outside the
// first loop), factor, a1..a4, b1 and b2, most of the stores and all closing braces; the
// comments below describe only what the visible lines show.
3699 template<
typename MT3
3703 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3704 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// IT::size is the row step between neighbouring SIMD loads below.
3706 typedef IntrinsicTrait<ElementType> IT;
3708 const size_t M( A.rows() );
3709 const size_t N( B.columns() );
3710 const size_t K( A.columns() );
// Tile of 8 row slices (i, i+IT::size, ..., i+7*IT::size) by one column j.
3717 for(
size_t j=0UL; j<N; ++j ) {
3718 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3719 for(
size_t k=0UL; k<K; ++k ) {
3721 xmm1 = xmm1 + A.load(i ,k) * b1;
3722 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
3723 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
3724 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
3725 xmm5 = xmm5 + A.load(i+
IT::size*4UL,k) * b1;
3726 xmm6 = xmm6 + A.load(i+
IT::size*5UL,k) * b1;
3727 xmm7 = xmm7 + A.load(i+
IT::size*6UL,k) * b1;
3728 xmm8 = xmm8 + A.load(i+
IT::size*7UL,k) * b1;
// Read-modify-write: the scaled accumulator is added to the value already in C.
3730 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
// Tile of 4 row slices by 2 columns: xmm1-4 accumulate with b1 (column j), xmm5-8 with b2
// (column j+1).
3742 for( ; (j+2UL) <= N; j+=2UL ) {
3743 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3744 for(
size_t k=0UL; k<K; ++k ) {
3751 xmm1 = xmm1 + a1 * b1;
3752 xmm2 = xmm2 + a2 * b1;
3753 xmm3 = xmm3 + a3 * b1;
3754 xmm4 = xmm4 + a4 * b1;
3755 xmm5 = xmm5 + a1 * b2;
3756 xmm6 = xmm6 + a2 * b2;
3757 xmm7 = xmm7 + a3 * b2;
3758 xmm8 = xmm8 + a4 * b2;
3760 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3764 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
// 4 row slices by a single column j (presumably the leftover column of the 4x2 tiling).
3771 for(
size_t k=0UL; k<K; ++k ) {
3773 xmm1 = xmm1 + A.load(i ,k) * b1;
3774 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
3775 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
3776 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
3778 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
// Tile of 2 row slices by 2 columns: xmm1-2 go to column j, xmm3-4 to column j+1.
3786 for( ; (j+2UL) <= N; j+=2UL ) {
3788 for(
size_t k=0UL; k<K; ++k ) {
3793 xmm1 = xmm1 + a1 * b1;
3794 xmm2 = xmm2 + a2 * b1;
3795 xmm3 = xmm3 + a1 * b2;
3796 xmm4 = xmm4 + a2 * b2;
3798 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3800 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
// 2 row slices by a single column j.
3805 for(
size_t k=0UL; k<K; ++k ) {
3807 xmm1 = xmm1 + A.load(i ,k) * b1;
3808 xmm2 = xmm2 + A.load(i+
IT::size,k) * b1;
3810 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
// One row slice by 2 columns; the B elements are passed through set() before multiplying.
3816 for( ; (j+2UL) <= N; j+=2UL ) {
3818 for(
size_t k=0UL; k<K; ++k ) {
3820 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3821 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3823 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3824 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
// One row slice by a single column j.
3828 for(
size_t k=0UL; k<K; ++k ) {
3829 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3831 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// BLAS addition assignment kernel overload enabled when UseDefaultKernel<MT3,MT4,MT5,ST2>
// holds: it performs no BLAS call and forwards to the default addition assignment kernel.
3851 template<
typename MT3
3855 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3856 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3858 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS addition assignment kernel overload enabled by
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>. Calls sgemm() with alpha = scalar and
// beta = 1, so the product is added to the existing content of C.
3877 template<
typename MT3
3881 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3882 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3884 sgemm( C, A, B, scalar, 1.0F );
// BLAS addition assignment kernel overload enabled by
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>. Calls dgemm() with alpha = scalar and
// beta = 1, so the product is added to the existing content of C.
3904 template<
typename MT3
3908 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3909 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3911 dgemm( C, A, B, scalar, 1.0 );
// BLAS addition assignment kernel overload enabled by
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5>. The scalar becomes the real part of alpha;
// beta is complex one, so the product is added to the existing content of C.
3931 template<
typename MT3
3935 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3936 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3938 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS addition assignment kernel overload enabled by
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5>. The scalar becomes the real part of alpha;
// beta is complex one, so the product is added to the existing content of C.
3958 template<
typename MT3
3962 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3963 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3965 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Addition assignment overload for row-major targets (Matrix<MT,false>) that is enabled
// when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): only the branch conditions are visible in this listing; the statements
// executed in each branch are elided.
3985 template<
typename MT >
3986 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3987 addAssign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
3996 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3997 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric.
3999 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric.
4001 else if( IsSymmetric<MT1>::value )
// Subtraction assignment of a scaled matrix product (rhs) to a dense matrix (lhs).
// NOTE(review): the body of the early-exit branch and the definitions of A and B used in
// the final call are elided in this listing.
4024 template<
typename MT
4026 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4033 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4034 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
4036 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Regular case: hand over to the kernel selection together with the scalar factor.
4050 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for the subtraction assignment C -= A * B * scalar: dispatches either
// to the default kernel or to the BLAS kernel.
// NOTE(review): the condition choosing between the two calls is on a line not shown in
// this listing (presumably a size threshold — TODO confirm against the full source).
4065 template<
typename MT3
4069 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4072 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
4074 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
// Non-vectorized default subtraction assignment kernel; selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// NOTE(review): only the signature is visible in this listing; the body is elided.
4092 template<
typename MT3
4096 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4097 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>).
// NOTE(review): only the branch conditions are visible; the branch bodies are elided. The
// conditions are the same as in the row-major assign kernel of this file: resizability of
// the operand types first, then a comparison of the operands' element counts.
4118 template<
typename MT3
4122 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4123 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Only A's type is resizable.
4128 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Only B's type is resizable.
4132 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Neither case above applies and B has no more elements than A.
4136 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). Accumulates products of SIMD loads from A with values taken
// from B, then stores (current C value - accumulator * factor) back into C.
// NOTE(review): this listing elides the loops over i, the declarations of i, j (outside the
// first loop), factor, a1..a4, b1 and b2, most of the stores and all closing braces; the
// comments below describe only what the visible lines show.
4161 template<
typename MT3
4165 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4166 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// IT::size is the row step between neighbouring SIMD loads below.
4168 typedef IntrinsicTrait<ElementType> IT;
4170 const size_t M( A.rows() );
4171 const size_t N( B.columns() );
4172 const size_t K( A.columns() );
// Tile of 8 row slices (i, i+IT::size, ..., i+7*IT::size) by one column j.
4179 for(
size_t j=0UL; j<N; ++j ) {
4180 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4181 for(
size_t k=0UL; k<K; ++k ) {
4183 xmm1 = xmm1 + A.load(i ,k) * b1;
4184 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
4185 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
4186 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
4187 xmm5 = xmm5 + A.load(i+
IT::size*4UL,k) * b1;
4188 xmm6 = xmm6 + A.load(i+
IT::size*5UL,k) * b1;
4189 xmm7 = xmm7 + A.load(i+
IT::size*6UL,k) * b1;
4190 xmm8 = xmm8 + A.load(i+
IT::size*7UL,k) * b1;
// Read-modify-write: the scaled accumulator is subtracted from the value already in C.
4192 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
// Tile of 4 row slices by 2 columns: xmm1-4 accumulate with b1 (column j), xmm5-8 with b2
// (column j+1).
4204 for( ; (j+2UL) <= N; j+=2UL ) {
4205 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4206 for(
size_t k=0UL; k<K; ++k ) {
4213 xmm1 = xmm1 + a1 * b1;
4214 xmm2 = xmm2 + a2 * b1;
4215 xmm3 = xmm3 + a3 * b1;
4216 xmm4 = xmm4 + a4 * b1;
4217 xmm5 = xmm5 + a1 * b2;
4218 xmm6 = xmm6 + a2 * b2;
4219 xmm7 = xmm7 + a3 * b2;
4220 xmm8 = xmm8 + a4 * b2;
4222 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4226 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
// 4 row slices by a single column j (presumably the leftover column of the 4x2 tiling).
4233 for(
size_t k=0UL; k<K; ++k ) {
4235 xmm1 = xmm1 + A.load(i ,k) * b1;
4236 xmm2 = xmm2 + A.load(i+
IT::size ,k) * b1;
4237 xmm3 = xmm3 + A.load(i+
IT::size*2UL,k) * b1;
4238 xmm4 = xmm4 + A.load(i+
IT::size*3UL,k) * b1;
4240 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
// Tile of 2 row slices by 2 columns: xmm1-2 go to column j, xmm3-4 to column j+1.
4248 for( ; (j+2UL) <= N; j+=2UL ) {
4250 for(
size_t k=0UL; k<K; ++k ) {
4255 xmm1 = xmm1 + a1 * b1;
4256 xmm2 = xmm2 + a2 * b1;
4257 xmm3 = xmm3 + a1 * b2;
4258 xmm4 = xmm4 + a2 * b2;
4260 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4262 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
// 2 row slices by a single column j.
4267 for(
size_t k=0UL; k<K; ++k ) {
4269 xmm1 = xmm1 + A.load(i ,k) * b1;
4270 xmm2 = xmm2 + A.load(i+
IT::size,k) * b1;
4272 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
// One row slice by 2 columns; the B elements are passed through set() before multiplying.
4278 for( ; (j+2UL) <= N; j+=2UL ) {
4280 for(
size_t k=0UL; k<K; ++k ) {
4282 xmm1 = xmm1 + a1 *
set( B(k,j ) );
4283 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
4285 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
4286 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
// One row slice by a single column j.
4290 for(
size_t k=0UL; k<K; ++k ) {
4291 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
4293 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// BLAS subtraction assignment kernel overload enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds: it performs no BLAS call and forwards to the
// default subtraction assignment kernel.
4313 template<
typename MT3
4317 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4318 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4320 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS subtraction assignment kernel overload enabled by
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>. The subtraction is expressed as an
// accumulation with a negated factor: alpha = -scalar, beta = 1.
4339 template<
typename MT3
4343 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4344 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4346 sgemm( C, A, B, -scalar, 1.0F );
// BLAS subtraction assignment kernel overload enabled by
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>. The subtraction is expressed as an
// accumulation with a negated factor: alpha = -scalar, beta = 1.
4366 template<
typename MT3
4370 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4371 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4373 dgemm( C, A, B, -scalar, 1.0 );
// BLAS subtraction assignment kernel overload enabled by
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5>. The negated scalar becomes the real part
// of alpha; beta is complex one, so the negated product is accumulated onto C.
4393 template<
typename MT3
4397 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4398 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4400 cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// BLAS subtraction assignment kernel overload enabled by
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5>. The negated scalar becomes the real part
// of alpha; beta is complex one, so the negated product is accumulated onto C.
4420 template<
typename MT3
4424 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4425 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4427 zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment overload for row-major targets (Matrix<MT,false>) that is enabled
// when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): only the branch conditions are visible in this listing; the statements
// executed in each branch are elided.
4446 template<
typename MT >
4447 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4448 subAssign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
4457 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4458 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric.
4460 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric.
4462 else if( IsSymmetric<MT1>::value )
// SMP assignment of a scaled matrix product to a dense matrix; only available when
// IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf).
// NOTE(review): the branch bodies and everything after the special-case checks are elided
// in this listing.
4496 template<
typename MT
4498 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4499 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4506 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4507 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case 1: the target has no rows or no columns.
4509 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case 2: the inner dimension of the product is zero.
4512 else if( left.columns() == 0UL ) {
// SMP assignment of a scaled matrix product to a sparse matrix; only available when
// IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf).
// NOTE(review): the statement that transfers 'tmp' into lhs is elided in this listing.
4546 template<
typename MT
4548 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4549 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type selected by the storage-order flag SO of the target.
4553 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
// Evaluate the expression into the temporary. Unlike the non-SMP sparse assign() in this
// file, rhs is not wrapped in serial() here.
4565 const TmpType tmp( rhs );
// SMP assignment overload for row-major targets (Matrix<MT,false>) that is enabled when
// CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): only the branch conditions are visible in this listing; the statements
// executed in each branch are elided.
4584 template<
typename MT >
4585 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4586 smpAssign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
4595 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4596 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric.
4598 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric.
4600 else if( IsSymmetric<MT1>::value )
// SMP addition assignment of a scaled matrix product to a dense matrix; only available
// when IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf).
// NOTE(review): the body of the early-exit branch and the regular path are elided in this
// listing.
4622 template<
typename MT
4624 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4625 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4632 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4633 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
4635 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP addition assignment overload for row-major targets (Matrix<MT,false>) that is
// enabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): only the branch conditions are visible in this listing; the statements
// executed in each branch are elided.
4668 template<
typename MT >
4669 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4670 smpAddAssign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
4679 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4680 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric.
4682 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric.
4684 else if( IsSymmetric<MT1>::value )
// SMP subtraction assignment of a scaled matrix product to a dense matrix; only available
// when IsEvaluationRequired<MT,MT1,MT2> holds (EnableIf).
// NOTE(review): the body of the early-exit branch and the regular path are elided in this
// listing.
4710 template<
typename MT
4712 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4713 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4720 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4721 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target or zero inner dimension.
4723 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment overload for row-major targets (Matrix<MT,false>) that is
// enabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): only the branch conditions are visible in this listing; the statements
// executed in each branch are elided.
4756 template<
typename MT >
4757 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4758 smpSubAssign( Matrix<MT,false>& lhs,
const DMatScalarMultExpr& rhs )
4767 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4768 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operand types are symmetric.
4770 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand type is symmetric.
4772 else if( IsSymmetric<MT1>::value )
// Free function returning a TDMatTDMatMultExpr<T1,T2> expression object.
// NOTE(review): the function name, its parameters and the size check guarding the throw
// are elided in this listing; presumably this is the multiplication operator for two
// column-major dense matrices — confirm against the full source.
4840 template<
typename T1
4842 inline const TDMatTDMatMultExpr<T1,T2>
// Raised with a fixed message when the operand sizes are incompatible.
4848 throw std::invalid_argument(
"Matrix sizes do not match" );
4865 template<
typename MT1,
typename MT2 >
// Trait specialization whose result is inherited from Columns<MT2>, i.e. taken from the
// right-hand side operand type.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4883 template<
typename MT1,
typename MT2 >
4885 :
public Columns<MT2>
// Trait specialization that is true only if both operand types satisfy IsLower.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4901 template<
typename MT1,
typename MT2 >
4903 :
public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
// Trait specialization that is true only if both operand types satisfy IsUpper.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4919 template<
typename MT1,
typename MT2 >
4921 :
public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
// Result type of (MT1 * MT2) * VT for a dense column vector VT. The product is regrouped
// as MT1 * (MT2 * VT): the inner matrix/vector trait is evaluated first and its result is
// fed to the outer one. If the operands are not two column-major dense matrices and a
// dense column vector, the type is INVALID_TYPE.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4937 template<
typename MT1,
typename MT2,
typename VT >
4942 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4943 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4944 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4945 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4946 , INVALID_TYPE >::Type Type;
// Result type of (MT1 * MT2) * VT for a sparse column vector VT. The product is regrouped
// as MT1 * (MT2 * VT): the inner step uses the sparse-vector trait, and its result is fed
// to the dense-vector trait for the outer step. If the operands are not two column-major
// dense matrices and a sparse column vector, the type is INVALID_TYPE.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4955 template<
typename MT1,
typename MT2,
typename VT >
4960 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4961 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4962 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4963 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4964 , INVALID_TYPE >::Type Type;
// Result type of VT * (MT1 * MT2) for a dense row vector VT. The product is regrouped as
// (VT * MT1) * MT2: the inner vector/matrix trait is evaluated first and its result is fed
// to the outer one. If the operands are not a dense row vector and two column-major dense
// matrices, the type is INVALID_TYPE.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4973 template<
typename VT,
typename MT1,
typename MT2 >
4978 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4979 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4980 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4981 ,
typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4982 , INVALID_TYPE >::Type Type;
// Result type of VT * (MT1 * MT2) for a sparse row vector VT. The product is regrouped as
// (VT * MT1) * MT2: the inner step uses the sparse-vector trait, and its result is fed to
// the dense-vector trait for the outer step. If the operands are not a sparse row vector
// and two column-major dense matrices, the type is INVALID_TYPE.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
4991 template<
typename VT,
typename MT1,
typename MT2 >
4996 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4997 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4998 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4999 ,
typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
5000 , INVALID_TYPE >::Type Type;
// Result type for a submatrix of the product: defined as the product type of the
// submatrix types of both (const-qualified) operands, with the same flag AF.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
5009 template<
typename MT1,
typename MT2,
bool AF >
5014 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
5015 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
// Result type for a row of the product: defined as the product type of a row of the left
// operand with the complete right operand.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
5024 template<
typename MT1,
typename MT2 >
5029 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Result type for a column of the product: defined as the product type of the complete
// left operand with a column of the right operand.
// NOTE(review): the 'struct ...' line naming the specialization is elided in this listing.
5038 template<
typename MT1,
typename MT2 >
5043 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:286
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:131
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4838
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:282
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:129
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:176
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
Efficient implementation of a compressed matrix.The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:444
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:130
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:425
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:224
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:277
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:331
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis.This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:131
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:403
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2474
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:381
Base class for dense matrices.The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:279
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:276
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type.This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold.This threshold speci...
Definition: Thresholds.h:903
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression.Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection.The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:126
Header file for the IsDouble type trait.
Compile time check for row-major matrix types.This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type.In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:289
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:391
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:283
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:415
Header file for the IsNumeric type trait.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
Header file for run time assertion macros.
Compile time check for column-major matrix types. This type trait tests whether or not the given templ...
Definition: IsColumnMajorMatrix.h:104
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:278
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:280
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:361
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:292
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:932
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:316
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:127
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
Header file for the IsUpper type trait.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:295
Header file for the IsColumnVector type trait.
Constraint on the data type.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:225
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:435
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:371
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:128
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:281
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:849
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:445