35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
121 template<
typename MT1
// Compile-time trait: `value` is nonzero when T1 is a column-major matrix and
// at least one of T2 / T3 is a symmetric matrix type.
// NOTE(review): the struct's closing brace is not part of this excerpt.
155 template<
typename T1,
typename T2,
typename T3 >
156 struct CanExploitSymmetry {
157 enum { value = IsColumnMajorMatrix<T1>::value &&
158 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
// Compile-time trait: `value` is nonzero when at least one of the class-level
// flags evaluateLeft / evaluateRight is set AND the symmetry shortcut described
// by CanExploitSymmetry<T1,T2,T3> does not apply.
// NOTE(review): evaluateLeft / evaluateRight are declared outside this excerpt.
169 template<
typename T1,
typename T2,
typename T3 >
170 struct IsEvaluationRequired {
171 enum { value = ( evaluateLeft || evaluateRight ) &&
172 !CanExploitSymmetry<T1,T2,T3>::value };
// Compile-time trait selecting the single-precision kernel. The visible part of
// the condition requires all three types to be vectorizable and all three
// element types to satisfy IsFloat.
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
182 template<
typename T1,
typename T2,
typename T3 >
183 struct UseSinglePrecisionKernel {
185 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
186 IsFloat<typename T1::ElementType>::value &&
187 IsFloat<typename T2::ElementType>::value &&
188 IsFloat<typename T3::ElementType>::value };
// Compile-time trait selecting the double-precision kernel. The visible part of
// the condition requires all three types to be vectorizable and all three
// element types to satisfy IsDouble.
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
198 template<
typename T1,
typename T2,
typename T3 >
199 struct UseDoublePrecisionKernel {
201 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
202 IsDouble<typename T1::ElementType>::value &&
203 IsDouble<typename T2::ElementType>::value &&
204 IsDouble<typename T3::ElementType>::value };
// Compile-time trait selecting the single-precision complex kernel. The visible
// part of the condition requires all three types to be vectorizable and all
// three element types to be exactly complex<float> (aliased as Type).
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
215 template<
typename T1,
typename T2,
typename T3 >
216 struct UseSinglePrecisionComplexKernel {
217 typedef complex<float> Type;
219 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
220 IsSame<typename T1::ElementType,Type>::value &&
221 IsSame<typename T2::ElementType,Type>::value &&
222 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the double-precision complex kernel. The visible
// part of the condition requires all three types to be vectorizable and all
// three element types to be exactly complex<double> (aliased as Type).
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
233 template<
typename T1,
typename T2,
typename T3 >
234 struct UseDoublePrecisionComplexKernel {
235 typedef complex<double> Type;
237 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
238 IsSame<typename T1::ElementType,Type>::value &&
239 IsSame<typename T2::ElementType,Type>::value &&
240 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: `value` is nonzero when BLAZE_BLAS_MODE is off, or when
// none of the four specialised kernel traits (single, double, single-complex,
// double-complex) applies to <T1,T2,T3>.
250 template<
typename T1,
typename T2,
typename T3 >
251 struct UseDefaultKernel {
252 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
253 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
254 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
255 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait: `value` is nonzero when all three types are vectorizable,
// share one element type, and IntrinsicTrait of that element type reports
// addition, subtraction and multiplication as available.
265 template<
typename T1,
typename T2,
typename T3 >
266 struct UseVectorizedDefaultKernel {
267 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
268 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
269 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
270 IntrinsicTrait<typename T1::ElementType>::addition &&
271 IntrinsicTrait<typename T1::ElementType>::subtraction &&
272 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compilation flags of the expression.
// `vectorizable` starts with both operand types being vectorizable; the rest of
// its condition is not part of this excerpt.
303 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
// `smpAssignable` requires that neither operand needs evaluation and that both
// operand types are themselves SMP-assignable.
309 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
310 !evaluateRight && MT2::smpAssignable };
// Fragment of the element computation: accumulates products of lhs_ row i and
// rhs_ column j into `tmp`, two k-indices per loop iteration.
// NOTE(review): the enclosing function head, the initialisation of `tmp` and
// the first accumulation of each iteration are not part of this excerpt.
340 if(
lhs_.columns() != 0UL ) {
// `end` is the largest odd value <= lhs_.columns(): (c-1) rounded down to even,
// plus one. The k-loop below therefore stops before the last index when the
// column count is even.
341 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
343 for(
size_t k=1UL; k<
end; k+=2UL ) {
345 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
// Remainder handling for the index left over when end < columns().
347 if( end <
lhs_.columns() ) {
// Returns the number of columns of the right-hand side operand (rhs_).
375 return rhs_.columns();
// Reports whether the expression can alias with `alias`: true when either
// operand's canAlias( alias ) is true.
// NOTE(review): the function signature is not part of this excerpt.
405 template<
typename T >
407 return (
lhs_.canAlias( alias ) ||
rhs_.canAlias( alias ) );
// Reports whether the expression is aliased with `alias`: true when either
// operand's isAliased( alias ) is true.
// NOTE(review): the function signature is not part of this excerpt.
417 template<
typename T >
419 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// True only when both operands report isAligned().
429 return lhs_.isAligned() &&
rhs_.isAligned();
// Single-precision BLAS wrapper: forwards C, A, B with scaling factors alpha and
// beta to the CBLAS single-precision routines.
// NOTE(review): the remaining template parameters, the branch conditions around
// the two cblas_ssymm calls and the head of the final call are not part of this
// excerpt.
468 template<
typename MT3
471 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
473 using boost::numeric_cast;
// Sizes and spacings are size_t in the matrices but int in CBLAS; the
// conversion goes through boost::numeric_cast (a checked numeric conversion
// per the Boost documentation).
479 const int M ( numeric_cast<int>( A.rows() ) );
480 const int N ( numeric_cast<int>( B.columns() ) );
481 const int K ( numeric_cast<int>( A.columns() ) );
482 const int lda( numeric_cast<int>( A.spacing() ) );
483 const int ldb( numeric_cast<int>( B.spacing() ) );
484 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric multiply with A passed as the symmetric operand on the left.
487 cblas_ssymm( CblasRowMajor, CblasLeft, CblasLower,
488 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric multiply with B passed as the symmetric operand on the right;
// note the swapped A/B argument order.
491 cblas_ssymm( CblasRowMajor, CblasRight, CblasLower,
492 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// Argument tail of the general multiply call (presumably cblas_sgemm — its
// head is not visible here).
498 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Double-precision BLAS wrapper: forwards C, A, B with scaling factors alpha and
// beta to cblas_dsymm (symmetric operand) or cblas_dgemm (general case).
// NOTE(review): the remaining template parameters and the condition guarding
// the first cblas_dsymm call are not part of this excerpt.
522 template<
typename MT3
525 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
527 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
533 const int M ( numeric_cast<int>( A.rows() ) );
534 const int N ( numeric_cast<int>( B.columns() ) );
535 const int K ( numeric_cast<int>( A.columns() ) );
536 const int lda( numeric_cast<int>( A.spacing() ) );
537 const int ldb( numeric_cast<int>( B.spacing() ) );
538 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric multiply with A as the symmetric left operand.
541 cblas_dsymm( CblasRowMajor, CblasLeft, CblasLower,
542 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric B and row-major target: B is passed as the symmetric right operand.
544 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
545 cblas_dsymm( CblasRowMajor, CblasRight, CblasLower,
546 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General multiply: the layout flag follows the storage order of the target
// type MT3; for a non-row-major target both inputs are flagged as transposed.
549 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
550 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
551 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
552 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Single-precision complex BLAS wrapper: forwards C, A, B with complex scaling
// factors to cblas_csymm (symmetric operand) or cblas_cgemm (general case).
// alpha and beta are taken by value and handed to CBLAS by address.
// NOTE(review): the remaining template parameters are not part of this excerpt.
576 template<
typename MT3
579 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
580 complex<float> alpha, complex<float> beta )
582 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
591 const int M ( numeric_cast<int>( A.rows() ) );
592 const int N ( numeric_cast<int>( B.columns() ) );
593 const int K ( numeric_cast<int>( A.columns() ) );
594 const int lda( numeric_cast<int>( A.spacing() ) );
595 const int ldb( numeric_cast<int>( B.spacing() ) );
596 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric A and row-major target: A is the symmetric left operand.
598 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
599 cblas_csymm( CblasRowMajor, CblasLeft, CblasLower,
600 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric B and row-major target: B is the symmetric right operand.
602 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
603 cblas_csymm( CblasRowMajor, CblasRight, CblasLower,
604 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General multiply: layout follows the storage order of MT3; for a
// non-row-major target both inputs are flagged as transposed.
607 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
608 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
609 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
610 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double-precision complex BLAS wrapper: forwards C, A, B with complex scaling
// factors to cblas_zsymm (symmetric operand) or cblas_zgemm (general case).
// alpha and beta are taken by value and handed to CBLAS by address.
// NOTE(review): the remaining template parameters are not part of this excerpt.
634 template<
typename MT3
637 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
638 complex<double> alpha, complex<double> beta )
640 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
649 const int M ( numeric_cast<int>( A.rows() ) );
650 const int N ( numeric_cast<int>( B.columns() ) );
651 const int K ( numeric_cast<int>( A.columns() ) );
652 const int lda( numeric_cast<int>( A.spacing() ) );
653 const int ldb( numeric_cast<int>( B.spacing() ) );
654 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric A and row-major target: A is the symmetric left operand.
656 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
657 cblas_zsymm( CblasRowMajor, CblasLeft, CblasLower,
658 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric B and row-major target: B is the symmetric right operand.
660 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
661 cblas_zsymm( CblasRowMajor, CblasRight, CblasLower,
662 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General multiply: layout follows the storage order of MT3; for a
// non-row-major target both inputs are flagged as transposed.
665 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
666 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
667 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
668 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of the product expression to a dense target; this overload is
// disabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the function name/parameters, the bodies of the two early-out
// branches and the construction of A and B are not part of this excerpt.
688 template<
typename MT
690 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Special case: the target has no rows or no columns.
698 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension (columns of the left operand) is zero.
701 else if( rhs.lhs_.columns() == 0UL ) {
// Regular case: hand the operands A and B to the kernel selection.
716 DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Kernel selection for the assignment C = A * B: dispatches either to the
// default kernel or to the BLAS-based kernel.
// NOTE(review): the condition that chooses between the two calls is not part
// of this excerpt.
732 template<
typename MT3
735 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
738 DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
740 DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Default (non-vectorized) assignment kernel, enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold. Computes C = A * B
// with plain element access.
// NOTE(review): reads A(i,0UL) and B(0UL,j) unconditionally, so it relies on
// K >= 1 being ensured by the caller — confirm against the calling code.
758 template<
typename MT3
761 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
762 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
764 const size_t M( A.rows() );
765 const size_t N( B.columns() );
766 const size_t K( A.columns() );
768 for(
size_t i=0UL; i<M; ++i ) {
// First pass over row i: initialise every C(i,j) with the k = 0 product.
769 for(
size_t j=0UL; j<N; ++j ) {
770 C(i,j) = A(i,0UL) * B(0UL,j);
// Remaining k: accumulate into the row, walking j in the innermost loop.
772 for(
size_t k=1UL; k<K; ++k ) {
773 for(
size_t j=0UL; j<N; ++j ) {
774 C(i,j) += A(i,k) * B(k,j);
// Vectorized default assignment kernel for a target passed as
// DenseMatrix<MT3,false>, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. The visible code accumulates products into IntrinsicType variables
// (xmm1..xmm8) over k and writes them to C with store().
// NOTE(review): the outer j-loops, the definitions of a1/a2/b1..b4 and most of
// the store calls are not part of this excerpt; comments below describe only
// the visible statements.
796 template<
typename MT3
799 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
800 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
802 typedef IntrinsicTrait<ElementType> IT;
804 const size_t M( A.rows() );
805 const size_t N( B.columns() );
806 const size_t K( A.columns() );
// Widest visible variant: one row i, eight loads from B per k at column
// offsets j, j+IT::size, ..., j+IT::size*7.
811 for(
size_t i=0UL; i<M; ++i ) {
812 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
813 for(
size_t k=0UL; k<K; ++k ) {
815 xmm1 = xmm1 + a1 * B.load(k,j );
816 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
817 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
818 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
819 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
820 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
821 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
822 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
824 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) at a time with four B operands b1..b4: xmm1..xmm4 belong
// to a1, xmm5..xmm8 to a2.
836 for( ; (i+2UL) <= M; i+=2UL ) {
837 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
838 for(
size_t k=0UL; k<K; ++k ) {
845 xmm1 = xmm1 + a1 * b1;
846 xmm2 = xmm2 + a1 * b2;
847 xmm3 = xmm3 + a1 * b3;
848 xmm4 = xmm4 + a1 * b4;
849 xmm5 = xmm5 + a2 * b1;
850 xmm6 = xmm6 + a2 * b2;
851 xmm7 = xmm7 + a2 * b3;
852 xmm8 = xmm8 + a2 * b4;
854 (~C).
store( i , j , xmm1 );
858 (~C).
store( i+1UL, j , xmm5 );
// Single leftover row with four loads from B per k.
865 for(
size_t k=0UL; k<K; ++k ) {
867 xmm1 = xmm1 + a1 * B.load(k,j );
868 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
869 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
870 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
872 (~C).
store( i, j , xmm1 );
// Two rows at a time with two B operands b1, b2.
880 for( ; (i+2UL) <= M; i+=2UL ) {
882 for(
size_t k=0UL; k<K; ++k ) {
887 xmm1 = xmm1 + a1 * b1;
888 xmm2 = xmm2 + a1 * b2;
889 xmm3 = xmm3 + a2 * b1;
890 xmm4 = xmm4 + a2 * b2;
892 (~C).
store( i , j , xmm1 );
894 (~C).
store( i+1UL, j , xmm3 );
// Single leftover row with two loads from B per k.
899 for(
size_t k=0UL; k<K; ++k ) {
901 xmm1 = xmm1 + a1 * B.load(k,j );
902 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
904 (~C).
store( i, j , xmm1 );
// Narrowest variant: one B operand per k; set( A(.,k) ) turns the scalar
// element of A into an intrinsic operand for rows i and i+1.
910 for( ; (i+2UL) <= M; i+=2UL ) {
912 for(
size_t k=0UL; k<K; ++k ) {
914 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
915 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
917 (~C).
store( i , j, xmm1 );
918 (~C).
store( i+1UL, j, xmm2 );
// Single leftover row for the narrowest variant.
922 for(
size_t k=0UL; k<K; ++k ) {
923 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
925 (~C).
store( i, j, xmm1 );
// Vectorized default assignment kernel for a target passed as
// DenseMatrix<MT3,true>. The visible code only chooses between strategies:
// first by which operand type is resizable, then by comparing the element
// counts (rows * columns) of A and B.
// NOTE(review): the bodies of all branches are not part of this excerpt.
946 template<
typename MT3
949 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
950 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
955 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
959 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
963 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-kernel selection fallback: when UseDefaultKernel<MT3,MT4,MT5> holds,
// the assignment is forwarded to the default kernel.
988 template<
typename MT3
991 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
992 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
994 selectDefaultAssignKernel( C, A, B );
// Single-precision BLAS assignment: C = A * B via sgemm with alpha = 1 and
// beta = 0.
1014 template<
typename MT3
1017 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1018 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1020 sgemm( C, A, B, 1.0F, 0.0F );
// Double-precision BLAS assignment: C = A * B via dgemm with alpha = 1 and
// beta = 0.
1041 template<
typename MT3
1044 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1045 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1047 dgemm( C, A, B, 1.0, 0.0 );
// Single-precision complex BLAS assignment: C = A * B via cgemm with
// alpha = (1,0) and beta = (0,0).
1068 template<
typename MT3
1071 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1072 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1074 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// Double-precision complex BLAS assignment: C = A * B via zgemm with
// alpha = (1,0) and beta = (0,0).
1095 template<
typename MT3
1098 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1099 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1101 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment overload (disabled when CanExploitSymmetry<MT,MT1,MT2> holds)
// that first evaluates the expression into a temporary.
// NOTE(review): the function name, parameters and the use of `tmp` are not part
// of this excerpt — presumably the overload for sparse targets; confirm.
1120 template<
typename MT
1122 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Temporary type: OppositeType when SO is set, ResultType otherwise.
1127 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// The expression is wrapped in serial() before being evaluated into tmp.
1139 const TmpType tmp(
serial( rhs ) );
// Assignment overload enabled when CanExploitSymmetry<MT,MT1,MT2> holds.
// Distinguishes the case where both operands are symmetric from the case where
// (at least) the left operand is.
// NOTE(review): the function name and the statements executed in each branch
// are not part of this excerpt.
1160 template<
typename MT >
1161 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1171 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1173 else if( IsSymmetric<MT1>::value )
// Addition assignment of the product expression to a dense target; disabled
// when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the function name/parameters, the body of the early-out branch
// and the construction of A and B are not part of this excerpt.
1194 template<
typename MT
1196 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Early-out test: empty target or zero inner dimension.
1204 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1218 DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Kernel selection for the addition assignment C += A * B: dispatches either
// to the default kernel or to the BLAS-based kernel.
// NOTE(review): the condition that chooses between the two calls is not part
// of this excerpt.
1234 template<
typename MT3
1237 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1240 DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1242 DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Default (non-vectorized) addition assignment kernel, enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold. Computes C += A * B
// with plain element access, two columns of C per inner iteration.
1261 template<
typename MT3
1264 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1265 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1267 const size_t M( A.rows() );
1268 const size_t N( B.columns() );
1269 const size_t K( A.columns() );
// `end` is N rounded down to an even number (lowest bit cleared).
1272 const size_t end( N &
size_t(-2) );
1274 for(
size_t i=0UL; i<M; ++i ) {
1275 for(
size_t k=0UL; k<K; ++k ) {
1276 for(
size_t j=0UL; j<
end; j+=2UL ) {
1277 C(i,j ) += A(i,k) * B(k,j );
1278 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update of column `end`, i.e. the last column when N is odd.
// NOTE(review): the guard around this statement is not part of this excerpt.
1281 C(i,end) += A(i,k) * B(k,end);
// Vectorized default addition assignment kernel for a target passed as
// DenseMatrix<MT3,false>, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. The visible code adds products to xmm accumulators over k and writes
// them to C with store().
// NOTE(review): the outer j-loops, the declarations/initial values of the xmm
// accumulators (presumably loaded from C — confirm), the definitions of
// a1/a2/b1..b4 and most store calls are not part of this excerpt.
1303 template<
typename MT3
1306 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1307 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1309 typedef IntrinsicTrait<ElementType> IT;
1311 const size_t M( A.rows() );
1312 const size_t N( B.columns() );
1313 const size_t K( A.columns() );
// Widest visible variant: one row i, eight loads from B per k at column
// offsets j, j+IT::size, ..., j+IT::size*7.
1318 for(
size_t i=0UL; i<M; ++i ) {
1327 for(
size_t k=0UL; k<K; ++k ) {
1329 xmm1 = xmm1 + a1 * B.load(k,j );
1330 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
1331 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
1332 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
1333 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
1334 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
1335 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
1336 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
1338 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) at a time with four B operands b1..b4.
1350 for( ; (i+2UL) <= M; i+=2UL ) {
1359 for(
size_t k=0UL; k<K; ++k ) {
1366 xmm1 = xmm1 + a1 * b1;
1367 xmm2 = xmm2 + a1 * b2;
1368 xmm3 = xmm3 + a1 * b3;
1369 xmm4 = xmm4 + a1 * b4;
1370 xmm5 = xmm5 + a2 * b1;
1371 xmm6 = xmm6 + a2 * b2;
1372 xmm7 = xmm7 + a2 * b3;
1373 xmm8 = xmm8 + a2 * b4;
1375 (~C).
store( i , j , xmm1 );
1379 (~C).
store( i+1UL, j , xmm5 );
// Single leftover row with four loads from B per k.
1389 for(
size_t k=0UL; k<K; ++k ) {
1391 xmm1 = xmm1 + a1 * B.load(k,j );
1392 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
1393 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
1394 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
1396 (~C).
store( i, j , xmm1 );
// Two rows at a time with two B operands b1, b2.
1404 for( ; (i+2UL) <= M; i+=2UL ) {
1409 for(
size_t k=0UL; k<K; ++k ) {
1414 xmm1 = xmm1 + a1 * b1;
1415 xmm2 = xmm2 + a1 * b2;
1416 xmm3 = xmm3 + a2 * b1;
1417 xmm4 = xmm4 + a2 * b2;
1419 (~C).
store( i , j , xmm1 );
1421 (~C).
store( i+1UL, j , xmm3 );
// Single leftover row with two loads from B per k.
1427 for(
size_t k=0UL; k<K; ++k ) {
1429 xmm1 = xmm1 + a1 * B.load(k,j );
1430 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
1432 (~C).
store( i, j , xmm1 );
// Narrowest variant: one B operand per k for rows i and i+1.
1438 for( ; (i+2UL) <= M; i+=2UL ) {
1441 for(
size_t k=0UL; k<K; ++k ) {
1443 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
1444 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
1446 (~C).
store( i , j, xmm1 );
1447 (~C).
store( i+1UL, j, xmm2 );
// Single leftover row for the narrowest variant.
1451 for(
size_t k=0UL; k<K; ++k ) {
1452 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
1454 (~C).
store( i, j, xmm1 );
// Vectorized default addition assignment kernel for a target passed as
// DenseMatrix<MT3,true>. The visible code only chooses between strategies:
// first by which operand type is resizable, then by comparing the element
// counts (rows * columns) of A and B.
// NOTE(review): the bodies of all branches are not part of this excerpt.
1475 template<
typename MT3
1478 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1479 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1484 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1488 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1492 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-kernel selection fallback: when UseDefaultKernel<MT3,MT4,MT5> holds,
// the addition assignment is forwarded to the default kernel.
1518 template<
typename MT3
1521 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1522 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1524 selectDefaultAddAssignKernel( C, A, B );
// Single-precision BLAS addition assignment: C += A * B via sgemm with
// alpha = 1 and beta = 1.
1544 template<
typename MT3
1547 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1548 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1550 sgemm( C, A, B, 1.0F, 1.0F );
// Double-precision BLAS addition assignment: C += A * B via dgemm with
// alpha = 1 and beta = 1.
1571 template<
typename MT3
1574 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1575 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1577 dgemm( C, A, B, 1.0, 1.0 );
// Single-precision complex BLAS addition assignment: C += A * B via cgemm with
// alpha = (1,0) and beta = (1,0).
1598 template<
typename MT3
1601 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1602 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1604 cgemm( C, A, B, complex<float>( 1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS addition assignment: C += A * B via zgemm with
// alpha = (1,0) and beta = (1,0).
1625 template<
typename MT3
1628 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1629 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1631 zgemm( C, A, B, complex<double>( 1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Addition assignment overload enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. Distinguishes the case where both operands are symmetric from the
// case where (at least) the left operand is.
// NOTE(review): the function name and the statements executed in each branch
// are not part of this excerpt.
1652 template<
typename MT >
1653 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
1663 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
1665 else if( IsSymmetric<MT1>::value )
// Subtraction assignment of the product expression to a dense target; disabled
// when CanExploitSymmetry<MT,MT1,MT2> holds.
// NOTE(review): the function name/parameters, the body of the early-out branch
// and the construction of A and B are not part of this excerpt.
1690 template<
typename MT
1692 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
// Early-out test: empty target or zero inner dimension.
1700 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1714 DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Kernel selection for the subtraction assignment C -= A * B: dispatches
// either to the default kernel or to the BLAS-based kernel.
// NOTE(review): the condition that chooses between the two calls is not part
// of this excerpt.
1730 template<
typename MT3
1733 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1736 DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1738 DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Default (non-vectorized) subtraction assignment kernel, enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold. Computes C -= A * B
// with plain element access, two columns of C per inner iteration.
1757 template<
typename MT3
1760 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1761 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1763 const size_t M( A.rows() );
1764 const size_t N( B.columns() );
1765 const size_t K( A.columns() );
// `end` is N rounded down to an even number (lowest bit cleared).
1768 const size_t end( N &
size_t(-2) );
1770 for(
size_t i=0UL; i<M; ++i ) {
1771 for(
size_t k=0UL; k<K; ++k ) {
1772 for(
size_t j=0UL; j<
end; j+=2UL ) {
1773 C(i,j ) -= A(i,k) * B(k,j );
1774 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update of column `end`, i.e. the last column when N is odd.
// NOTE(review): the guard around this statement is not part of this excerpt.
1777 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default subtraction assignment kernel for a target passed as
// DenseMatrix<MT3,false>, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. The visible code subtracts products from xmm accumulators over k and
// writes them to C with store().
// NOTE(review): the outer j-loops, the declarations/initial values of the xmm
// accumulators (presumably loaded from C — confirm), the definitions of
// a1/a2/b1..b4 and most store calls are not part of this excerpt.
1799 template<
typename MT3
1802 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1803 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1805 typedef IntrinsicTrait<ElementType> IT;
1807 const size_t M( A.rows() );
1808 const size_t N( B.columns() );
1809 const size_t K( A.columns() );
// Widest visible variant: one row i, eight loads from B per k at column
// offsets j, j+IT::size, ..., j+IT::size*7.
1814 for(
size_t i=0UL; i<M; ++i ) {
1823 for(
size_t k=0UL; k<K; ++k ) {
1825 xmm1 = xmm1 - a1 * B.load(k,j );
1826 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size );
1827 xmm3 = xmm3 - a1 * B.load(k,j+
IT::size*2UL);
1828 xmm4 = xmm4 - a1 * B.load(k,j+
IT::size*3UL);
1829 xmm5 = xmm5 - a1 * B.load(k,j+
IT::size*4UL);
1830 xmm6 = xmm6 - a1 * B.load(k,j+
IT::size*5UL);
1831 xmm7 = xmm7 - a1 * B.load(k,j+
IT::size*6UL);
1832 xmm8 = xmm8 - a1 * B.load(k,j+
IT::size*7UL);
1834 (~C).
store( i, j , xmm1 );
// Two rows (i, i+1) at a time with four B operands b1..b4.
1846 for( ; (i+2UL) <= M; i+=2UL ) {
1855 for(
size_t k=0UL; k<K; ++k ) {
1862 xmm1 = xmm1 - a1 * b1;
1863 xmm2 = xmm2 - a1 * b2;
1864 xmm3 = xmm3 - a1 * b3;
1865 xmm4 = xmm4 - a1 * b4;
1866 xmm5 = xmm5 - a2 * b1;
1867 xmm6 = xmm6 - a2 * b2;
1868 xmm7 = xmm7 - a2 * b3;
1869 xmm8 = xmm8 - a2 * b4;
1871 (~C).
store( i , j , xmm1 );
1875 (~C).
store( i+1UL, j , xmm5 );
// Single leftover row with four loads from B per k.
1885 for(
size_t k=0UL; k<K; ++k ) {
1887 xmm1 = xmm1 - a1 * B.load(k,j );
1888 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size );
1889 xmm3 = xmm3 - a1 * B.load(k,j+
IT::size*2UL);
1890 xmm4 = xmm4 - a1 * B.load(k,j+
IT::size*3UL);
1892 (~C).
store( i, j , xmm1 );
// Two rows at a time with two B operands b1, b2.
1900 for( ; (i+2UL) <= M; i+=2UL ) {
1905 for(
size_t k=0UL; k<K; ++k ) {
1910 xmm1 = xmm1 - a1 * b1;
1911 xmm2 = xmm2 - a1 * b2;
1912 xmm3 = xmm3 - a2 * b1;
1913 xmm4 = xmm4 - a2 * b2;
1915 (~C).
store( i , j , xmm1 );
1917 (~C).
store( i+1UL, j , xmm3 );
// Single leftover row with two loads from B per k.
1923 for(
size_t k=0UL; k<K; ++k ) {
1925 xmm1 = xmm1 - a1 * B.load(k,j );
1926 xmm2 = xmm2 - a1 * B.load(k,j+
IT::size);
1928 (~C).
store( i, j , xmm1 );
// Narrowest variant: one B operand per k for rows i and i+1.
1934 for( ; (i+2UL) <= M; i+=2UL ) {
1937 for(
size_t k=0UL; k<K; ++k ) {
1939 xmm1 = xmm1 -
set( A(i ,k) ) * b1;
1940 xmm2 = xmm2 -
set( A(i+1UL,k) ) * b1;
1942 (~C).
store( i , j, xmm1 );
1943 (~C).
store( i+1UL, j, xmm2 );
// Single leftover row for the narrowest variant.
1947 for(
size_t k=0UL; k<K; ++k ) {
1948 xmm1 = xmm1 -
set( A(i,k) ) * B.load(k,j);
1950 (~C).
store( i, j, xmm1 );
// Vectorized default subtraction assignment kernel for a target passed as
// DenseMatrix<MT3,true>. The visible code only chooses between strategies:
// first by which operand type is resizable, then by comparing the element
// counts (rows * columns) of A and B.
// NOTE(review): the bodies of all branches are not part of this excerpt.
1971 template<
typename MT3
1974 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1975 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1980 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1984 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1988 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-kernel selection fallback: when UseDefaultKernel<MT3,MT4,MT5> holds,
// the subtraction assignment is forwarded to the default kernel.
2014 template<
typename MT3
2017 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
2018 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2020 selectDefaultSubAssignKernel( C, A, B );
// Single-precision BLAS subtraction assignment: C -= A * B via sgemm with
// alpha = -1 and beta = 1.
2040 template<
typename MT3
2043 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
2044 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2046 sgemm( C, A, B, -1.0F, 1.0F );
// Double-precision BLAS subtraction assignment: C -= A * B via dgemm with
// alpha = -1 and beta = 1.
2067 template<
typename MT3
2070 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
2071 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2073 dgemm( C, A, B, -1.0, 1.0 );
// Single-precision complex BLAS subtraction assignment: C -= A * B via cgemm
// with alpha = (-1,0) and beta = (1,0).
2094 template<
typename MT3
2097 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2098 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2100 cgemm( C, A, B, complex<float>( -1.0F, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS subtraction assignment: C -= A * B via zgemm
// with alpha = (-1,0) and beta = (1,0).
2121 template<
typename MT3
2124 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2125 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2127 zgemm( C, A, B, complex<double>( -1.0, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment overload enabled when CanExploitSymmetry<MT,MT1,MT2>
// holds. Distinguishes the case where both operands are symmetric from the
// case where (at least) the left operand is.
// NOTE(review): the function name and the statements executed in each branch
// are not part of this excerpt.
2148 template<
typename MT >
2149 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2159 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2161 else if( IsSymmetric<MT1>::value )
// Overload enabled only when IsEvaluationRequired<MT,MT1,MT2> holds.
// NOTE(review): the function name is not visible — presumably the SMP
// assignment to dense targets; confirm. Branch bodies are not part of this
// excerpt.
2196 template<
typename MT
2198 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Special case: the target has no rows or no columns.
2206 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension (columns of the left operand) is zero.
2209 else if( rhs.lhs_.columns() == 0UL ) {
// Overload enabled only when IsEvaluationRequired<MT,MT1,MT2> holds; it
// evaluates the expression into a temporary first.
// NOTE(review): the function name and the use of `tmp` are not visible —
// presumably the SMP assignment to sparse targets; confirm.
2244 template<
typename MT
2246 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Temporary type: OppositeType when SO is set, ResultType otherwise.
2251 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2263 const TmpType tmp( rhs );
// Overload enabled when CanExploitSymmetry<MT,MT1,MT2> holds. Distinguishes
// the case where both operands are symmetric from the case where (at least)
// the left operand is.
// NOTE(review): the function name (presumably an SMP assignment variant) and
// the statements executed in each branch are not part of this excerpt.
2284 template<
typename MT >
2285 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2295 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2297 else if( IsSymmetric<MT1>::value )
// Overload enabled only when IsEvaluationRequired<MT,MT1,MT2> holds.
// NOTE(review): the function name is not visible — presumably the SMP addition
// assignment to dense targets; confirm. The branch body is not part of this
// excerpt.
2321 template<
typename MT
2323 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Early-out test: empty target or zero inner dimension.
2331 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Overload enabled when CanExploitSymmetry<MT,MT1,MT2> holds. Distinguishes
// the case where both operands are symmetric from the case where (at least)
// the left operand is.
// NOTE(review): the function name (presumably an SMP addition assignment
// variant) and the statements in each branch are not part of this excerpt.
2365 template<
typename MT >
2366 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2376 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2378 else if( IsSymmetric<MT1>::value )
// Overload enabled only when IsEvaluationRequired<MT,MT1,MT2> holds.
// NOTE(review): the function name is not visible — presumably the SMP
// subtraction assignment to dense targets; confirm. The branch body is not
// part of this excerpt.
2406 template<
typename MT
2408 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
// Early-out test: empty target or zero inner dimension.
2416 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Overload enabled when CanExploitSymmetry<MT,MT1,MT2> holds. Distinguishes
// the case where both operands are symmetric from the case where (at least)
// the left operand is.
// NOTE(review): the function name (presumably an SMP subtraction assignment
// variant) and the statements in each branch are not part of this excerpt.
2450 template<
typename MT >
2451 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
2461 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
2463 else if( IsSymmetric<MT1>::value )
// Head of the scaled matrix-product expression: the class derives publicly
// from DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >,
// false > and privately from the tag types MatScalarMultExpr and Computation.
// NOTE(review): the remaining template parameters and the class-head line
// itself are not part of this excerpt.
2512 template<
typename MT1
2516 :
public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2517 ,
private MatScalarMultExpr
2518 ,
private Computation
// Shorthand for the wrapped matrix-matrix product expression type.
2522 typedef DMatDMatMultExpr<MT1,MT2> MMM;
// An operand must be evaluated when it is itself a computation or when its
// type requires an intermediate evaluation.
2534 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2539 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time trait: `value` is nonzero when T1 is a column-major matrix and
// at least one of T2 / T3 is a symmetric matrix type.
// NOTE(review): the struct's closing brace is not part of this excerpt.
2549 template<
typename T1,
typename T2,
typename T3 >
2550 struct CanExploitSymmetry {
2551 enum { value = IsColumnMajorMatrix<T1>::value &&
2552 ( IsSymmetric<T2>::value || IsSymmetric<T3>::value ) };
// Compile-time trait: `value` is nonzero when at least one of evaluateLeft /
// evaluateRight is set AND the symmetry shortcut described by
// CanExploitSymmetry<T1,T2,T3> does not apply.
2561 template<
typename T1,
typename T2,
typename T3 >
2562 struct IsEvaluationRequired {
2563 enum { value = ( evaluateLeft || evaluateRight ) &&
2564 !CanExploitSymmetry<T1,T2,T3>::value };
// Compile-time trait selecting the single-precision kernel for the scaled
// product. The visible part of the condition requires T1..T3 to be
// vectorizable with IsFloat element types, and the fourth type T4 to be
// non-complex.
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
2573 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2574 struct UseSinglePrecisionKernel {
2576 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2577 IsFloat<typename T1::ElementType>::value &&
2578 IsFloat<typename T2::ElementType>::value &&
2579 IsFloat<typename T3::ElementType>::value &&
2580 !IsComplex<T4>::value };
// Compile-time trait selecting the double-precision kernel for the scaled
// product. The visible part of the condition requires T1..T3 to be
// vectorizable with IsDouble element types, and the fourth type T4 to be
// non-complex.
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
2589 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2590 struct UseDoublePrecisionKernel {
2592 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2593 IsDouble<typename T1::ElementType>::value &&
2594 IsDouble<typename T2::ElementType>::value &&
2595 IsDouble<typename T3::ElementType>::value &&
2596 !IsComplex<T4>::value };
// Compile-time trait selecting the single-precision complex kernel. The
// visible part of the condition requires all three types to be vectorizable
// and all three element types to be exactly complex<float> (aliased as Type).
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
2605 template<
typename T1,
typename T2,
typename T3 >
2606 struct UseSinglePrecisionComplexKernel {
2607 typedef complex<float> Type;
2609 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2610 IsSame<typename T1::ElementType,Type>::value &&
2611 IsSame<typename T2::ElementType,Type>::value &&
2612 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the double-precision complex kernel. The
// visible part of the condition requires all three types to be vectorizable
// and all three element types to be exactly complex<double> (aliased as Type).
// NOTE(review): the line that opens the enum initializer is missing from this
// excerpt, so any additional leading condition cannot be confirmed here.
2621 template<
typename T1,
typename T2,
typename T3 >
2622 struct UseDoublePrecisionComplexKernel {
2623 typedef complex<double> Type;
2625 T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2626 IsSame<typename T1::ElementType,Type>::value &&
2627 IsSame<typename T2::ElementType,Type>::value &&
2628 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: `value` is nonzero when BLAZE_BLAS_MODE is off, or when
// none of the four specialised kernel traits applies. The real-valued traits
// additionally receive T4; the complex ones take only T1..T3.
2636 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2637 struct UseDefaultKernel {
2638 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2639 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2640 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2641 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait: `value` is nonzero when T1..T3 are vectorizable, share
// one element type, that element type is also identical to T4, and
// IntrinsicTrait of it reports addition, subtraction and multiplication as
// available.
2649 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2650 struct UseVectorizedDefaultKernel {
2651 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2652 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2653 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2654 IsSame<typename T1::ElementType,T4>::value &&
2655 IntrinsicTrait<typename T1::ElementType>::addition &&
2656 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2657 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Public type aliases and compilation flags of the scaled product expression.
// NOTE(review): several typedefs between the visible ones (e.g. the
// definitions of RES, ElementType, RT1/RT2, CT1/CT2, ET1/ET2) are not part of
// this excerpt.
// Type of this expression instance.
2663 typedef DMatScalarMultExpr<MMM,ST,false>
This;
// Result type: MultTrait of RES and the scalar type ST.
2664 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic type associated with ElementType via IntrinsicTrait.
2668 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left operand of the scaling is the (const) matrix-matrix product.
2673 typedef const DMatDMatMultExpr<MT1,MT2>
LeftOperand;
// LT / RT: const RT1 / const RT2 when the respective operand must be
// evaluated, CT1 / CT2 otherwise.
2679 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2682 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
// Vectorizable when both operand types are vectorizable, both element types
// and the scalar type are identical, and intrinsic addition and
// multiplication are available for that element type.
2687 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2688 IsSame<ET1,ET2>::value &&
2689 IsSame<ET1,ST>::value &&
2690 IntrinsicTrait<ET1>::addition &&
2691 IntrinsicTrait<ET1>::multiplication };
// SMP-assignable only when neither operand needs evaluation and both operand
// types are themselves SMP-assignable.
2694 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2695 !evaluateRight && MT2::smpAssignable };
// Constructor taking the matrix product expression and the scalar factor.
// NOTE(review): the initializer list and body are not part of this excerpt.
2704 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// Element access: the (i,j) element of the wrapped product scaled by scalar_.
2720 return matrix_(i,j) * scalar_;
// Returns the number of rows of the wrapped product expression.
2729 inline size_t rows()
const {
2730 return matrix_.rows();
// Returns the number of columns of the wrapped product expression.
2739 inline size_t columns()
const {
2740 return matrix_.columns();
// Reports whether the expression can alias with `alias` by delegating to the
// wrapped product expression.
2770 template<
typename T >
2771 inline bool canAlias(
const T* alias )
const {
2772 return matrix_.canAlias( alias );
// Reports whether the expression is aliased with `alias` by delegating to the
// wrapped product expression.
2782 template<
typename T >
2783 inline bool isAliased(
const T* alias )
const {
2784 return matrix_.isAliased( alias );
// Alignment query delegated to the wrapped product expression.
2794 return matrix_.isAligned();
// Fetches the left operand of the wrapped product as A.
// NOTE(review): the enclosing function is not part of this excerpt.
2804 typename MMM::LeftOperand A( matrix_.leftOperand() );
// Single-precision BLAS wrapper of the scaled product: forwards C, A, B with
// scaling factors alpha and beta to cblas_ssymm (symmetric operand) or
// cblas_sgemm (general case).
// NOTE(review): the remaining template parameters are not part of this excerpt.
2833 template<
typename MT3
2836 static inline void sgemm( MT3& C,
const MT4& A,
const MT5& B,
float alpha,
float beta )
2838 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
2844 const int M ( numeric_cast<int>( A.rows() ) );
2845 const int N ( numeric_cast<int>( B.columns() ) );
2846 const int K ( numeric_cast<int>( A.columns() ) );
2847 const int lda( numeric_cast<int>( A.spacing() ) );
2848 const int ldb( numeric_cast<int>( B.spacing() ) );
2849 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric A and row-major target: A is the symmetric left operand.
2851 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2852 cblas_ssymm( CblasRowMajor, CblasLeft, CblasLower,
2853 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric B and row-major target: B is the symmetric right operand.
2855 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2856 cblas_ssymm( CblasRowMajor, CblasRight, CblasLower,
2857 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General multiply: layout follows the storage order of MT3; for a
// non-row-major target both inputs are flagged as transposed.
2860 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2861 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2862 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2863 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Double-precision BLAS wrapper of the scaled product: forwards C, A, B with
// scaling factors alpha and beta to cblas_dsymm (symmetric operand) or
// cblas_dgemm (general case).
// NOTE(review): the remaining template parameters are not part of this excerpt.
2885 template<
typename MT3
2888 static inline void dgemm( MT3& C,
const MT4& A,
const MT5& B,
double alpha,
double beta )
2890 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
2896 const int M ( numeric_cast<int>( A.rows() ) );
2897 const int N ( numeric_cast<int>( B.columns() ) );
2898 const int K ( numeric_cast<int>( A.columns() ) );
2899 const int lda( numeric_cast<int>( A.spacing() ) );
2900 const int ldb( numeric_cast<int>( B.spacing() ) );
2901 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric A and row-major target: A is the symmetric left operand.
2903 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2904 cblas_dsymm( CblasRowMajor, CblasLeft, CblasLower,
2905 M, N, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Symmetric B and row-major target: B is the symmetric right operand.
2907 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2908 cblas_dsymm( CblasRowMajor, CblasRight, CblasLower,
2909 M, N, alpha, B.data(), ldb, A.data(), lda, beta, C.data(), ldc );
// General multiply: layout follows the storage order of MT3; for a
// non-row-major target both inputs are flagged as transposed.
2912 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2913 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2914 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2915 M, N, K, alpha, A.data(), lda, B.data(), ldb, beta, C.data(), ldc );
// Single-precision complex BLAS wrapper of the scaled product: forwards C, A,
// B with complex scaling factors to cblas_csymm (symmetric operand) or
// cblas_cgemm (general case). alpha and beta are taken by value and handed to
// CBLAS by address.
// NOTE(review): the remaining template parameters are not part of this excerpt.
2937 template<
typename MT3
2940 static inline void cgemm( MT3& C,
const MT4& A,
const MT5& B,
2941 complex<float> alpha, complex<float> beta )
2943 using boost::numeric_cast;
// Sizes and spacings are converted to the int arguments CBLAS expects via
// boost::numeric_cast.
2952 const int M ( numeric_cast<int>( A.rows() ) );
2953 const int N ( numeric_cast<int>( B.columns() ) );
2954 const int K ( numeric_cast<int>( A.columns() ) );
2955 const int lda( numeric_cast<int>( A.spacing() ) );
2956 const int ldb( numeric_cast<int>( B.spacing() ) );
2957 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric A and row-major target: A is the symmetric left operand.
2959 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
2960 cblas_csymm( CblasRowMajor, CblasLeft, CblasLower,
2961 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric B and row-major target: B is the symmetric right operand.
2963 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
2964 cblas_csymm( CblasRowMajor, CblasRight, CblasLower,
2965 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General multiply: layout follows the storage order of MT3; for a
// non-row-major target both inputs are flagged as transposed.
2968 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2969 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2970 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2971 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double-precision complex BLAS wrapper. Dispatches to cblas_zsymm for a symmetric operand
// with a row-major target, otherwise to cblas_zgemm; alpha and beta are passed by address.
2993 template<
typename MT3
2996 static inline void zgemm( MT3& C,
const MT4& A,
const MT5& B,
2997 complex<double> alpha, complex<double> beta )
2999 using boost::numeric_cast;
// CBLAS takes int dimensions and leading dimensions; numeric_cast is a range-checked conversion.
3008 const int M ( numeric_cast<int>( A.rows() ) );
3009 const int N ( numeric_cast<int>( B.columns() ) );
3010 const int K ( numeric_cast<int>( A.columns() ) );
3011 const int lda( numeric_cast<int>( A.spacing() ) );
3012 const int ldb( numeric_cast<int>( B.spacing() ) );
3013 const int ldc( numeric_cast<int>( C.spacing() ) );
// Symmetric left operand: A is passed as the symmetric matrix (CblasLeft).
3015 if( IsSymmetric<MT4>::value && IsRowMajorMatrix<MT3>::value ) {
3016 cblas_zsymm( CblasRowMajor, CblasLeft, CblasLower,
3017 M, N, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Symmetric right operand: B takes the symmetric-matrix slot (CblasRight), so B and A swap positions.
3019 else if( IsSymmetric<MT5>::value && IsRowMajorMatrix<MT3>::value ) {
3020 cblas_zsymm( CblasRowMajor, CblasRight, CblasLower,
3021 M, N, &alpha, B.data(), ldb, A.data(), lda, &beta, C.data(), ldc );
// General case: storage order and transpose flags follow the target type MT3.
3024 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3025 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3026 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3027 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of a scaled dense matrix product to a dense matrix.
// Disabled via DisableIf when CanExploitSymmetry<MT,MT1,MT2> holds; that case has its own
// assign() overload taking Matrix<MT,true>.
3045 template<
typename MT
3047 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3048 assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
3055 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3056 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Degenerate shapes (empty target, zero inner dimension) get separate branches;
// their bodies are not visible in this listing.
3058 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
3061 else if( left.columns() == 0UL ) {
// A and B are declared on lines not shown here — presumably the evaluated operands; confirm.
3076 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatcher for the scaled assignment: calls either the default kernel or the
// BLAS kernel. The condition choosing between the two calls is not visible in this listing.
3091 template<
typename MT3
3095 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3098 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
3100 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
// Non-vectorized default kernel for the scaled assignment (selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold).
3118 template<
typename MT3
3122 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3123 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3125 const size_t M( A.rows() );
3126 const size_t N( B.columns() );
3127 const size_t K( A.columns() );
// Row by row: the k == 0 term initializes C(i,j) by assignment, the remaining k terms accumulate.
3129 for(
size_t i=0UL; i<M; ++i ) {
3130 for(
size_t j=0UL; j<N; ++j ) {
3131 C(i,j) = A(i,0UL) * B(0UL,j);
3133 for(
size_t k=1UL; k<K; ++k ) {
3134 for(
size_t j=0UL; j<N; ++j ) {
3135 C(i,j) += A(i,k) * B(k,j);
// Final pass over row i; body not shown — presumably applies `scalar` to C(i,j); confirm.
3138 for(
size_t j=0UL; j<N; ++j ) {
// Vectorized default kernel for the scaled assignment to a row-major target
// (DenseMatrix<MT3,false>). Column blocks of decreasing width are processed with SIMD
// accumulators and each result is multiplied by `factor` before being stored.
// The enclosing j loops and the definitions of j, a1, a2, b1..b4 and factor are on lines
// not visible in this listing.
3159 template<
typename MT3
3163 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3164 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3166 typedef IntrinsicTrait<ElementType> IT;
3168 const size_t M( A.rows() );
3169 const size_t N( B.columns() );
3170 const size_t K( A.columns() );
// Widest block: one row of C, eight SIMD vectors (offsets j .. j+7*IT::size) per pass.
3177 for(
size_t i=0UL; i<M; ++i ) {
// NOTE(review): the accumulators rely on IntrinsicType's default constructor for their
// initial value — presumably zero; confirm.
3178 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3179 for(
size_t k=0UL; k<K; ++k ) {
3181 xmm1 = xmm1 + a1 * B.load(k,j );
3182 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
3183 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
3184 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
3185 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
3186 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
3187 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
3188 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
// Scale while storing (the remaining stores of this block are not shown).
3190 (~C).
store( i, j , xmm1 * factor );
// Four vectors wide, two rows at a time: xmm1-4 belong to row i, xmm5-8 to row i+1.
3202 for( ; (i+2UL) <= M; i+=2UL ) {
3203 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3204 for(
size_t k=0UL; k<K; ++k ) {
3211 xmm1 = xmm1 + a1 * b1;
3212 xmm2 = xmm2 + a1 * b2;
3213 xmm3 = xmm3 + a1 * b3;
3214 xmm4 = xmm4 + a1 * b4;
3215 xmm5 = xmm5 + a2 * b1;
3216 xmm6 = xmm6 + a2 * b2;
3217 xmm7 = xmm7 + a2 * b3;
3218 xmm8 = xmm8 + a2 * b4;
3220 (~C).
store( i , j , xmm1 * factor );
3224 (~C).
store( i+1UL, j , xmm5 * factor );
// Four vectors wide, single row (handles a row left over by the two-row loop above).
3231 for(
size_t k=0UL; k<K; ++k ) {
3233 xmm1 = xmm1 + a1 * B.load(k,j );
3234 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
3235 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
3236 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
3238 (~C).
store( i, j , xmm1 * factor );
// Two vectors wide, two rows at a time: xmm1-2 belong to row i, xmm3-4 to row i+1.
3246 for( ; (i+2UL) <= M; i+=2UL ) {
3248 for(
size_t k=0UL; k<K; ++k ) {
3253 xmm1 = xmm1 + a1 * b1;
3254 xmm2 = xmm2 + a1 * b2;
3255 xmm3 = xmm3 + a2 * b1;
3256 xmm4 = xmm4 + a2 * b2;
3258 (~C).
store( i , j , xmm1 * factor );
3260 (~C).
store( i+1UL, j , xmm3 * factor );
// Two vectors wide, single row.
3265 for(
size_t k=0UL; k<K; ++k ) {
3267 xmm1 = xmm1 + a1 * B.load(k,j );
3268 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
3270 (~C).
store( i, j , xmm1 * factor );
// One vector wide, two rows at a time; A(i,k) and A(i+1,k) are broadcast with set().
3276 for( ; (i+2UL) <= M; i+=2UL ) {
3278 for(
size_t k=0UL; k<K; ++k ) {
3280 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3281 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3283 (~C).
store( i , j, xmm1 * factor );
3284 (~C).
store( i+1UL, j, xmm2 * factor );
// One vector wide, single row.
3288 for(
size_t k=0UL; k<K; ++k ) {
3289 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3291 (~C).
store( i, j, xmm1 * factor );
// Vectorized default kernel for the scaled assignment to a column-major target
// (DenseMatrix<MT3,true>). One operand is replaced by `tmp` and the product is re-dispatched
// through assign(). `tmp` is declared on lines not shown — presumably a copy of that operand
// in the opposite storage order; confirm.
3311 template<
typename MT3
3315 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3316 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// A not resizable but B resizable: replace A.
3321 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3323 assign( ~C, tmp * B * scalar );
// A resizable but B not: replace B.
3325 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3327 assign( ~C, A * tmp * scalar );
// Otherwise replace the operand with fewer elements (A on a tie).
3329 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
3331 assign( ~C, tmp * B * scalar );
3335 assign( ~C, A * tmp * scalar );
// BLAS-path fallback for the scaled assignment: when UseDefaultKernel holds for the given
// types, the call is forwarded unchanged to the default kernel.
3354 template<
typename MT3
3358 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3359 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3361 selectDefaultAssignKernel( C, A, B, scalar );
// Single-precision BLAS kernel for the scaled assignment: alpha = scalar, beta = 0,
// so the previous contents of C do not contribute to the result.
3380 template<
typename MT3
3384 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3385 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3387 sgemm( C, A, B, scalar, 0.0F );
// Double-precision BLAS kernel for the scaled assignment: alpha = scalar, beta = 0,
// so the previous contents of C do not contribute to the result.
3407 template<
typename MT3
3411 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3412 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3414 dgemm( C, A, B, scalar, 0.0 );
// Single-precision complex BLAS kernel for the scaled assignment. The scalar is widened to
// complex<float>( scalar, 0 ) as alpha; beta is complex zero.
3434 template<
typename MT3
3438 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3439 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3441 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 0.0F, 0.0F ) );
// Double-precision complex BLAS kernel for the scaled assignment. The scalar is widened to
// complex<double>( scalar, 0 ) as alpha; beta is complex zero.
3461 template<
typename MT3
3465 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3466 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3468 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 0.0, 0.0 ) );
// Assignment of a scaled dense matrix product to a sparse matrix (disabled when
// CanExploitSymmetry holds). The expression is first evaluated into a temporary.
3485 template<
typename MT
3487 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3488 assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type: OppositeType or ResultType, selected by the target's storage order SO.
3492 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// serial() forces serial evaluation of the expression into the temporary.
3504 const TmpType tmp(
serial( rhs ) );
// Assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds (column-major
// target and at least one symmetric operand type). The product is rebuilt with trans()
// applied to a symmetric operand and assigned again.
3523 template<
typename MT >
3524 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3525 assign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
3534 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3535 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Both operands symmetric: the statement for this branch is not visible in this listing.
3537 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
// Only the left operand is known to be symmetric here: transpose it.
3539 else if( IsSymmetric<MT1>::value )
3540 assign( ~lhs,
trans( left ) * right * rhs.scalar_ );
// Remaining case (the `else` line is not shown): transpose the right operand.
3542 assign( ~lhs, left *
trans( right ) * rhs.scalar_ );
// Addition assignment of a scaled dense matrix product to a dense matrix
// (disabled when CanExploitSymmetry holds).
3558 template<
typename MT
3560 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3561 addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
3568 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3569 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: handled by a single branch (body not shown).
3571 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// A and B are declared on lines not shown here — presumably the evaluated operands; confirm.
3585 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatcher for the scaled addition assignment: calls either the default kernel or
// the BLAS kernel. The condition choosing between the two calls is not visible in this listing.
3600 template<
typename MT3
3604 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3607 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3609 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
// Non-vectorized default kernel for the scaled addition assignment (selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold).
// Only the signature is visible in this listing; the body is not shown.
3627 template<
typename MT3
3631 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3632 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default kernel for the scaled addition assignment to a row-major target
// (DenseMatrix<MT3,false>). Same blocking as the plain assignment kernel, but every store is
// a read-modify-write: the current C vector is loaded and the scaled accumulator is added.
// The enclosing j loops and the definitions of j, a1, a2, b1..b4 and factor are on lines
// not visible in this listing.
3653 template<
typename MT3
3657 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3658 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3660 typedef IntrinsicTrait<ElementType> IT;
3662 const size_t M( A.rows() );
3663 const size_t N( B.columns() );
3664 const size_t K( A.columns() );
// Widest block: one row of C, eight SIMD vectors (offsets j .. j+7*IT::size) per pass.
3671 for(
size_t i=0UL; i<M; ++i ) {
// NOTE(review): the accumulators rely on IntrinsicType's default constructor for their
// initial value — presumably zero; confirm.
3672 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3673 for(
size_t k=0UL; k<K; ++k ) {
3675 xmm1 = xmm1 + a1 * B.load(k,j );
3676 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
3677 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
3678 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
3679 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
3680 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
3681 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
3682 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
// C += accumulator * factor (the remaining stores of this block are not shown).
3684 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// Four vectors wide, two rows at a time: xmm1-4 belong to row i, xmm5-8 to row i+1.
3696 for( ; (i+2UL) <= M; i+=2UL ) {
3697 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3698 for(
size_t k=0UL; k<K; ++k ) {
3705 xmm1 = xmm1 + a1 * b1;
3706 xmm2 = xmm2 + a1 * b2;
3707 xmm3 = xmm3 + a1 * b3;
3708 xmm4 = xmm4 + a1 * b4;
3709 xmm5 = xmm5 + a2 * b1;
3710 xmm6 = xmm6 + a2 * b2;
3711 xmm7 = xmm7 + a2 * b3;
3712 xmm8 = xmm8 + a2 * b4;
3714 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3718 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
// Four vectors wide, single row (handles a row left over by the two-row loop above).
3725 for(
size_t k=0UL; k<K; ++k ) {
3727 xmm1 = xmm1 + a1 * B.load(k,j );
3728 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
3729 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
3730 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
3732 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// Two vectors wide, two rows at a time: xmm1-2 belong to row i, xmm3-4 to row i+1.
3740 for( ; (i+2UL) <= M; i+=2UL ) {
3742 for(
size_t k=0UL; k<K; ++k ) {
3747 xmm1 = xmm1 + a1 * b1;
3748 xmm2 = xmm2 + a1 * b2;
3749 xmm3 = xmm3 + a2 * b1;
3750 xmm4 = xmm4 + a2 * b2;
3752 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3754 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
// Two vectors wide, single row.
3759 for(
size_t k=0UL; k<K; ++k ) {
3761 xmm1 = xmm1 + a1 * B.load(k,j );
3762 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
3764 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
// One vector wide, two rows at a time; A(i,k) and A(i+1,k) are broadcast with set().
3770 for( ; (i+2UL) <= M; i+=2UL ) {
3772 for(
size_t k=0UL; k<K; ++k ) {
3774 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3775 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3777 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3778 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
// One vector wide, single row.
3782 for(
size_t k=0UL; k<K; ++k ) {
3783 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3785 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// Vectorized default kernel for the scaled addition assignment to a column-major target
// (DenseMatrix<MT3,true>). Only the branch conditions are visible in this listing; they
// test operand resizability and, failing that, compare the operands' element counts.
// NOTE(review): the branch bodies presumably mirror the plain assignment kernel — confirm.
3805 template<
typename MT3
3809 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3810 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3815 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3819 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3823 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-path fallback for the scaled addition assignment: when UseDefaultKernel holds for
// the given types, the call is forwarded unchanged to the default kernel.
3848 template<
typename MT3
3852 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3853 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3855 selectDefaultAddAssignKernel( C, A, B, scalar );
// Single-precision BLAS kernel for the scaled addition assignment: alpha = scalar,
// beta = 1, so the product is accumulated onto the existing contents of C.
3874 template<
typename MT3
3878 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3879 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3881 sgemm( C, A, B, scalar, 1.0F );
// Double-precision BLAS kernel for the scaled addition assignment: alpha = scalar,
// beta = 1, so the product is accumulated onto the existing contents of C.
3901 template<
typename MT3
3905 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3906 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3908 dgemm( C, A, B, scalar, 1.0 );
// Single-precision complex BLAS kernel for the scaled addition assignment:
// alpha = complex<float>( scalar, 0 ), beta = complex one (accumulate onto C).
3928 template<
typename MT3
3932 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3933 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3935 cgemm( C, A, B, complex<float>( scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS kernel for the scaled addition assignment:
// alpha = complex<double>( scalar, 0 ), beta = complex one (accumulate onto C).
3955 template<
typename MT3
3959 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3960 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3962 zgemm( C, A, B, complex<double>( scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Addition assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds
// (column-major target and at least one symmetric operand type).
// Only the branch conditions are visible in this listing; the statements they guard are not shown.
3981 template<
typename MT >
3982 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
3983 addAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
3992 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3993 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3995 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
3997 else if( IsSymmetric<MT1>::value )
// Subtraction assignment of a scaled dense matrix product to a dense matrix
// (disabled when CanExploitSymmetry holds).
4020 template<
typename MT
4022 friend inline typename DisableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4023 subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4030 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4031 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: handled by a single branch (body not shown).
4033 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// A and B are declared on lines not shown here — presumably the evaluated operands; confirm.
4047 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatcher for the scaled subtraction assignment: calls either the default kernel
// or the BLAS kernel. The condition choosing between the two calls is not visible in this listing.
4062 template<
typename MT3
4066 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4069 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
4071 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
// Non-vectorized default kernel for the scaled subtraction assignment (selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold).
// Only the signature is visible in this listing; the body is not shown.
4089 template<
typename MT3
4093 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4094 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default kernel for the scaled subtraction assignment to a row-major target
// (DenseMatrix<MT3,false>). Same blocking as the plain assignment kernel, but every store is
// a read-modify-write: the current C vector is loaded and the scaled accumulator is subtracted.
// The enclosing j loops and the definitions of j, a1, a2, b1..b4 and factor are on lines
// not visible in this listing.
4115 template<
typename MT3
4119 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4120 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
4122 typedef IntrinsicTrait<ElementType> IT;
4124 const size_t M( A.rows() );
4125 const size_t N( B.columns() );
4126 const size_t K( A.columns() );
// Widest block: one row of C, eight SIMD vectors (offsets j .. j+7*IT::size) per pass.
4133 for(
size_t i=0UL; i<M; ++i ) {
// NOTE(review): the accumulators rely on IntrinsicType's default constructor for their
// initial value — presumably zero; confirm.
4134 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4135 for(
size_t k=0UL; k<K; ++k ) {
4137 xmm1 = xmm1 + a1 * B.load(k,j );
4138 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
4139 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
4140 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
4141 xmm5 = xmm5 + a1 * B.load(k,j+
IT::size*4UL);
4142 xmm6 = xmm6 + a1 * B.load(k,j+
IT::size*5UL);
4143 xmm7 = xmm7 + a1 * B.load(k,j+
IT::size*6UL);
4144 xmm8 = xmm8 + a1 * B.load(k,j+
IT::size*7UL);
// C -= accumulator * factor (the remaining stores of this block are not shown).
4146 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// Four vectors wide, two rows at a time: xmm1-4 belong to row i, xmm5-8 to row i+1.
4158 for( ; (i+2UL) <= M; i+=2UL ) {
4159 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
4160 for(
size_t k=0UL; k<K; ++k ) {
4167 xmm1 = xmm1 + a1 * b1;
4168 xmm2 = xmm2 + a1 * b2;
4169 xmm3 = xmm3 + a1 * b3;
4170 xmm4 = xmm4 + a1 * b4;
4171 xmm5 = xmm5 + a2 * b1;
4172 xmm6 = xmm6 + a2 * b2;
4173 xmm7 = xmm7 + a2 * b3;
4174 xmm8 = xmm8 + a2 * b4;
4176 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4180 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
// Four vectors wide, single row (handles a row left over by the two-row loop above).
4187 for(
size_t k=0UL; k<K; ++k ) {
4189 xmm1 = xmm1 + a1 * B.load(k,j );
4190 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size );
4191 xmm3 = xmm3 + a1 * B.load(k,j+
IT::size*2UL);
4192 xmm4 = xmm4 + a1 * B.load(k,j+
IT::size*3UL);
4194 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// Two vectors wide, two rows at a time: xmm1-2 belong to row i, xmm3-4 to row i+1.
4202 for( ; (i+2UL) <= M; i+=2UL ) {
4204 for(
size_t k=0UL; k<K; ++k ) {
4209 xmm1 = xmm1 + a1 * b1;
4210 xmm2 = xmm2 + a1 * b2;
4211 xmm3 = xmm3 + a2 * b1;
4212 xmm4 = xmm4 + a2 * b2;
4214 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
4216 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
// Two vectors wide, single row.
4221 for(
size_t k=0UL; k<K; ++k ) {
4223 xmm1 = xmm1 + a1 * B.load(k,j );
4224 xmm2 = xmm2 + a1 * B.load(k,j+
IT::size);
4226 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
// One vector wide, two rows at a time; A(i,k) and A(i+1,k) are broadcast with set().
4232 for( ; (i+2UL) <= M; i+=2UL ) {
4234 for(
size_t k=0UL; k<K; ++k ) {
4236 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
4237 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
4239 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
4240 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
// One vector wide, single row.
4244 for(
size_t k=0UL; k<K; ++k ) {
4245 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
4247 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// Vectorized default kernel for the scaled subtraction assignment to a column-major target
// (DenseMatrix<MT3,true>). Only the branch conditions are visible in this listing; they
// test operand resizability and, failing that, compare the operands' element counts.
// NOTE(review): the branch bodies presumably mirror the plain assignment kernel — confirm.
4267 template<
typename MT3
4271 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4272 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
4277 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
4281 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
4285 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS-path fallback for the scaled subtraction assignment: when UseDefaultKernel holds for
// the given types, the call is forwarded unchanged to the default kernel.
4310 template<
typename MT3
4314 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
4315 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4317 selectDefaultSubAssignKernel( C, A, B, scalar );
// Single-precision BLAS kernel for the scaled subtraction assignment: the subtraction is
// expressed as alpha = -scalar with beta = 1 (accumulate the negated product onto C).
4336 template<
typename MT3
4340 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4341 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4343 sgemm( C, A, B, -scalar, 1.0F );
// Double-precision BLAS kernel for the scaled subtraction assignment: the subtraction is
// expressed as alpha = -scalar with beta = 1 (accumulate the negated product onto C).
4363 template<
typename MT3
4367 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
4368 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4370 dgemm( C, A, B, -scalar, 1.0 );
// Single-precision complex BLAS kernel for the scaled subtraction assignment:
// alpha = complex<float>( -scalar, 0 ), beta = complex one.
4390 template<
typename MT3
4394 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4395 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4397 cgemm( C, A, B, complex<float>( -scalar, 0.0F ), complex<float>( 1.0F, 0.0F ) );
// Double-precision complex BLAS kernel for the scaled subtraction assignment:
// alpha = complex<double>( -scalar, 0 ), beta = complex one.
4417 template<
typename MT3
4421 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4422 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4424 zgemm( C, A, B, complex<double>( -scalar, 0.0 ), complex<double>( 1.0, 0.0 ) );
// Subtraction assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds
// (column-major target and at least one symmetric operand type).
// Only the branch conditions are visible in this listing; the statements they guard are not shown.
4443 template<
typename MT >
4444 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4445 subAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
4454 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4455 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4457 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4459 else if( IsSymmetric<MT1>::value )
// SMP assignment of a scaled dense matrix product to a dense matrix. Enabled only when
// IsEvaluationRequired<MT,MT1,MT2> holds, i.e. an operand needs evaluation and the symmetry
// shortcut does not apply.
4493 template<
typename MT
4495 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4496 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4503 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4504 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Degenerate shapes (empty target, zero inner dimension) get separate branches;
// their bodies and the remainder of the function are not visible in this listing.
4506 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
4509 else if( left.columns() == 0UL ) {
// SMP assignment of a scaled dense matrix product to a sparse matrix (enabled when
// IsEvaluationRequired holds). The expression is first evaluated into a temporary.
4543 template<
typename MT
4545 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4546 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type: OppositeType or ResultType, selected by the target's storage order SO.
4550 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// Unlike the non-SMP sparse assign(), rhs is passed directly rather than through serial().
4562 const TmpType tmp( rhs );
// SMP assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds
// (column-major target and at least one symmetric operand type).
// Only the branch conditions are visible in this listing; the statements they guard are not shown.
4581 template<
typename MT >
4582 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4583 smpAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
4592 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4593 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4595 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4597 else if( IsSymmetric<MT1>::value )
// SMP addition assignment of a scaled dense matrix product to a dense matrix
// (enabled when IsEvaluationRequired<MT,MT1,MT2> holds).
4619 template<
typename MT
4621 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4622 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4629 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4630 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: handled by a single branch (body and the rest of
// the function are not visible in this listing).
4632 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP addition assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds
// (column-major target and at least one symmetric operand type).
// Only the branch conditions are visible in this listing; the statements they guard are not shown.
4664 template<
typename MT >
4665 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4666 smpAddAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
4675 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4676 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4678 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4680 else if( IsSymmetric<MT1>::value )
// SMP subtraction assignment of a scaled dense matrix product to a dense matrix
// (enabled when IsEvaluationRequired<MT,MT1,MT2> holds).
4706 template<
typename MT
4708 friend inline typename EnableIf< IsEvaluationRequired<MT,MT1,MT2> >::Type
4709 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4716 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4717 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: handled by a single branch (body and the rest of
// the function are not visible in this listing).
4719 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment overload selected when CanExploitSymmetry<MT,MT1,MT2> holds
// (column-major target and at least one symmetric operand type).
// Only the branch conditions are visible in this listing; the statements they guard are not shown.
4751 template<
typename MT >
4752 friend inline typename EnableIf< CanExploitSymmetry<MT,MT1,MT2> >::Type
4753 smpSubAssign( Matrix<MT,true>& lhs,
const DMatScalarMultExpr& rhs )
4762 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4763 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4765 if( IsSymmetric<MT1>::value && IsSymmetric<MT2>::value )
4767 else if( IsSymmetric<MT1>::value )
// Free function returning a DMatDMatMultExpr<T1,T2> expression object. The function name
// and parameter list are on a line not shown — presumably operator* for two row-major
// dense matrices; confirm.
4835 template<
typename T1
4837 inline const DMatDMatMultExpr<T1,T2>
// Throws std::invalid_argument; the size check guarding this throw is not visible here.
4843 throw std::invalid_argument(
"Matrix sizes do not match" );
4860 template<
typename MT1,
typename MT2 >
// Trait specialization deriving from Columns<MT2>, i.e. the column count is taken from the
// right-hand operand type. The struct header is on a line not shown — presumably a
// Columns specialization for DMatDMatMultExpr<MT1,MT2>; confirm.
4878 template<
typename MT1,
typename MT2 >
4880 :
public Columns<MT2>
// Trait specialization that is true only when both operand types are lower matrices.
// The struct header is on a line not shown — presumably an IsLower specialization for
// DMatDMatMultExpr<MT1,MT2>; confirm.
4896 template<
typename MT1,
typename MT2 >
4898 :
public IsTrue< IsLower<MT1>::value && IsLower<MT2>::value >
// Trait specialization that is true only when both operand types are upper matrices.
// The struct header is on a line not shown — presumably an IsUpper specialization for
// DMatDMatMultExpr<MT1,MT2>; confirm.
4914 template<
typename MT1,
typename MT2 >
4916 :
public IsTrue< IsUpper<MT1>::value && IsUpper<MT2>::value >
// Expression trait for (MT1 * MT2) * VT with a dense column vector VT. The result type is
// that of MT1 * (MT2 * VT), i.e. the matrix/vector product is formed first.
// Yields INVALID_TYPE unless both matrices are dense row-major and VT is a dense column vector.
// The struct header is on a line not shown in this listing.
4932 template<
typename MT1,
typename MT2,
typename VT >
4937 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4938 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4939 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4940 ,
typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4941 , INVALID_TYPE >::Type Type;
// Expression trait for (MT1 * MT2) * VT with a sparse column vector VT. The result type is
// that of MT1 * (MT2 * VT): the inner product uses DMatSVecMultExprTrait, the outer one
// DMatDVecMultExprTrait.
// Yields INVALID_TYPE unless both matrices are dense row-major and VT is a sparse column vector.
// The struct header is on a line not shown in this listing.
4950 template<
typename MT1,
typename MT2,
typename VT >
4955 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4956 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4957 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4958 ,
typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4959 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a dense row vector VT. The result type is
// that of (VT * MT1) * MT2, i.e. the vector/matrix product is formed first.
// Yields INVALID_TYPE unless VT is a dense row vector and both matrices are dense row-major.
// The struct header is on a line not shown in this listing.
4968 template<
typename VT,
typename MT1,
typename MT2 >
4973 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4974 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4975 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4976 ,
typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4977 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a sparse row vector VT. The result type is
// that of (VT * MT1) * MT2: the inner product uses TSVecDMatMultExprTrait, the outer one
// TDVecDMatMultExprTrait.
// Yields INVALID_TYPE unless VT is a sparse row vector and both matrices are dense row-major.
// The struct header is on a line not shown in this listing.
4986 template<
typename VT,
typename MT1,
typename MT2 >
4991 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4992 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4993 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4994 ,
typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4995 , INVALID_TYPE >::Type Type;
// Expression trait whose Type is the product type of a submatrix of MT1 and a submatrix of
// MT2 (both with alignment flag AF). The struct header is on a line not shown — presumably
// a SubmatrixExprTrait specialization for DMatDMatMultExpr<MT1,MT2>; confirm.
5004 template<
typename MT1,
typename MT2,
bool AF >
5009 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
5010 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
// Expression trait whose Type is the product type of (a row of MT1) and MT2.
// The struct header is on a line not shown — presumably a RowExprTrait specialization for
// DMatDMatMultExpr<MT1,MT2>; confirm.
5019 template<
typename MT1,
typename MT2 >
5024 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Expression trait whose Type is the product type of MT1 and (a column of MT2).
// The struct header is on a line not shown — presumably a ColumnExprTrait specialization for
// DMatDMatMultExpr<MT1,MT2>; confirm.
5033 template<
typename MT1,
typename MT2 >
5038 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:134
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
Header file for the Rows type trait.
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4838
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:131
BLAZE_ALWAYS_INLINE size_t size(const Vector< VT, TF > &vector)
Returns the current size/dimension of the vector.
Definition: Vector.h:258
BLAZE_ALWAYS_INLINE MT::Iterator end(Matrix< MT, SO > &matrix, size_t i)
Returns an iterator just past the last element of row/column i.
Definition: Matrix.h:258
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:205
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:292
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:364
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:279
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:283
Header file for the IsColumnMajorMatrix type trait.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2478
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:257
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:224
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:255
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:289
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:695
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:123
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2474
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:132
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:384
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsSymmetric type trait.
Header file for the IsDouble type trait.
Compile time check for row-major matrix types. This type trait tests whether or not the given template...
Definition: IsRowMajorMatrix.h:104
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:284
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:428
Header file for the DenseMatrix base class.
BLAZE_ALWAYS_INLINE void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:635
Header file for the Columns type trait.
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold. This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold. This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
Header file for the IsLower type trait.
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Compile time check for symmetric matrices. This type trait tests whether or not the given template par...
Definition: IsSymmetric.h:85
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:319
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2476
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:438
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:281
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:406
Header file for the serial shim.
#define BLAZE_CONSTRAINT_MUST_FORM_VALID_MATMATMULTEXPR(T1, T2)
Constraint on the data type. In case the given data types T1 and T2 do not form a valid matrix/matrix ...
Definition: MatMatMultExpr.h:165
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:394
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:334
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:447
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:211
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
EnableIf< IsDenseMatrix< MT1 > >::Type smpSubAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to dense matrix.
Definition: DenseMatrix.h:160
Header file for the IsSparseVector type trait.
#define BLAZE_CONSTRAINT_MUST_NOT_BE_SYMMETRIC_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is a symmetric matrix type, a compilation error is created.
Definition: Symmetric.h:116
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:749
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:280
Header file for run time assertion macros.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:98
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:142
BLAZE_ALWAYS_INLINE void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:742
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:286
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:374
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:282
BLAZE_ALWAYS_INLINE void reset(const NonNumericProxy< MT > &proxy)
Resetting the represented element to the default initial values.
Definition: NonNumericProxy.h:833
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:298
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:295
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:285
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
const DMatTransExpr< MT,!SO > trans(const DenseMatrix< MT, SO > &dm)
Calculation of the transpose of the given dense matrix.
Definition: DMatTransExpr.h:932
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:256
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
EnableIf< IsDenseMatrix< MT1 > >::Type smpAddAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:129
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:129
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2473
Header file for the IsTrue value trait.
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:133
Header file for the IsUpper type trait.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:130
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Constraint on the data type.
BLAZE_ALWAYS_INLINE EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:225
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:448
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:418
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.
BLAZE_ALWAYS_INLINE void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to matrix.
Definition: Matrix.h:849