35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
// Expression class template TDMatTDMatMultExpr. As declared here it derives
// publicly from DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true > and privately
// from the MatMatMultExpr and Computation bases.
// NOTE(review): only MT1 of the template parameter list is visible in this
// excerpt; MT2 is used in the base-class list, so it is presumably the second
// template parameter -- confirm against the full header.
110 template<
typename MT1
112 class TDMatTDMatMultExpr :
public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
113 ,
private MatMatMultExpr
114 ,
private Computation
// Compile-time switch UseSMPAssign<MT>: 'value' is nonzero when at least one
// of the class-level flags evaluateLeft / evaluateRight is set. The template
// parameter MT does not take part in the computation of 'value'.
142 template<
typename MT >
143 struct UseSMPAssign {
144 enum { value = ( evaluateLeft || evaluateRight ) };
// Compile-time check UseSinglePrecisionKernel<T1,T2,T3>: 'value' is nonzero
// only if IsFloat holds for the ElementType of each of T1, T2 and T3.
154 template<
typename T1,
typename T2,
typename T3 >
155 struct UseSinglePrecisionKernel {
156 enum { value = IsFloat<typename T1::ElementType>::value &&
157 IsFloat<typename T2::ElementType>::value &&
158 IsFloat<typename T3::ElementType>::value };
// Compile-time check UseDoublePrecisionKernel<T1,T2,T3>: 'value' is nonzero
// only if IsDouble holds for the ElementType of each of T1, T2 and T3.
168 template<
typename T1,
typename T2,
typename T3 >
169 struct UseDoublePrecisionKernel {
170 enum { value = IsDouble<typename T1::ElementType>::value &&
171 IsDouble<typename T2::ElementType>::value &&
172 IsDouble<typename T3::ElementType>::value };
// Compile-time check UseSinglePrecisionComplexKernel<T1,T2,T3>: 'value' is
// nonzero only if the ElementType of each of T1, T2 and T3 is exactly
// complex<float> (aliased locally as 'Type').
183 template<
typename T1,
typename T2,
typename T3 >
184 struct UseSinglePrecisionComplexKernel {
185 typedef complex<float> Type;
186 enum { value = IsSame<typename T1::ElementType,Type>::value &&
187 IsSame<typename T2::ElementType,Type>::value &&
188 IsSame<typename T3::ElementType,Type>::value };
// Compile-time check UseDoublePrecisionComplexKernel<T1,T2,T3>: 'value' is
// nonzero only if the ElementType of each of T1, T2 and T3 is exactly
// complex<double> (aliased locally as 'Type').
199 template<
typename T1,
typename T2,
typename T3 >
200 struct UseDoublePrecisionComplexKernel {
201 typedef complex<double> Type;
202 enum { value = IsSame<typename T1::ElementType,Type>::value &&
203 IsSame<typename T2::ElementType,Type>::value &&
204 IsSame<typename T3::ElementType,Type>::value };
// Compile-time check UseDefaultKernel<T1,T2,T3>: 'value' is nonzero if
// BLAZE_BLAS_MODE evaluates to zero, or if none of the four precision-specific
// selectors (single, double, single complex, double complex) holds for
// T1, T2, T3.
214 template<
typename T1,
typename T2,
typename T3 >
215 struct UseDefaultKernel {
216 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
217 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
218 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
219 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time check UseVectorizedDefaultKernel<T1,T2,T3>: 'value' requires
// all of the following:
//  - T1, T2 and T3 each report 'vectorizable',
//  - T2 and T3 have the same ElementType as T1,
//  - IntrinsicTrait of that element type reports addition, subtraction and
//    multiplication.
229 template<
typename T1,
typename T2,
typename T3 >
230 struct UseVectorizedDefaultKernel {
231 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234 IntrinsicTrait<typename T1::ElementType>::addition &&
235 IntrinsicTrait<typename T1::ElementType>::subtraction &&
236 IntrinsicTrait<typename T1::ElementType>::multiplication };
267 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
273 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
274 !evaluateRight && MT2::smpAssignable };
304 if(
lhs_.columns() != 0UL ) {
305 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
307 for(
size_t k=1UL; k<end; k+=2UL ) {
309 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
311 if( end <
lhs_.columns() ) {
339 return rhs_.columns();
369 template<
typename T >
371 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
381 template<
typename T >
383 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
393 return lhs_.isAligned() &&
rhs_.isAligned();
428 template<
typename MT
437 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
440 else if( rhs.lhs_.columns() == 0UL ) {
455 TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Kernel dispatcher for the assignment C = A * B: forwards C, A and B either
// to selectDefaultAssignKernel or to selectBlasAssignKernel.
// NOTE(review): the condition that chooses between the two calls is not
// visible in this excerpt -- confirm against the full header.
471 template<
typename MT3
474 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
477 TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
479 TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Default (non-vectorized) assignment kernel; this overload takes part in
// overload resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5> does
// NOT hold (DisableIf).
//   C  target matrix, written element-wise via C(i,j)
//   A  left operand  (M x K)
//   B  right operand (K x N)
// NOTE(review): A(i,0UL) and B(0UL,j) are read unconditionally, so K >= 1 is
// assumed here; the visible assign() fragment tests for zero columns before
// dispatching -- confirm that every caller does.
498 template<
typename MT3
501 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
502 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
504 const size_t M( A.rows() );
505 const size_t N( B.columns() );
506 const size_t K( A.columns() );
508 for(
size_t i=0UL; i<M; ++i ) {
509 for(
size_t j=0UL; j<N; ++j ) {
// Initialize row i of C with the k == 0 term (plain assignment, no prior read of C).
510 C(i,j) = A(i,0UL) * B(0UL,j);
512 for(
size_t k=1UL; k<K; ++k ) {
513 for(
size_t j=0UL; j<N; ++j ) {
// Accumulate the remaining terms k = 1 .. K-1.
514 C(i,j) += A(i,k) * B(k,j);
// Vectorized default assignment kernel overload for a DenseMatrix<MT3,false>
// target; enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible branch conditions choose a strategy based on which operand type
// is resizable and, failing that, on whether B has no more elements
// (rows * columns) than A.
// NOTE(review): the branch bodies are not visible in this excerpt.
536 template<
typename MT3
539 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
540 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
545 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
549 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
553 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
579 template<
typename MT3
582 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
583 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
585 typedef IntrinsicTrait<ElementType> IT;
587 const size_t M( A.rows() );
588 const size_t N( B.columns() );
589 const size_t K( A.columns() );
593 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
594 for(
size_t j=0UL; j<N; ++j ) {
595 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
596 for(
size_t k=0UL; k<K; ++k ) {
598 xmm1 = xmm1 + A.load(i ,k) * b1;
599 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
600 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
601 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
602 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
603 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
604 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
605 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
607 (~C).
store( i , j, xmm1 );
608 (~C).
store( i+IT::size , j, xmm2 );
609 (~C).
store( i+IT::size*2UL, j, xmm3 );
610 (~C).
store( i+IT::size*3UL, j, xmm4 );
611 (~C).
store( i+IT::size*4UL, j, xmm5 );
612 (~C).
store( i+IT::size*5UL, j, xmm6 );
613 (~C).
store( i+IT::size*6UL, j, xmm7 );
614 (~C).
store( i+IT::size*7UL, j, xmm8 );
617 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
619 for( ; (j+2UL) <= N; j+=2UL ) {
620 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
621 for(
size_t k=0UL; k<K; ++k ) {
628 xmm1 = xmm1 + a1 * b1;
629 xmm2 = xmm2 + a2 * b1;
630 xmm3 = xmm3 + a3 * b1;
631 xmm4 = xmm4 + a4 * b1;
632 xmm5 = xmm5 + a1 * b2;
633 xmm6 = xmm6 + a2 * b2;
634 xmm7 = xmm7 + a3 * b2;
635 xmm8 = xmm8 + a4 * b2;
637 (~C).
store( i , j , xmm1 );
638 (~C).
store( i+IT::size , j , xmm2 );
639 (~C).
store( i+IT::size*2UL, j , xmm3 );
640 (~C).
store( i+IT::size*3UL, j , xmm4 );
641 (~C).
store( i , j+1UL, xmm5 );
642 (~C).
store( i+IT::size , j+1UL, xmm6 );
643 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
644 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
648 for(
size_t k=0UL; k<K; ++k ) {
650 xmm1 = xmm1 + A.load(i ,k) * b1;
651 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
652 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
653 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
655 (~C).
store( i , j, xmm1 );
656 (~C).
store( i+IT::size , j, xmm2 );
657 (~C).
store( i+IT::size*2UL, j, xmm3 );
658 (~C).
store( i+IT::size*3UL, j, xmm4 );
661 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
663 for( ; (j+2UL) <= N; j+=2UL ) {
665 for(
size_t k=0UL; k<K; ++k ) {
670 xmm1 = xmm1 + a1 * b1;
671 xmm2 = xmm2 + a2 * b1;
672 xmm3 = xmm3 + a1 * b2;
673 xmm4 = xmm4 + a2 * b2;
675 (~C).
store( i , j , xmm1 );
676 (~C).
store( i+IT::size, j , xmm2 );
677 (~C).
store( i , j+1UL, xmm3 );
678 (~C).
store( i+IT::size, j+1UL, xmm4 );
682 for(
size_t k=0UL; k<K; ++k ) {
684 xmm1 = xmm1 + A.load(i ,k) * b1;
685 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
687 (~C).
store( i , j, xmm1 );
688 (~C).
store( i+IT::size, j, xmm2 );
693 for( ; (j+2UL) <= N; j+=2UL ) {
695 for(
size_t k=0UL; k<K; ++k ) {
697 xmm1 = xmm1 + a1 *
set( B(k,j ) );
698 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
700 (~C).
store( i, j , xmm1 );
701 (~C).
store( i, j+1UL, xmm2 );
705 for(
size_t k=0UL; k<K; ++k ) {
706 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
708 (~C).
store( i, j, xmm1 );
// Fallback overload of the BLAS assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the work is
// forwarded unchanged to selectDefaultAssignKernel.
729 template<
typename MT3
732 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
733 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
735 selectDefaultAssignKernel( C, A, B );
// BLAS assignment kernel for single precision operands, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = 1.0F and beta = 0.0F, so the previous contents of C do not
// contribute to the result.
755 template<
typename MT3
758 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
759 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
761 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
767 const int M ( numeric_cast<int>( A.rows() ) );
768 const int N ( numeric_cast<int>( B.columns() ) );
769 const int K ( numeric_cast<int>( A.columns() ) );
770 const int lda( numeric_cast<int>( A.spacing() ) );
771 const int ldb( numeric_cast<int>( B.spacing() ) );
772 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
774 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
775 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
776 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
777 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = 1.0 and beta = 0.0, so the previous contents of C do not
// contribute to the result.
798 template<
typename MT3
801 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
802 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
804 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
810 const int M ( numeric_cast<int>( A.rows() ) );
811 const int N ( numeric_cast<int>( B.columns() ) );
812 const int K ( numeric_cast<int>( A.columns() ) );
813 const int lda( numeric_cast<int>( A.spacing() ) );
814 const int ldb( numeric_cast<int>( B.spacing() ) );
815 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
817 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
818 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
819 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
820 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for complex<float> operands, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with
// alpha = (1,0) and beta = (0,0), so the previous contents of C do not
// contribute to the result.
841 template<
typename MT3
844 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
845 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
847 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
856 const int M ( numeric_cast<int>( A.rows() ) );
857 const int N ( numeric_cast<int>( B.columns() ) );
858 const int K ( numeric_cast<int>( A.columns() ) );
859 const int lda( numeric_cast<int>( A.spacing() ) );
860 const int ldb( numeric_cast<int>( B.spacing() ) );
861 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
862 complex<float> alpha( 1.0F, 0.0F );
863 complex<float> beta ( 0.0F, 0.0F );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
865 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
866 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
867 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
868 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for complex<double> operands, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with
// alpha = (1,0) and beta = (0,0), so the previous contents of C do not
// contribute to the result.
889 template<
typename MT3
892 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
893 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
895 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
904 const int M ( numeric_cast<int>( A.rows() ) );
905 const int N ( numeric_cast<int>( B.columns() ) );
906 const int K ( numeric_cast<int>( A.columns() ) );
907 const int lda( numeric_cast<int>( A.spacing() ) );
908 const int ldb( numeric_cast<int>( B.spacing() ) );
909 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
910 complex<double> alpha( 1.0, 0.0 );
911 complex<double> beta ( 0.0, 0.0 );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
913 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
914 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
915 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
916 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
935 template<
typename MT
941 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
953 const TmpType tmp(
serial( rhs ) );
972 template<
typename MT
981 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
995 TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Kernel dispatcher for the addition assignment C += A * B: forwards C, A and
// B either to selectDefaultAddAssignKernel or to selectBlasAddAssignKernel.
// NOTE(review): the condition that chooses between the two calls is not
// visible in this excerpt -- confirm against the full header.
1011 template<
typename MT3
1014 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1017 TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1019 TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Default (non-vectorized) addition assignment kernel; this overload takes
// part in overload resolution only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does NOT hold (DisableIf).
//   C  target matrix, updated in place via C(i,j) +=
//   A  left operand  (M x K)
//   B  right operand (K x N)
1038 template<
typename MT3
1041 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1042 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1044 const size_t M( A.rows() );
1045 const size_t N( B.columns() );
1046 const size_t K( A.columns() );
// 'end' is N with its lowest bit cleared, i.e. the largest even value <= N.
1049 const size_t end( N &
size_t(-2) );
1051 for(
size_t i=0UL; i<M; ++i ) {
1052 for(
size_t k=0UL; k<K; ++k ) {
// Two columns of C are updated per iteration.
1053 for(
size_t j=0UL; j<end; j+=2UL ) {
1054 C(i,j ) += A(i,k) * B(k,j );
1055 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update for column index 'end', the column left over when N is odd.
// NOTE(review): the guard around this statement is not visible in this excerpt.
1058 C(i,end) += A(i,k) * B(k,end);
// Vectorized default addition assignment kernel overload for a
// DenseMatrix<MT3,false> target; enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible branch conditions choose a strategy based on which operand type
// is resizable and, failing that, on whether B has no more elements
// (rows * columns) than A.
// NOTE(review): the branch bodies are not visible in this excerpt.
1080 template<
typename MT3
1083 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1089 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1093 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1097 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1123 template<
typename MT3
1126 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1127 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1129 typedef IntrinsicTrait<ElementType> IT;
1131 const size_t M( A.rows() );
1132 const size_t N( B.columns() );
1133 const size_t K( A.columns() );
1137 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1138 for(
size_t j=0UL; j<N; ++j ) {
1147 for(
size_t k=0UL; k<K; ++k ) {
1149 xmm1 = xmm1 + A.load(i ,k) * b1;
1150 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1151 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1152 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1153 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1154 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1155 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1156 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1158 (~C).
store( i , j, xmm1 );
1159 (~C).
store( i+IT::size , j, xmm2 );
1160 (~C).
store( i+IT::size*2UL, j, xmm3 );
1161 (~C).
store( i+IT::size*3UL, j, xmm4 );
1162 (~C).
store( i+IT::size*4UL, j, xmm5 );
1163 (~C).
store( i+IT::size*5UL, j, xmm6 );
1164 (~C).
store( i+IT::size*6UL, j, xmm7 );
1165 (~C).
store( i+IT::size*7UL, j, xmm8 );
1168 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1170 for( ; (j+2UL) <= N; j+=2UL ) {
1179 for(
size_t k=0UL; k<K; ++k ) {
1186 xmm1 = xmm1 + a1 * b1;
1187 xmm2 = xmm2 + a2 * b1;
1188 xmm3 = xmm3 + a3 * b1;
1189 xmm4 = xmm4 + a4 * b1;
1190 xmm5 = xmm5 + a1 * b2;
1191 xmm6 = xmm6 + a2 * b2;
1192 xmm7 = xmm7 + a3 * b2;
1193 xmm8 = xmm8 + a4 * b2;
1195 (~C).
store( i , j , xmm1 );
1196 (~C).
store( i+IT::size , j , xmm2 );
1197 (~C).
store( i+IT::size*2UL, j , xmm3 );
1198 (~C).
store( i+IT::size*3UL, j , xmm4 );
1199 (~C).
store( i , j+1UL, xmm5 );
1200 (~C).
store( i+IT::size , j+1UL, xmm6 );
1201 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1202 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
1209 for(
size_t k=0UL; k<K; ++k ) {
1211 xmm1 = xmm1 + A.load(i ,k) * b1;
1212 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1213 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1214 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1216 (~C).
store( i , j, xmm1 );
1217 (~C).
store( i+IT::size , j, xmm2 );
1218 (~C).
store( i+IT::size*2UL, j, xmm3 );
1219 (~C).
store( i+IT::size*3UL, j, xmm4 );
1222 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1224 for( ; (j+2UL) <= N; j+=2UL ) {
1229 for(
size_t k=0UL; k<K; ++k ) {
1234 xmm1 = xmm1 + a1 * b1;
1235 xmm2 = xmm2 + a2 * b1;
1236 xmm3 = xmm3 + a1 * b2;
1237 xmm4 = xmm4 + a2 * b2;
1239 (~C).
store( i , j , xmm1 );
1240 (~C).
store( i+IT::size, j , xmm2 );
1241 (~C).
store( i , j+1UL, xmm3 );
1242 (~C).
store( i+IT::size, j+1UL, xmm4 );
1247 for(
size_t k=0UL; k<K; ++k ) {
1249 xmm1 = xmm1 + A.load(i ,k) * b1;
1250 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1252 (~C).
store( i , j, xmm1 );
1253 (~C).
store( i+IT::size, j, xmm2 );
1258 for( ; (j+2UL) <= N; j+=2UL ) {
1261 for(
size_t k=0UL; k<K; ++k ) {
1263 xmm1 = xmm1 + a1 *
set( B(k,j ) );
1264 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
1266 (~C).
store( i, j , xmm1 );
1267 (~C).
store( i, j+1UL, xmm2 );
1271 for(
size_t k=0UL; k<K; ++k ) {
1272 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
1274 (~C).
store( i, j, xmm1 );
// Fallback overload of the BLAS addition assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the work is
// forwarded unchanged to selectDefaultAddAssignKernel.
1295 template<
typename MT3
1298 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1299 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1301 selectDefaultAddAssignKernel( C, A, B );
// BLAS addition assignment kernel for single precision operands, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = 1.0F and beta = 1.0F, i.e. the product is added to the existing
// contents of C.
1321 template<
typename MT3
1324 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1325 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1327 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1333 const int M ( numeric_cast<int>( A.rows() ) );
1334 const int N ( numeric_cast<int>( B.columns() ) );
1335 const int K ( numeric_cast<int>( A.columns() ) );
1336 const int lda( numeric_cast<int>( A.spacing() ) );
1337 const int ldb( numeric_cast<int>( B.spacing() ) );
1338 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1340 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1341 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1342 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1343 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = 1.0 and beta = 1.0, i.e. the product is added to the existing
// contents of C.
1364 template<
typename MT3
1367 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1368 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1370 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1376 const int M ( numeric_cast<int>( A.rows() ) );
1377 const int N ( numeric_cast<int>( B.columns() ) );
1378 const int K ( numeric_cast<int>( A.columns() ) );
1379 const int lda( numeric_cast<int>( A.spacing() ) );
1380 const int ldb( numeric_cast<int>( B.spacing() ) );
1381 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1383 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1384 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1385 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1386 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for complex<float> operands, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with
// alpha = (1,0) and beta = (1,0), i.e. the product is added to the existing
// contents of C.
1407 template<
typename MT3
1410 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1411 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1413 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1422 const int M ( numeric_cast<int>( A.rows() ) );
1423 const int N ( numeric_cast<int>( B.columns() ) );
1424 const int K ( numeric_cast<int>( A.columns() ) );
1425 const int lda( numeric_cast<int>( A.spacing() ) );
1426 const int ldb( numeric_cast<int>( B.spacing() ) );
1427 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1428 const complex<float> alpha( 1.0F, 0.0F );
1429 const complex<float> beta ( 1.0F, 0.0F );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1431 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1432 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1433 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1434 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for complex<double> operands, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with
// alpha = (1,0) and beta = (1,0), i.e. the product is added to the existing
// contents of C.
1455 template<
typename MT3
1458 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1459 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1461 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1470 const int M ( numeric_cast<int>( A.rows() ) );
1471 const int N ( numeric_cast<int>( B.columns() ) );
1472 const int K ( numeric_cast<int>( A.columns() ) );
1473 const int lda( numeric_cast<int>( A.spacing() ) );
1474 const int ldb( numeric_cast<int>( B.spacing() ) );
1475 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1476 const complex<double> alpha( 1.0, 0.0 );
1477 const complex<double> beta ( 1.0, 0.0 );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1479 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1480 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1481 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1482 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1505 template<
typename MT
1514 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1528 TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Kernel dispatcher for the subtraction assignment C -= A * B: forwards C, A
// and B either to selectDefaultSubAssignKernel or to selectBlasSubAssignKernel.
// NOTE(review): the condition that chooses between the two calls is not
// visible in this excerpt -- confirm against the full header.
1544 template<
typename MT3
1547 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1550 TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1552 TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Default (non-vectorized) subtraction assignment kernel; this overload takes
// part in overload resolution only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does NOT hold (DisableIf).
//   C  target matrix, updated in place via C(i,j) -=
//   A  left operand  (M x K)
//   B  right operand (K x N)
1571 template<
typename MT3
1574 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1575 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1577 const size_t M( A.rows() );
1578 const size_t N( B.columns() );
1579 const size_t K( A.columns() );
// 'end' is N with its lowest bit cleared, i.e. the largest even value <= N.
1582 const size_t end( N &
size_t(-2) );
1584 for(
size_t i=0UL; i<M; ++i ) {
1585 for(
size_t k=0UL; k<K; ++k ) {
// Two columns of C are updated per iteration.
1586 for(
size_t j=0UL; j<end; j+=2UL ) {
1587 C(i,j ) -= A(i,k) * B(k,j );
1588 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update for column index 'end', the column left over when N is odd.
// NOTE(review): the guard around this statement is not visible in this excerpt.
1591 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default subtraction assignment kernel overload for a
// DenseMatrix<MT3,false> target; enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible branch conditions choose a strategy based on which operand type
// is resizable and, failing that, on whether B has no more elements
// (rows * columns) than A.
// NOTE(review): the branch bodies are not visible in this excerpt.
1613 template<
typename MT3
1616 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1617 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1622 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1626 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1630 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
1656 template<
typename MT3
1659 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1660 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1662 typedef IntrinsicTrait<ElementType> IT;
1664 const size_t M( A.rows() );
1665 const size_t N( B.columns() );
1666 const size_t K( A.columns() );
1670 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1671 for(
size_t j=0UL; j<N; ++j ) {
1680 for(
size_t k=0UL; k<K; ++k ) {
1682 xmm1 = xmm1 - A.load(i ,k) * b1;
1683 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1684 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1685 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1686 xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1687 xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1688 xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1689 xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1691 (~C).
store( i , j, xmm1 );
1692 (~C).
store( i+IT::size , j, xmm2 );
1693 (~C).
store( i+IT::size*2UL, j, xmm3 );
1694 (~C).
store( i+IT::size*3UL, j, xmm4 );
1695 (~C).
store( i+IT::size*4UL, j, xmm5 );
1696 (~C).
store( i+IT::size*5UL, j, xmm6 );
1697 (~C).
store( i+IT::size*6UL, j, xmm7 );
1698 (~C).
store( i+IT::size*7UL, j, xmm8 );
1701 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1703 for( ; (j+2UL) <= N; j+=2UL ) {
1712 for(
size_t k=0UL; k<K; ++k ) {
1719 xmm1 = xmm1 - a1 * b1;
1720 xmm2 = xmm2 - a2 * b1;
1721 xmm3 = xmm3 - a3 * b1;
1722 xmm4 = xmm4 - a4 * b1;
1723 xmm5 = xmm5 - a1 * b2;
1724 xmm6 = xmm6 - a2 * b2;
1725 xmm7 = xmm7 - a3 * b2;
1726 xmm8 = xmm8 - a4 * b2;
1728 (~C).
store( i , j , xmm1 );
1729 (~C).
store( i+IT::size , j , xmm2 );
1730 (~C).
store( i+IT::size*2UL, j , xmm3 );
1731 (~C).
store( i+IT::size*3UL, j , xmm4 );
1732 (~C).
store( i , j+1UL, xmm5 );
1733 (~C).
store( i+IT::size , j+1UL, xmm6 );
1734 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1735 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
1742 for(
size_t k=0UL; k<K; ++k ) {
1744 xmm1 = xmm1 - A.load(i ,k) * b1;
1745 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1746 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1747 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1749 (~C).
store( i , j, xmm1 );
1750 (~C).
store( i+IT::size , j, xmm2 );
1751 (~C).
store( i+IT::size*2UL, j, xmm3 );
1752 (~C).
store( i+IT::size*3UL, j, xmm4 );
1755 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1757 for( ; (j+2UL) <= N; j+=2UL ) {
1762 for(
size_t k=0UL; k<K; ++k ) {
1767 xmm1 = xmm1 - a1 * b1;
1768 xmm2 = xmm2 - a2 * b1;
1769 xmm3 = xmm3 - a1 * b2;
1770 xmm4 = xmm4 - a2 * b2;
1772 (~C).
store( i , j , xmm1 );
1773 (~C).
store( i+IT::size, j , xmm2 );
1774 (~C).
store( i , j+1UL, xmm3 );
1775 (~C).
store( i+IT::size, j+1UL, xmm4 );
1780 for(
size_t k=0UL; k<K; ++k ) {
1782 xmm1 = xmm1 - A.load(i ,k) * b1;
1783 xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1785 (~C).
store( i , j, xmm1 );
1786 (~C).
store( i+IT::size, j, xmm2 );
1791 for( ; (j+2UL) <= N; j+=2UL ) {
1794 for(
size_t k=0UL; k<K; ++k ) {
1796 xmm1 = xmm1 - a1 *
set( B(k,j ) );
1797 xmm2 = xmm2 - a1 *
set( B(k,j+1UL) );
1799 (~C).
store( i, j , xmm1 );
1800 (~C).
store( i, j+1UL, xmm2 );
1804 for(
size_t k=0UL; k<K; ++k ) {
1805 xmm1 = xmm1 - A.load(i,k) *
set( B(k,j) );
1807 (~C).
store( i, j, xmm1 );
// Fallback overload of the BLAS subtraction assignment kernel, enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the work is
// forwarded unchanged to selectDefaultSubAssignKernel.
1828 template<
typename MT3
1831 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1832 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1834 selectDefaultSubAssignKernel( C, A, B );
// BLAS subtraction assignment kernel for single precision operands, enabled
// when UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with
// alpha = -1.0F and beta = 1.0F, i.e. the product is subtracted from the
// existing contents of C.
1854 template<
typename MT3
1857 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1858 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1860 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1866 const int M ( numeric_cast<int>( A.rows() ) );
1867 const int N ( numeric_cast<int>( B.columns() ) );
1868 const int K ( numeric_cast<int>( A.columns() ) );
1869 const int lda( numeric_cast<int>( A.spacing() ) );
1870 const int ldb( numeric_cast<int>( B.spacing() ) );
1871 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1873 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1874 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1875 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1876 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands, enabled
// when UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with
// alpha = -1.0 and beta = 1.0, i.e. the product is subtracted from the
// existing contents of C.
1897 template<
typename MT3
1900 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1901 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1903 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1909 const int M ( numeric_cast<int>( A.rows() ) );
1910 const int N ( numeric_cast<int>( B.columns() ) );
1911 const int K ( numeric_cast<int>( A.columns() ) );
1912 const int lda( numeric_cast<int>( A.spacing() ) );
1913 const int ldb( numeric_cast<int>( B.spacing() ) );
1914 const int ldc( numeric_cast<int>( C.spacing() ) );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1916 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1917 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1918 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1919 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<float> operands, enabled
// when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm
// with alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from
// the existing contents of C.
1940 template<
typename MT3
1943 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1944 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1946 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
1955 const int M ( numeric_cast<int>( A.rows() ) );
1956 const int N ( numeric_cast<int>( B.columns() ) );
1957 const int K ( numeric_cast<int>( A.columns() ) );
1958 const int lda( numeric_cast<int>( A.spacing() ) );
1959 const int ldb( numeric_cast<int>( B.spacing() ) );
1960 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1961 const complex<float> alpha( -1.0F, 0.0F );
1962 const complex<float> beta ( 1.0F, 0.0F );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
1964 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1965 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1966 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1967 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<double> operands, enabled
// when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm
// with alpha = (-1,0) and beta = (1,0), i.e. the product is subtracted from
// the existing contents of C.
1988 template<
typename MT3
1991 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1992 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1994 using boost::numeric_cast;
// CBLAS takes int sizes; dimensions and leading dimensions (spacing()) are
// converted from the matrices' size type through boost::numeric_cast.
2003 const int M ( numeric_cast<int>( A.rows() ) );
2004 const int N ( numeric_cast<int>( B.columns() ) );
2005 const int K ( numeric_cast<int>( A.columns() ) );
2006 const int lda( numeric_cast<int>( A.spacing() ) );
2007 const int ldb( numeric_cast<int>( B.spacing() ) );
2008 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
2009 const complex<double> alpha( -1.0, 0.0 );
2010 const complex<double> beta ( 1.0, 0.0 );
// Row-major target: CblasRowMajor with both operands flagged CblasTrans;
// otherwise CblasColMajor with both operands flagged CblasNoTrans.
2012 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2013 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2014 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2015 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2048 template<
typename MT
2050 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2058 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2061 else if( rhs.lhs_.columns() == 0UL ) {
2096 template<
typename MT
2098 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2103 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2115 const TmpType tmp( rhs );
2136 template<
typename MT
2138 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2146 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2184 template<
typename MT
2186 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2194 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2253 template<
typename MT1
2257 :
public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2258 ,
private MatScalarMultExpr
2259 ,
private Computation
2263 typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
2275 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2280 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time switch UseSMPAssign<MT> of the scaled-product expression:
// 'value' is nonzero when at least one of evaluateLeft / evaluateRight is
// set. The template parameter MT does not take part in the computation.
2288 template<
typename MT >
2289 struct UseSMPAssign {
2290 enum { value = ( evaluateLeft || evaluateRight ) };
// Compile-time check UseSinglePrecisionKernel<T1,T2,T3,T4>: 'value' is
// nonzero only if IsFloat holds for the ElementType of each of T1, T2 and T3
// and T4 is not a complex type (IsComplex<T4> is false).
// NOTE(review): T4 is presumably the scalar type of the scaled product -- confirm.
2299 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2300 struct UseSinglePrecisionKernel {
2301 enum { value = IsFloat<typename T1::ElementType>::value &&
2302 IsFloat<typename T2::ElementType>::value &&
2303 IsFloat<typename T3::ElementType>::value &&
2304 !IsComplex<T4>::value };
// Compile-time check UseDoublePrecisionKernel<T1,T2,T3,T4>: 'value' is
// nonzero only if IsDouble holds for the ElementType of each of T1, T2 and T3
// and T4 is not a complex type (IsComplex<T4> is false).
// NOTE(review): T4 is presumably the scalar type of the scaled product -- confirm.
2313 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2314 struct UseDoublePrecisionKernel {
2315 enum { value = IsDouble<typename T1::ElementType>::value &&
2316 IsDouble<typename T2::ElementType>::value &&
2317 IsDouble<typename T3::ElementType>::value &&
2318 !IsComplex<T4>::value };
// Compile-time check UseSinglePrecisionComplexKernel<T1,T2,T3>: 'value' is
// nonzero only if the ElementType of each of T1, T2 and T3 is exactly
// complex<float> (aliased locally as 'Type').
2327 template<
typename T1,
typename T2,
typename T3 >
2328 struct UseSinglePrecisionComplexKernel {
2329 typedef complex<float> Type;
2330 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2331 IsSame<typename T2::ElementType,Type>::value &&
2332 IsSame<typename T3::ElementType,Type>::value };
// Compile-time check UseDoublePrecisionComplexKernel<T1,T2,T3>: 'value' is
// nonzero only if the ElementType of each of T1, T2 and T3 is exactly
// complex<double> (aliased locally as 'Type').
2341 template<
typename T1,
typename T2,
typename T3 >
2342 struct UseDoublePrecisionComplexKernel {
2343 typedef complex<double> Type;
2344 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2345 IsSame<typename T2::ElementType,Type>::value &&
2346 IsSame<typename T3::ElementType,Type>::value };
// Compile-time check UseDefaultKernel<T1,T2,T3,T4>: 'value' is nonzero if
// BLAZE_BLAS_MODE evaluates to zero, or if none of the four precision-specific
// selectors holds. T4 is passed on only to the single and double precision
// selectors; the two complex selectors take T1, T2, T3 alone.
2354 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2355 struct UseDefaultKernel {
2356 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2357 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2358 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2359 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time check UseVectorizedDefaultKernel<T1,T2,T3,T4>: 'value' requires
// all of the following:
//  - T1, T2 and T3 each report 'vectorizable',
//  - T2 and T3 have the same ElementType as T1,
//  - T4 is the same type as the ElementType of T1,
//  - IntrinsicTrait of that element type reports addition, subtraction and
//    multiplication.
2367 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2368 struct UseVectorizedDefaultKernel {
2369 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2370 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2371 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2372 IsSame<typename T1::ElementType,T4>::value &&
2373 IntrinsicTrait<typename T1::ElementType>::addition &&
2374 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2375 IntrinsicTrait<typename T1::ElementType>::multiplication };
2381 typedef DMatScalarMultExpr<MMM,ST,true>
This;
2382 typedef typename MultTrait<RES,ST>::Type
ResultType;
2386 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2391 typedef const TDMatTDMatMultExpr<MT1,MT2>
LeftOperand;
2397 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2400 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
2405 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2406 IsSame<ET1,ET2>::value &&
2407 IsSame<ET1,ST>::value &&
2408 IntrinsicTrait<ET1>::addition &&
2409 IntrinsicTrait<ET1>::multiplication };
2412 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2413 !evaluateRight && MT2::smpAssignable };
2422 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
2438 return matrix_(i,j) * scalar_;
// Returns the number of rows of the wrapped matrix expression (matrix_.rows()).
2447 inline size_t rows()
const {
2448 return matrix_.rows();
// Returns the number of columns of the wrapped matrix expression (matrix_.columns()).
2457 inline size_t columns()
const {
2458 return matrix_.columns();
// Forwards the aliasing query for the address 'alias' to the wrapped matrix expression
// and returns its answer unchanged.
2488 template<
typename T >
2489 inline bool canAlias(
const T* alias )
const {
2490 return matrix_.canAlias( alias );
// Forwards the "is aliased with 'alias'" query to the wrapped matrix expression and
// returns its answer unchanged.
2500 template<
typename T >
2501 inline bool isAliased(
const T* alias )
const {
2502 return matrix_.isAliased( alias );
2512 return matrix_.isAligned();
2522 typename MMM::RightOperand B( matrix_.rightOperand() );
// Assignment of the scaled matrix product 'rhs' to the dense matrix 'lhs'.
// The left and right operands of the wrapped product are fetched first; an empty target
// and a zero inner dimension (left.columns() == 0) are handled by dedicated branches
// before the kernel dispatch.
// NOTE(review): the bodies of those branches and the definitions of 'A' and 'B' are on
// lines missing from this listing; presumably A/B are the (evaluated) operands - confirm.
2547 template<
typename MT
2549 friend inline void assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
2556 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2557 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2559 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2562 else if( left.columns() == 0UL ) {
// Hand the target, both operands and the scalar factor to the kernel selection.
2577 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatch for the assignment C = scalar * (A * B): calls either the default
// kernel or the BLAS kernel selection.
// NOTE(review): the condition choosing between the two calls is not part of this listing.
2592 template<
typename MT3
2596 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2599 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2601 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
// Non-vectorized default assignment kernel; SFINAE-disabled whenever
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// For every row i of A the row of C is first initialised from the j = 0 term and the
// remaining terms j = 1 .. A.columns()-1 are accumulated on top of it.
// NOTE(review): A(i,0UL) / B(0UL,k) are read unconditionally, so this relies on the inner
// dimension being non-zero; the caller appears to branch on that case - confirm.
2619 template<
typename MT3
2623 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2624 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2626 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialise row i of C with the first term of each inner product.
2627 for(
size_t k=0UL; k<B.columns(); ++k ) {
2628 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the remaining terms of the inner products.
2630 for(
size_t j=1UL; j<A.columns(); ++j ) {
2631 for(
size_t k=0UL; k<B.columns(); ++k ) {
2632 C(i,k) += A(i,j) * B(j,k);
// NOTE(review): the body of this final pass over row i is not in the listing;
// presumably it applies 'scalar' to C(i,k) - confirm.
2635 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default assignment kernel for a row-major target (DenseMatrix<MT3,false>).
// Instead of computing the product itself, it re-dispatches to assign() with one operand
// replaced by 'tmp': A is kept (B replaced) when only A is resizable, B is kept when only
// B is resizable, and otherwise B is replaced if it has no more elements than A, else A.
// NOTE(review): the declarations of 'tmp' are on lines missing from this listing;
// presumably tmp is a storage-order-converted copy of the replaced operand - confirm.
2656 template<
typename MT3
2660 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2661 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2666 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2668 assign( ~C, A * tmp * scalar );
2670 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2672 assign( ~C, tmp * B * scalar );
2674 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2676 assign( ~C, A * tmp * scalar );
2680 assign( ~C, tmp * B * scalar );
// Vectorized default assignment kernel for a column-major target (DenseMatrix<MT3,true>).
// With M = A.rows(), N = B.columns() and K = A.columns(), the rows of C are processed in
// blocks of 8, 4, 2 and finally 1 SIMD vector(s) of IT::size elements. Each accumulator
// sums A.load(..,k) * b over k in [0,K) and is multiplied by 'factor' when stored to C.
// NOTE(review): the declarations of 'i', 'j', 'factor', 'a1..a4' and 'b1/b2' are on lines
// missing from this listing; 'factor' presumably broadcasts 'scalar' - confirm.
2699 template<
typename MT3
2703 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2704 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2706 typedef IntrinsicTrait<ElementType> IT;
2708 const size_t M( A.rows() );
2709 const size_t N( B.columns() );
2710 const size_t K( A.columns() );
// Blocks of 8 SIMD vectors of rows, one column of C at a time; taken while the eighth
// vector still starts below M.
2716 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2717 for(
size_t j=0UL; j<N; ++j ) {
2718 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2719 for(
size_t k=0UL; k<K; ++k ) {
2721 xmm1 = xmm1 + A.load(i ,k) * b1;
2722 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2723 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2724 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2725 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2726 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2727 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2728 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2730 (~C).
store( i , j, xmm1 * factor );
2731 (~C).
store( i+IT::size , j, xmm2 * factor );
2732 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2733 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
2734 (~C).
store( i+IT::size*4UL, j, xmm5 * factor );
2735 (~C).
store( i+IT::size*5UL, j, xmm6 * factor );
2736 (~C).
store( i+IT::size*6UL, j, xmm7 * factor );
2737 (~C).
store( i+IT::size*7UL, j, xmm8 * factor );
// Blocks of 4 SIMD vectors of rows; columns of C are handled in pairs first
// (xmm1-4 for column j, xmm5-8 for column j+1), then singly.
2740 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2742 for( ; (j+2UL) <= N; j+=2UL ) {
2743 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2744 for(
size_t k=0UL; k<K; ++k ) {
2751 xmm1 = xmm1 + a1 * b1;
2752 xmm2 = xmm2 + a2 * b1;
2753 xmm3 = xmm3 + a3 * b1;
2754 xmm4 = xmm4 + a4 * b1;
2755 xmm5 = xmm5 + a1 * b2;
2756 xmm6 = xmm6 + a2 * b2;
2757 xmm7 = xmm7 + a3 * b2;
2758 xmm8 = xmm8 + a4 * b2;
2760 (~C).
store( i , j , xmm1 * factor );
2761 (~C).
store( i+IT::size , j , xmm2 * factor );
2762 (~C).
store( i+IT::size*2UL, j , xmm3 * factor );
2763 (~C).
store( i+IT::size*3UL, j , xmm4 * factor );
2764 (~C).
store( i , j+1UL, xmm5 * factor );
2765 (~C).
store( i+IT::size , j+1UL, xmm6 * factor );
2766 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2767 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 * factor );
// Single-column pass of the 4-vector block (its guard line is not in the listing).
2771 for(
size_t k=0UL; k<K; ++k ) {
2773 xmm1 = xmm1 + A.load(i ,k) * b1;
2774 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2775 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2776 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2778 (~C).
store( i , j, xmm1 * factor );
2779 (~C).
store( i+IT::size , j, xmm2 * factor );
2780 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2781 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
// Blocks of 2 SIMD vectors of rows; column pairs first, then a single-column pass.
2784 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2786 for( ; (j+2UL) <= N; j+=2UL ) {
2788 for(
size_t k=0UL; k<K; ++k ) {
2793 xmm1 = xmm1 + a1 * b1;
2794 xmm2 = xmm2 + a2 * b1;
2795 xmm3 = xmm3 + a1 * b2;
2796 xmm4 = xmm4 + a2 * b2;
2798 (~C).
store( i , j , xmm1 * factor );
2799 (~C).
store( i+IT::size, j , xmm2 * factor );
2800 (~C).
store( i , j+1UL, xmm3 * factor );
2801 (~C).
store( i+IT::size, j+1UL, xmm4 * factor );
2805 for(
size_t k=0UL; k<K; ++k ) {
2807 xmm1 = xmm1 + A.load(i ,k) * b1;
2808 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2810 (~C).
store( i , j, xmm1 * factor );
2811 (~C).
store( i+IT::size, j, xmm2 * factor );
// Remaining single SIMD vector of rows (its guard line is not in the listing):
// column pairs first, then a single-column pass; B elements are broadcast with set().
2816 for( ; (j+2UL) <= N; j+=2UL ) {
2818 for(
size_t k=0UL; k<K; ++k ) {
2820 xmm1 = xmm1 + a1 *
set( B(k,j ) );
2821 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
2823 (~C).
store( i, j , xmm1 * factor );
2824 (~C).
store( i, j+1UL, xmm2 * factor );
2828 for(
size_t k=0UL; k<K; ++k ) {
2829 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
2831 (~C).
store( i, j, xmm1 * factor );
// BLAS kernel selection fallback: enabled when UseDefaultKernel<MT3,MT4,MT5,ST2> holds
// and simply forwards all arguments to selectDefaultAssignKernel.
2851 template<
typename MT3
2855 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2856 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2858 selectDefaultAssignKernel( C, A, B, scalar );
// Single precision BLAS assignment kernel (enabled by UseSinglePrecisionKernel).
// Dimensions and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast, then cblas_sgemm is called with alpha = scalar and beta = 0.0F,
// i.e. C is overwritten with the scaled product.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
2877 template<
typename MT3
2881 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2882 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2884 using boost::numeric_cast;
2890 const int M ( numeric_cast<int>( A.rows() ) );
2891 const int N ( numeric_cast<int>( B.columns() ) );
2892 const int K ( numeric_cast<int>( A.columns() ) );
2893 const int lda( numeric_cast<int>( A.spacing() ) );
2894 const int ldb( numeric_cast<int>( B.spacing() ) );
2895 const int ldc( numeric_cast<int>( C.spacing() ) );
2897 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2898 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2899 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2900 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// Double precision BLAS assignment kernel (enabled by UseDoublePrecisionKernel).
// Dimensions and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast, then cblas_dgemm is called with alpha = scalar and beta = 0.0,
// i.e. C is overwritten with the scaled product.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
2920 template<
typename MT3
2924 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2925 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2927 using boost::numeric_cast;
2933 const int M ( numeric_cast<int>( A.rows() ) );
2934 const int N ( numeric_cast<int>( B.columns() ) );
2935 const int K ( numeric_cast<int>( A.columns() ) );
2936 const int lda( numeric_cast<int>( A.spacing() ) );
2937 const int ldb( numeric_cast<int>( B.spacing() ) );
2938 const int ldc( numeric_cast<int>( C.spacing() ) );
2940 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2941 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2942 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2943 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// Single precision complex BLAS assignment kernel (enabled by
// UseSinglePrecisionComplexKernel). The scalar is widened to complex<float> 'alpha' and
// 'beta' is complex zero, so cblas_cgemm overwrites C with the scaled product; both
// factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
2963 template<
typename MT3
2967 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2968 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2970 using boost::numeric_cast;
2979 const int M ( numeric_cast<int>( A.rows() ) );
2980 const int N ( numeric_cast<int>( B.columns() ) );
2981 const int K ( numeric_cast<int>( A.columns() ) );
2982 const int lda( numeric_cast<int>( A.spacing() ) );
2983 const int ldb( numeric_cast<int>( B.spacing() ) );
2984 const int ldc( numeric_cast<int>( C.spacing() ) );
2985 const complex<float> alpha( scalar );
2986 const complex<float> beta ( 0.0F, 0.0F );
2988 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2989 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2990 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2991 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double precision complex BLAS assignment kernel (enabled by
// UseDoublePrecisionComplexKernel). The scalar is widened to complex<double> 'alpha' and
// 'beta' is complex zero, so cblas_zgemm overwrites C with the scaled product; both
// factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3011 template<
typename MT3
3015 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3016 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3018 using boost::numeric_cast;
3027 const int M ( numeric_cast<int>( A.rows() ) );
3028 const int N ( numeric_cast<int>( B.columns() ) );
3029 const int K ( numeric_cast<int>( A.columns() ) );
3030 const int lda( numeric_cast<int>( A.spacing() ) );
3031 const int ldb( numeric_cast<int>( B.spacing() ) );
3032 const int ldc( numeric_cast<int>( C.spacing() ) );
3033 const complex<double> alpha( scalar );
3034 const complex<double> beta ( 0.0, 0.0 );
3036 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3037 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3038 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3039 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of the scaled matrix product to a sparse matrix. The expression is first
// evaluated serially (serial( rhs )) into a temporary of type TmpType, which SelectType
// picks from ResultType and OppositeType based on the target's storage order SO.
// NOTE(review): the statement that transfers 'tmp' into 'lhs' is not in this listing.
3056 template<
typename MT
3058 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
3062 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
3074 const TmpType tmp(
serial( rhs ) );
// Addition assignment of the scaled matrix product 'rhs' to the dense matrix 'lhs'.
// A single guard covers an empty target and a zero inner dimension
// (left.columns() == 0); otherwise the work is handed to selectAddAssignKernel.
// NOTE(review): the guard's body and the definitions of 'A' and 'B' are on lines missing
// from this listing; the guard presumably returns without touching 'lhs' - confirm.
3091 template<
typename MT
3093 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
3100 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3101 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3103 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3117 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatch for the addition assignment C += scalar * (A * B): calls either the
// default kernel or the BLAS kernel selection.
// NOTE(review): the condition choosing between the two calls is not part of this listing.
3132 template<
typename MT3
3136 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3139 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3141 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
// Non-vectorized default addition assignment kernel; SFINAE-disabled whenever
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// NOTE(review): only the signature is present in this listing; the body is not shown.
3159 template<
typename MT3
3163 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3164 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default addition assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). It branches on which operand is resizable and, failing that,
// on whether B has no more elements than A.
// NOTE(review): the statements inside the branches are not in this listing; presumably
// they mirror the assign kernel and re-dispatch with a converted operand - confirm.
3185 template<
typename MT3
3189 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3190 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3195 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3199 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3203 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default addition assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). With M = A.rows(), N = B.columns() and K = A.columns(), the
// rows of C are processed in blocks of 8, 4, 2 and finally 1 SIMD vector(s) of IT::size
// elements. Each accumulator sums A.load(..,k) * b over k in [0,K); the result times
// 'factor' is added to the value loaded from C and stored back.
// NOTE(review): the declarations of 'i', 'j', 'factor', 'a1..a4' and 'b1/b2' are on lines
// missing from this listing; 'factor' presumably broadcasts 'scalar' - confirm.
3228 template<
typename MT3
3232 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3233 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3235 typedef IntrinsicTrait<ElementType> IT;
3237 const size_t M( A.rows() );
3238 const size_t N( B.columns() );
3239 const size_t K( A.columns() );
// Blocks of 8 SIMD vectors of rows, one column of C at a time; taken while the eighth
// vector still starts below M.
3245 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3246 for(
size_t j=0UL; j<N; ++j ) {
3247 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3248 for(
size_t k=0UL; k<K; ++k ) {
3250 xmm1 = xmm1 + A.load(i ,k) * b1;
3251 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3252 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3253 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3254 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3255 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3256 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3257 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3259 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3260 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3261 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3262 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3263 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
3264 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
3265 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
3266 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
// Blocks of 4 SIMD vectors of rows; columns of C are handled in pairs first
// (xmm1-4 for column j, xmm5-8 for column j+1), then singly.
3269 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3271 for( ; (j+2UL) <= N; j+=2UL ) {
3272 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3273 for(
size_t k=0UL; k<K; ++k ) {
3280 xmm1 = xmm1 + a1 * b1;
3281 xmm2 = xmm2 + a2 * b1;
3282 xmm3 = xmm3 + a3 * b1;
3283 xmm4 = xmm4 + a4 * b1;
3284 xmm5 = xmm5 + a1 * b2;
3285 xmm6 = xmm6 + a2 * b2;
3286 xmm7 = xmm7 + a3 * b2;
3287 xmm8 = xmm8 + a4 * b2;
3289 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3290 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
3291 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
3292 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
3293 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
3294 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
3295 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
3296 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
// Single-column pass of the 4-vector block (its guard line is not in the listing).
3300 for(
size_t k=0UL; k<K; ++k ) {
3302 xmm1 = xmm1 + A.load(i ,k) * b1;
3303 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3304 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3305 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3307 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3308 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3309 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3310 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
// Blocks of 2 SIMD vectors of rows; column pairs first, then a single-column pass.
3313 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3315 for( ; (j+2UL) <= N; j+=2UL ) {
3317 for(
size_t k=0UL; k<K; ++k ) {
3322 xmm1 = xmm1 + a1 * b1;
3323 xmm2 = xmm2 + a2 * b1;
3324 xmm3 = xmm3 + a1 * b2;
3325 xmm4 = xmm4 + a2 * b2;
3327 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3328 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
3329 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
3330 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
3334 for(
size_t k=0UL; k<K; ++k ) {
3336 xmm1 = xmm1 + A.load(i ,k) * b1;
3337 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3339 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3340 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
// Remaining single SIMD vector of rows (its guard line is not in the listing):
// column pairs first, then a single-column pass; B elements are broadcast with set().
3345 for( ; (j+2UL) <= N; j+=2UL ) {
3347 for(
size_t k=0UL; k<K; ++k ) {
3349 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3350 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3352 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3353 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
3357 for(
size_t k=0UL; k<K; ++k ) {
3358 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3360 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// BLAS kernel selection fallback for addition assignment: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to
// selectDefaultAddAssignKernel.
3380 template<
typename MT3
3384 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3385 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3387 selectDefaultAddAssignKernel( C, A, B, scalar );
// Single precision BLAS addition assignment kernel (enabled by UseSinglePrecisionKernel).
// Dimensions and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast, then cblas_sgemm is called with alpha = scalar and beta = 1.0F,
// i.e. the scaled product is accumulated onto the existing contents of C.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3406 template<
typename MT3
3410 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3411 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3413 using boost::numeric_cast;
3419 const int M ( numeric_cast<int>( A.rows() ) );
3420 const int N ( numeric_cast<int>( B.columns() ) );
3421 const int K ( numeric_cast<int>( A.columns() ) );
3422 const int lda( numeric_cast<int>( A.spacing() ) );
3423 const int ldb( numeric_cast<int>( B.spacing() ) );
3424 const int ldc( numeric_cast<int>( C.spacing() ) );
3426 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3427 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3428 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3429 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// Double precision BLAS addition assignment kernel (enabled by UseDoublePrecisionKernel).
// Dimensions and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast, then cblas_dgemm is called with alpha = scalar and beta = 1.0,
// i.e. the scaled product is accumulated onto the existing contents of C.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3449 template<
typename MT3
3453 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3454 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3456 using boost::numeric_cast;
3462 const int M ( numeric_cast<int>( A.rows() ) );
3463 const int N ( numeric_cast<int>( B.columns() ) );
3464 const int K ( numeric_cast<int>( A.columns() ) );
3465 const int lda( numeric_cast<int>( A.spacing() ) );
3466 const int ldb( numeric_cast<int>( B.spacing() ) );
3467 const int ldc( numeric_cast<int>( C.spacing() ) );
3469 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3470 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3471 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3472 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// Single precision complex BLAS addition assignment kernel (enabled by
// UseSinglePrecisionComplexKernel). The scalar is widened to complex<float> 'alpha' and
// 'beta' is complex one, so cblas_cgemm accumulates the scaled product onto C; both
// factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3492 template<
typename MT3
3496 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3497 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3499 using boost::numeric_cast;
3508 const int M ( numeric_cast<int>( A.rows() ) );
3509 const int N ( numeric_cast<int>( B.columns() ) );
3510 const int K ( numeric_cast<int>( A.columns() ) );
3511 const int lda( numeric_cast<int>( A.spacing() ) );
3512 const int ldb( numeric_cast<int>( B.spacing() ) );
3513 const int ldc( numeric_cast<int>( C.spacing() ) );
3514 const complex<float> alpha( scalar );
3515 const complex<float> beta ( 1.0F, 0.0F );
3517 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3518 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3519 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3520 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double precision complex BLAS addition assignment kernel (enabled by
// UseDoublePrecisionComplexKernel). The scalar is widened to complex<double> 'alpha' and
// 'beta' is complex one, so cblas_zgemm accumulates the scaled product onto C; both
// factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3540 template<
typename MT3
3544 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3545 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3547 using boost::numeric_cast;
3556 const int M ( numeric_cast<int>( A.rows() ) );
3557 const int N ( numeric_cast<int>( B.columns() ) );
3558 const int K ( numeric_cast<int>( A.columns() ) );
3559 const int lda( numeric_cast<int>( A.spacing() ) );
3560 const int ldb( numeric_cast<int>( B.spacing() ) );
3561 const int ldc( numeric_cast<int>( C.spacing() ) );
3562 const complex<double> alpha( scalar );
3563 const complex<double> beta ( 1.0, 0.0 );
3565 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3566 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3567 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3568 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of the scaled matrix product 'rhs' to the dense matrix 'lhs'.
// A single guard covers an empty target and a zero inner dimension
// (left.columns() == 0); otherwise the work is handed to selectSubAssignKernel.
// NOTE(review): the guard's body and the definitions of 'A' and 'B' are on lines missing
// from this listing; the guard presumably returns without touching 'lhs' - confirm.
3589 template<
typename MT
3591 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
3598 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3599 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3601 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3615 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel dispatch for the subtraction assignment C -= scalar * (A * B): calls either the
// default kernel or the BLAS kernel selection.
// NOTE(review): the condition choosing between the two calls is not part of this listing.
3630 template<
typename MT3
3634 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3637 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3639 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
// Non-vectorized default subtraction assignment kernel; SFINAE-disabled whenever
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// NOTE(review): only the signature is present in this listing; the body is not shown.
3657 template<
typename MT3
3661 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3662 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>). It branches on which operand is resizable and, failing that,
// on whether B has no more elements than A.
// NOTE(review): the statements inside the branches are not in this listing; presumably
// they mirror the assign kernel and re-dispatch with a converted operand - confirm.
3683 template<
typename MT3
3687 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3688 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3693 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3697 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3701 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). With M = A.rows(), N = B.columns() and K = A.columns(), the
// rows of C are processed in blocks of 8, 4, 2 and finally 1 SIMD vector(s) of IT::size
// elements. Each accumulator sums A.load(..,k) * b over k in [0,K); the result times
// 'factor' is subtracted from the value loaded from C and stored back.
// NOTE(review): the declarations of 'i', 'j', 'factor', 'a1..a4' and 'b1/b2' are on lines
// missing from this listing; 'factor' presumably broadcasts 'scalar' - confirm.
3726 template<
typename MT3
3730 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3731 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3733 typedef IntrinsicTrait<ElementType> IT;
3735 const size_t M( A.rows() );
3736 const size_t N( B.columns() );
3737 const size_t K( A.columns() );
// Blocks of 8 SIMD vectors of rows, one column of C at a time; taken while the eighth
// vector still starts below M.
3743 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3744 for(
size_t j=0UL; j<N; ++j ) {
3745 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3746 for(
size_t k=0UL; k<K; ++k ) {
3748 xmm1 = xmm1 + A.load(i ,k) * b1;
3749 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3750 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3751 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3752 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3753 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3754 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3755 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3757 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3758 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3759 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3760 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3761 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3762 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3763 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3764 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
// Blocks of 4 SIMD vectors of rows; columns of C are handled in pairs first
// (xmm1-4 for column j, xmm5-8 for column j+1), then singly.
3767 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3769 for( ; (j+2UL) <= N; j+=2UL ) {
3770 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3771 for(
size_t k=0UL; k<K; ++k ) {
3778 xmm1 = xmm1 + a1 * b1;
3779 xmm2 = xmm2 + a2 * b1;
3780 xmm3 = xmm3 + a3 * b1;
3781 xmm4 = xmm4 + a4 * b1;
3782 xmm5 = xmm5 + a1 * b2;
3783 xmm6 = xmm6 + a2 * b2;
3784 xmm7 = xmm7 + a3 * b2;
3785 xmm8 = xmm8 + a4 * b2;
3787 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3788 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3789 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3790 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3791 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3792 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3793 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3794 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
// Single-column pass of the 4-vector block (its guard line is not in the listing).
3798 for(
size_t k=0UL; k<K; ++k ) {
3800 xmm1 = xmm1 + A.load(i ,k) * b1;
3801 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3802 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3803 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3805 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3806 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3807 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3808 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
// Blocks of 2 SIMD vectors of rows; column pairs first, then a single-column pass.
3811 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3813 for( ; (j+2UL) <= N; j+=2UL ) {
3815 for(
size_t k=0UL; k<K; ++k ) {
3820 xmm1 = xmm1 + a1 * b1;
3821 xmm2 = xmm2 + a2 * b1;
3822 xmm3 = xmm3 + a1 * b2;
3823 xmm4 = xmm4 + a2 * b2;
3825 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3826 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3827 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3828 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
3832 for(
size_t k=0UL; k<K; ++k ) {
3834 xmm1 = xmm1 + A.load(i ,k) * b1;
3835 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3837 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3838 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
// Remaining single SIMD vector of rows (its guard line is not in the listing):
// column pairs first, then a single-column pass; B elements are broadcast with set().
3843 for( ; (j+2UL) <= N; j+=2UL ) {
3845 for(
size_t k=0UL; k<K; ++k ) {
3847 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3848 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3850 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3851 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
3855 for(
size_t k=0UL; k<K; ++k ) {
3856 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3858 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// BLAS kernel selection fallback for subtraction assignment: enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds and simply forwards all arguments to
// selectDefaultSubAssignKernel.
3878 template<
typename MT3
3882 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3883 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3885 selectDefaultSubAssignKernel( C, A, B, scalar );
// Single precision BLAS subtraction assignment kernel (enabled by
// UseSinglePrecisionKernel). Dimensions and leading dimensions (spacing()) are converted
// to int with boost::numeric_cast, then cblas_sgemm is called with alpha = -scalar and
// beta = 1.0F, i.e. the scaled product is subtracted from the existing contents of C.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3904 template<
typename MT3
3908 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3909 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3911 using boost::numeric_cast;
3917 const int M ( numeric_cast<int>( A.rows() ) );
3918 const int N ( numeric_cast<int>( B.columns() ) );
3919 const int K ( numeric_cast<int>( A.columns() ) );
3920 const int lda( numeric_cast<int>( A.spacing() ) );
3921 const int ldb( numeric_cast<int>( B.spacing() ) );
3922 const int ldc( numeric_cast<int>( C.spacing() ) );
3924 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3925 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3926 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3927 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// Double precision BLAS subtraction assignment kernel (enabled by
// UseDoublePrecisionKernel). Dimensions and leading dimensions (spacing()) are converted
// to int with boost::numeric_cast, then cblas_dgemm is called with alpha = -scalar and
// beta = 1.0, i.e. the scaled product is subtracted from the existing contents of C.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3947 template<
typename MT3
3951 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3952 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3954 using boost::numeric_cast;
3960 const int M ( numeric_cast<int>( A.rows() ) );
3961 const int N ( numeric_cast<int>( B.columns() ) );
3962 const int K ( numeric_cast<int>( A.columns() ) );
3963 const int lda( numeric_cast<int>( A.spacing() ) );
3964 const int ldb( numeric_cast<int>( B.spacing() ) );
3965 const int ldc( numeric_cast<int>( C.spacing() ) );
3967 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3968 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3969 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3970 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// Single precision complex BLAS subtraction assignment kernel (enabled by
// UseSinglePrecisionComplexKernel). 'alpha' is the negated scalar widened to
// complex<float> and 'beta' is complex one, so cblas_cgemm subtracts the scaled product
// from C; both factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
3990 template<
typename MT3
3994 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3995 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3997 using boost::numeric_cast;
4006 const int M ( numeric_cast<int>( A.rows() ) );
4007 const int N ( numeric_cast<int>( B.columns() ) );
4008 const int K ( numeric_cast<int>( A.columns() ) );
4009 const int lda( numeric_cast<int>( A.spacing() ) );
4010 const int ldb( numeric_cast<int>( B.spacing() ) );
4011 const int ldc( numeric_cast<int>( C.spacing() ) );
4012 const complex<float> alpha( -scalar );
4013 const complex<float> beta ( 1.0F, 0.0F );
4015 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4016 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4017 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4018 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Double precision complex BLAS subtraction assignment kernel (enabled by
// UseDoublePrecisionComplexKernel). 'alpha' is the negated scalar widened to
// complex<double> and 'beta' is complex one, so cblas_zgemm subtracts the scaled product
// from C; both factors are passed by address.
// When MT3 is row-major the call uses CblasRowMajor and flags both operands as
// transposed; otherwise it uses CblasColMajor without transposition.
4038 template<
typename MT3
4042 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4043 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4045 using boost::numeric_cast;
4054 const int M ( numeric_cast<int>( A.rows() ) );
4055 const int N ( numeric_cast<int>( B.columns() ) );
4056 const int K ( numeric_cast<int>( A.columns() ) );
4057 const int lda( numeric_cast<int>( A.spacing() ) );
4058 const int ldb( numeric_cast<int>( B.spacing() ) );
4059 const int ldc( numeric_cast<int>( C.spacing() ) );
4060 const complex<double> alpha( -scalar );
4061 const complex<double> beta ( 1.0, 0.0 );
4063 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4064 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4065 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4066 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// SMP assignment of the scaled matrix product to a dense matrix; only participates in
// overload resolution when UseSMPAssign<MT> holds.
// The left and right operands of the wrapped product are fetched first; an empty target
// and a zero inner dimension (left.columns() == 0) are handled by dedicated branches.
// NOTE(review): the branch bodies and the actual assignment statement are on lines
// missing from this listing.
4098 template<
typename MT
4100 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4101 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4108 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4109 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4111 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
4114 else if( left.columns() == 0UL ) {
// SMP assignment of the scaled matrix product to a sparse matrix; only participates in
// overload resolution when UseSMPAssign<MT> holds. The expression is evaluated into a
// temporary of type TmpType, which SelectType picks from ResultType and OppositeType
// based on the target's storage order SO.
// NOTE(review): the statement that transfers 'tmp' into 'lhs' is not in this listing.
4148 template<
typename MT
4150 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4151 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4155 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
4167 const TmpType tmp( rhs );
// SMP addition assignment of the scaled matrix product to a dense matrix; only
// participates in overload resolution when UseSMPAssign<MT> holds.
// A single guard covers an empty target and a zero inner dimension (left.columns() == 0).
// NOTE(review): the guard's body and the actual addition assignment are on lines missing
// from this listing.
4187 template<
typename MT
4189 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4190 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4197 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4198 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4200 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment of the scaled matrix product to a dense matrix; only
// participates in overload resolution when UseSMPAssign<MT> holds.
// A single guard covers an empty target and a zero inner dimension (left.columns() == 0).
// NOTE(review): the guard's body and the actual subtraction assignment are on lines
// missing from this listing.
4237 template<
typename MT
4239 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4240 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
4247 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4248 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
4250 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Free function returning a TDMatTDMatMultExpr<T1,T2> expression object.
// Throws std::invalid_argument with the message below on a size mismatch.
// NOTE(review): the function name, its parameters and the exact mismatch condition are
// on lines missing from this listing; presumably this is the operator* for two
// column-major dense matrices - confirm.
4329 template<
typename T1
4331 inline const TDMatTDMatMultExpr<T1,T2>
4337 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression trait for (MT1 * MT2) * VT with a dense column vector VT: if MT1 and MT2
// are column-major dense matrices and VT is a dense column vector, 'Type' is the type of
// MT1 * (MT2 * VT), built from nested TDMatDVecMultExprTrait lookups; otherwise it is
// INVALID_TYPE.
// NOTE(review): the struct header of this specialization is not in the listing.
4354 template<
typename MT1,
typename MT2,
typename VT >
4359 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4360 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4361 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4362 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4363 , INVALID_TYPE >::Type Type;
// Expression trait for (MT1 * MT2) * VT with a sparse column vector VT: if MT1 and MT2
// are column-major dense matrices and VT is a sparse column vector, 'Type' is the type
// of MT1 * (MT2 * VT), where the inner product uses TDMatSVecMultExprTrait and the outer
// one TDMatDVecMultExprTrait; otherwise it is INVALID_TYPE.
// NOTE(review): the struct header of this specialization is not in the listing.
4372 template<
typename MT1,
typename MT2,
typename VT >
4377 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4378 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4379 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4380 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4381 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a dense row vector VT: if VT is a dense row
// vector and MT1 and MT2 are column-major dense matrices, 'Type' is the type of
// (VT * MT1) * MT2, built from nested TDVecTDMatMultExprTrait lookups; otherwise it is
// INVALID_TYPE.
// NOTE(review): the struct header of this specialization is not in the listing.
4390 template<
typename VT,
typename MT1,
typename MT2 >
4395 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4396 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4397 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4398 ,
typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4399 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a sparse row vector VT: if VT is a sparse
// row vector and MT1 and MT2 are column-major dense matrices, 'Type' is the type of
// (VT * MT1) * MT2, where the inner product uses TSVecTDMatMultExprTrait and the outer
// one TDVecTDMatMultExprTrait; otherwise it is INVALID_TYPE.
// NOTE(review): the struct header of this specialization is not in the listing.
4408 template<
typename VT,
typename MT1,
typename MT2 >
4413 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4414 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4415 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4416 ,
typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4417 , INVALID_TYPE >::Type Type;
// Trait typedef parameterised on MT1, MT2 and the flag AF: 'Type' is the multiplication
// expression type (MultExprTrait) of the SubmatrixExprTrait types of 'const MT1' and
// 'const MT2', both taken with the same AF.
// NOTE(review): the struct header of this specialization is not in the listing.
4426 template<
typename MT1,
typename MT2,
bool AF >
4431 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4432 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4441 template<
typename MT1,
typename MT2 >
4446 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4455 template<
typename MT1,
typename MT2 >
4460 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:253
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:123
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:223
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:4329
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:249
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:176
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:411
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
Header file for the IsColumnMajorMatrix type trait.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:392
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:244
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:298
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:249
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:131
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:370
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2404
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:348
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:246
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:243
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold. This threshold speci...
Definition: Thresholds.h:903
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:118
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:256
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:358
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:250
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
Header file for the serial shim.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:382
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:245
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:247
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:328
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:259
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:283
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:262
Header file for the IsColumnVector type trait.
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:402
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:338
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:248
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:412