35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
// Expression class TDMatTDMatMultExpr (fragment: the remaining template
// parameters and the class body are on lines not shown here).
// It derives publicly from DenseMatrix parameterised on itself and privately
// from the MatMatMultExpr and Computation bases.
// NOTE(review): the `true` flag presumably selects column-major storage - confirm.
111 template<
typename MT1
113 class TDMatTDMatMultExpr :
public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
114 ,
private MatMatMultExpr
115 ,
private Computation
// Compile-time switch for the SMP assignment kernels: `value` is nonzero when
// either class-level flag evaluateLeft or evaluateRight is set. The template
// parameters T1..T3 do not appear in the visible expression.
142 template<
typename T1,
typename T2,
typename T3 >
143 struct UseSMPAssignKernel {
144 enum { value = evaluateLeft || evaluateRight };
// Compile-time switch: `value` is nonzero only when the element types of all
// three matrix types T1, T2 and T3 satisfy IsFloat.
154 template<
typename T1,
typename T2,
typename T3 >
155 struct UseSinglePrecisionKernel {
156 enum { value = IsFloat<typename T1::ElementType>::value &&
157 IsFloat<typename T2::ElementType>::value &&
158 IsFloat<typename T3::ElementType>::value };
// Compile-time switch: `value` is nonzero only when the element types of all
// three matrix types T1, T2 and T3 satisfy IsDouble.
168 template<
typename T1,
typename T2,
typename T3 >
169 struct UseDoublePrecisionKernel {
170 enum { value = IsDouble<typename T1::ElementType>::value &&
171 IsDouble<typename T2::ElementType>::value &&
172 IsDouble<typename T3::ElementType>::value };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 are all exactly complex<float>.
183 template<
typename T1,
typename T2,
typename T3 >
184 struct UseSinglePrecisionComplexKernel {
185 typedef complex<float> Type;
186 enum { value = IsSame<typename T1::ElementType,Type>::value &&
187 IsSame<typename T2::ElementType,Type>::value &&
188 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 are all exactly complex<double>.
199 template<
typename T1,
typename T2,
typename T3 >
200 struct UseDoublePrecisionComplexKernel {
201 typedef complex<double> Type;
202 enum { value = IsSame<typename T1::ElementType,Type>::value &&
203 IsSame<typename T2::ElementType,Type>::value &&
204 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the non-BLAS fallback: `value` is nonzero when
// BLAZE_BLAS_MODE is off, or when none of the four precision-specific kernel
// switches (float, double, complex<float>, complex<double>) applies.
214 template<
typename T1,
typename T2,
typename T3 >
215 struct UseDefaultKernel {
216 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
217 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
218 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
219 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel. `value` is nonzero
// only when all three matrix types are flagged vectorizable, share one element
// type, and IntrinsicTrait reports addition, subtraction and multiplication
// for that element type.
229 template<
typename T1,
typename T2,
typename T3 >
230 struct UseVectorizedDefaultKernel {
231 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
232 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
233 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
234 IntrinsicTrait<typename T1::ElementType>::addition &&
235 IntrinsicTrait<typename T1::ElementType>::subtraction &&
236 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compilation flags of the expression.
// vectorizable: requires both operand types to be vectorizable; the rest of
// the condition continues on lines not shown here.
267 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
// smpAssignable: set only when neither evaluateLeft nor evaluateRight is set.
273 enum { smpAssignable = !evaluateLeft && !evaluateRight };
// Fragment of the element-access computation (the enclosing signature and the
// initialisation of `tmp` are not shown). The inner dimension is walked two
// indices per iteration starting at k=1.
303 if(
lhs_.columns() != 0UL ) {
// end = ((columns-1) rounded down to an even number) + 1, so `end` is odd and
// k+1 stays below columns() for every k visited by the loop.
304 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
306 for(
size_t k=1UL; k<end; k+=2UL ) {
308 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
// A remaining index exists when `end` did not reach columns(); its handling
// is on lines not shown.
310 if( end <
lhs_.columns() ) {
// Reports the column count of the right-hand operand (enclosing function
// header not shown; presumably columns() - confirm).
338 return rhs_.columns();
// Alias query (function header not shown; presumably canAlias - confirm).
// Returns true when either operand reports being aliased with `alias`.
368 template<
typename T >
370 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Alias query (function header not shown; presumably isAliased - confirm).
// Returns true when either operand reports being aliased with `alias`.
380 template<
typename T >
382 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Alignment query (function header not shown): true only when both operands
// report being aligned.
392 return lhs_.isAligned() &&
rhs_.isAligned();
// Assignment of the product to a target matrix (fragment; signature and the
// definitions of A and B are not shown).
424 template<
typename MT
// Case 1: the target has no rows or no columns (branch body not shown).
433 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Case 2: the inner dimension is zero (branch body not shown).
436 else if( rhs.lhs_.columns() == 0UL ) {
// General case: hand the target and both operands to the kernel dispatcher.
451 TDMatTDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Kernel dispatcher for C = A * B (fragment). Forwards either to the default
// kernel or to the BLAS kernel selection; the condition choosing between the
// two calls is on lines not shown.
467 template<
typename MT3
471 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
474 TDMatTDMatMultExpr::selectDefaultAssignKernel( C, A, B );
476 TDMatTDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Overload of the C = A * B dispatcher that participates in overload
// resolution only when UseSMPAssignKernel<MT3,MT4,MT5> holds (body not shown).
492 template<
typename MT3
495 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
496 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Non-vectorized default kernel for C = A * B; disabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// C is the target, A the left operand, B the right operand.
// A(i,0) and B(0,j) are read unconditionally, so the inner dimension K must
// be at least 1 when this kernel runs.
517 template<
typename MT3
520 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
521 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
523 const size_t M( A.rows() );
524 const size_t N( B.columns() );
525 const size_t K( A.columns() );
527 for(
size_t i=0UL; i<M; ++i ) {
// First pass initialises row i of C with the k=0 products ...
528 for(
size_t j=0UL; j<N; ++j ) {
529 C(i,j) = A(i,0UL) * B(0UL,j);
// ... the remaining k=1..K-1 products are then accumulated on top.
531 for(
size_t k=1UL; k<K; ++k ) {
532 for(
size_t j=0UL; j<N; ++j ) {
533 C(i,j) += A(i,k) * B(k,j);
// Vectorized default kernel for C = A * B, overload for targets of type
// DenseMatrix<MT3,false>. Only the branch conditions are visible here: the
// choice depends on which operand type is resizable and, failing that, on
// which operand has fewer elements. The branch bodies are on lines not shown.
555 template<
typename MT3
558 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
559 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
564 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
568 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
572 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default kernel for C = A * B, overload for targets of type
// DenseMatrix<MT3,true>. Rows are processed in tiers of 8, 4, 2 and 1
// intrinsic vectors (IT::size elements each); accumulators xmm1..xmm8 collect
// A.load(...) * b products over k and are then stored into C.
// Several declarations (i, j, a1..a4, b1, b2 and some accumulators) are on
// lines not shown here.
// NOTE(review): the tier conditions only require the last vector to start
// below M, which presumably relies on padded storage - confirm.
598 template<
typename MT3
601 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
602 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
604 typedef IntrinsicTrait<ElementType> IT;
606 const size_t M( A.rows() );
607 const size_t N( B.columns() );
608 const size_t K( A.columns() );
// Tier 1: eight vectors of rows per step, one column of C at a time.
612 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
613 for(
size_t j=0UL; j<N; ++j ) {
614 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
615 for(
size_t k=0UL; k<K; ++k ) {
617 xmm1 = xmm1 + A.load(i ,k) * b1;
618 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
619 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
620 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
621 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
622 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
623 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
624 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
626 (~C).
store( i , j, xmm1 );
627 (~C).
store( i+IT::size , j, xmm2 );
628 (~C).
store( i+IT::size*2UL, j, xmm3 );
629 (~C).
store( i+IT::size*3UL, j, xmm4 );
630 (~C).
store( i+IT::size*4UL, j, xmm5 );
631 (~C).
store( i+IT::size*5UL, j, xmm6 );
632 (~C).
store( i+IT::size*6UL, j, xmm7 );
633 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Tier 2: four vectors of rows per step; columns are taken in pairs first
// (xmm1..4 for column j, xmm5..8 for column j+1), then singly.
636 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
638 for( ; (j+2UL) <= N; j+=2UL ) {
639 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
640 for(
size_t k=0UL; k<K; ++k ) {
647 xmm1 = xmm1 + a1 * b1;
648 xmm2 = xmm2 + a2 * b1;
649 xmm3 = xmm3 + a3 * b1;
650 xmm4 = xmm4 + a4 * b1;
651 xmm5 = xmm5 + a1 * b2;
652 xmm6 = xmm6 + a2 * b2;
653 xmm7 = xmm7 + a3 * b2;
654 xmm8 = xmm8 + a4 * b2;
656 (~C).
store( i , j , xmm1 );
657 (~C).
store( i+IT::size , j , xmm2 );
658 (~C).
store( i+IT::size*2UL, j , xmm3 );
659 (~C).
store( i+IT::size*3UL, j , xmm4 );
660 (~C).
store( i , j+1UL, xmm5 );
661 (~C).
store( i+IT::size , j+1UL, xmm6 );
662 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
663 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column part of tier 2 (its guard is on a line not shown).
667 for(
size_t k=0UL; k<K; ++k ) {
669 xmm1 = xmm1 + A.load(i ,k) * b1;
670 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
671 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
672 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
674 (~C).
store( i , j, xmm1 );
675 (~C).
store( i+IT::size , j, xmm2 );
676 (~C).
store( i+IT::size*2UL, j, xmm3 );
677 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Tier 3: two vectors of rows per step; column pairs first, then singly.
680 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
682 for( ; (j+2UL) <= N; j+=2UL ) {
684 for(
size_t k=0UL; k<K; ++k ) {
689 xmm1 = xmm1 + a1 * b1;
690 xmm2 = xmm2 + a2 * b1;
691 xmm3 = xmm3 + a1 * b2;
692 xmm4 = xmm4 + a2 * b2;
694 (~C).
store( i , j , xmm1 );
695 (~C).
store( i+IT::size, j , xmm2 );
696 (~C).
store( i , j+1UL, xmm3 );
697 (~C).
store( i+IT::size, j+1UL, xmm4 );
701 for(
size_t k=0UL; k<K; ++k ) {
703 xmm1 = xmm1 + A.load(i ,k) * b1;
704 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
706 (~C).
store( i , j, xmm1 );
707 (~C).
store( i+IT::size, j, xmm2 );
// Tier 4: a single vector of rows; each B element is broadcast with set().
712 for( ; (j+2UL) <= N; j+=2UL ) {
714 for(
size_t k=0UL; k<K; ++k ) {
716 xmm1 = xmm1 + a1 *
set( B(k,j ) );
717 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
719 (~C).
store( i, j , xmm1 );
720 (~C).
store( i, j+1UL, xmm2 );
724 for(
size_t k=0UL; k<K; ++k ) {
725 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
727 (~C).
store( i, j, xmm1 );
// BLAS-stage fallback for C = A * B: enabled when UseDefaultKernel holds and
// simply forwards to selectDefaultAssignKernel.
748 template<
typename MT3
751 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
752 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
754 selectDefaultAssignKernel( C, A, B );
// BLAS kernel for C = A * B with float elements (enabled by
// UseSinglePrecisionKernel). Dimensions and spacings are converted to int via
// boost::numeric_cast and passed to cblas_sgemm with alpha = 1.0F and
// beta = 0.0F.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>: a row-major
// target uses CblasRowMajor with both operands CblasTrans, otherwise
// CblasColMajor with CblasNoTrans.
774 template<
typename MT3
777 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
778 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
780 using boost::numeric_cast;
786 const int M ( numeric_cast<int>( A.rows() ) );
787 const int N ( numeric_cast<int>( B.columns() ) );
788 const int K ( numeric_cast<int>( A.columns() ) );
789 const int lda( numeric_cast<int>( A.spacing() ) );
790 const int ldb( numeric_cast<int>( B.spacing() ) );
791 const int ldc( numeric_cast<int>( C.spacing() ) );
793 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
794 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
795 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
796 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS kernel for C = A * B with double elements (enabled by
// UseDoublePrecisionKernel). Dimensions and spacings are converted to int via
// boost::numeric_cast and passed to cblas_dgemm with alpha = 1.0 and
// beta = 0.0.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
817 template<
typename MT3
820 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
821 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
823 using boost::numeric_cast;
829 const int M ( numeric_cast<int>( A.rows() ) );
830 const int N ( numeric_cast<int>( B.columns() ) );
831 const int K ( numeric_cast<int>( A.columns() ) );
832 const int lda( numeric_cast<int>( A.spacing() ) );
833 const int ldb( numeric_cast<int>( B.spacing() ) );
834 const int ldc( numeric_cast<int>( C.spacing() ) );
836 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
837 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
838 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
839 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS kernel for C = A * B with complex<float> elements (enabled by
// UseSinglePrecisionComplexKernel). Calls cblas_cgemm with alpha = (1,0) and
// beta = (0,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
// NOTE(review): alpha/beta are non-const here, whereas the add-assign and
// sub-assign complex kernels in this file declare them const.
860 template<
typename MT3
863 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
864 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
866 using boost::numeric_cast;
875 const int M ( numeric_cast<int>( A.rows() ) );
876 const int N ( numeric_cast<int>( B.columns() ) );
877 const int K ( numeric_cast<int>( A.columns() ) );
878 const int lda( numeric_cast<int>( A.spacing() ) );
879 const int ldb( numeric_cast<int>( B.spacing() ) );
880 const int ldc( numeric_cast<int>( C.spacing() ) );
881 complex<float> alpha( 1.0F, 0.0F );
882 complex<float> beta ( 0.0F, 0.0F );
884 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
885 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
886 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
887 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C = A * B with complex<double> elements (enabled by
// UseDoublePrecisionComplexKernel). Calls cblas_zgemm with alpha = (1,0) and
// beta = (0,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
// NOTE(review): alpha/beta are non-const here, whereas the add-assign and
// sub-assign complex kernels in this file declare them const.
908 template<
typename MT3
911 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
912 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
914 using boost::numeric_cast;
923 const int M ( numeric_cast<int>( A.rows() ) );
924 const int N ( numeric_cast<int>( B.columns() ) );
925 const int K ( numeric_cast<int>( A.columns() ) );
926 const int lda( numeric_cast<int>( A.spacing() ) );
927 const int ldb( numeric_cast<int>( B.spacing() ) );
928 const int ldc( numeric_cast<int>( C.spacing() ) );
929 complex<double> alpha( 1.0, 0.0 );
930 complex<double> beta ( 0.0, 0.0 );
932 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
933 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
934 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
935 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment overload that goes through a temporary (fragment; signature not
// shown). TmpType is ResultType or OppositeType depending on the flag SO, and
// the expression is first materialised into a const temporary of that type.
954 template<
typename MT
960 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
972 const TmpType tmp( rhs );
// Addition assignment of the product to a target matrix (fragment; signature
// and the definitions of A and B are not shown).
991 template<
typename MT
// Guard: empty target or zero inner dimension (branch body not shown).
1000 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1014 TDMatTDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Kernel dispatcher for C += A * B, disabled when UseSMPAssignKernel holds.
// Forwards either to the default kernel or to the BLAS kernel selection; the
// condition choosing between the two calls is on lines not shown.
1030 template<
typename MT3
1033 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1034 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1037 TDMatTDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1039 TDMatTDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Overload of the C += A * B dispatcher that participates in overload
// resolution only when UseSMPAssignKernel<MT3,MT4,MT5> holds (body not shown).
1055 template<
typename MT3
1058 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1059 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Non-vectorized default kernel for C += A * B; disabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The column loop is unrolled by two; `end` is N rounded down to an even
// number, and the column at index `end` is updated separately.
1080 template<
typename MT3
1083 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1086 const size_t M( A.rows() );
1087 const size_t N( B.columns() );
1088 const size_t K( A.columns() );
1091 const size_t end( N &
size_t(-2) );
1093 for(
size_t i=0UL; i<M; ++i ) {
1094 for(
size_t k=0UL; k<K; ++k ) {
1095 for(
size_t j=0UL; j<end; j+=2UL ) {
1096 C(i,j ) += A(i,k) * B(k,j );
1097 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Leftover column; NOTE(review): the guard for odd N is presumably on a line
// not shown here - confirm.
1100 C(i,end) += A(i,k) * B(k,end);
// Vectorized default kernel for C += A * B, overload for targets of type
// DenseMatrix<MT3,false>. Only the branch conditions are visible here: the
// choice depends on which operand type is resizable and, failing that, on
// which operand has fewer elements. The branch bodies are on lines not shown.
1122 template<
typename MT3
1125 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1126 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1131 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1135 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1139 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default kernel for C += A * B, overload for targets of type
// DenseMatrix<MT3,true>. Rows are processed in tiers of 8, 4, 2 and 1
// intrinsic vectors (IT::size elements each); accumulators xmm1..xmm8 gain
// A.load(...) * b products over k and are then stored into C.
// The accumulator declarations (presumably preloaded from C - confirm) and
// the definitions of i, j, a1..a4, b1, b2 are on lines not shown here.
1165 template<
typename MT3
1168 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1169 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1171 typedef IntrinsicTrait<ElementType> IT;
1173 const size_t M( A.rows() );
1174 const size_t N( B.columns() );
1175 const size_t K( A.columns() );
// Tier 1: eight vectors of rows per step, one column of C at a time.
1179 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1180 for(
size_t j=0UL; j<N; ++j ) {
1189 for(
size_t k=0UL; k<K; ++k ) {
1191 xmm1 = xmm1 + A.load(i ,k) * b1;
1192 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1193 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1194 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1195 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1196 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1197 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1198 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1200 (~C).
store( i , j, xmm1 );
1201 (~C).
store( i+IT::size , j, xmm2 );
1202 (~C).
store( i+IT::size*2UL, j, xmm3 );
1203 (~C).
store( i+IT::size*3UL, j, xmm4 );
1204 (~C).
store( i+IT::size*4UL, j, xmm5 );
1205 (~C).
store( i+IT::size*5UL, j, xmm6 );
1206 (~C).
store( i+IT::size*6UL, j, xmm7 );
1207 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Tier 2: four vectors of rows per step; columns are taken in pairs first
// (xmm1..4 for column j, xmm5..8 for column j+1), then singly.
1210 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1212 for( ; (j+2UL) <= N; j+=2UL ) {
1221 for(
size_t k=0UL; k<K; ++k ) {
1228 xmm1 = xmm1 + a1 * b1;
1229 xmm2 = xmm2 + a2 * b1;
1230 xmm3 = xmm3 + a3 * b1;
1231 xmm4 = xmm4 + a4 * b1;
1232 xmm5 = xmm5 + a1 * b2;
1233 xmm6 = xmm6 + a2 * b2;
1234 xmm7 = xmm7 + a3 * b2;
1235 xmm8 = xmm8 + a4 * b2;
1237 (~C).
store( i , j , xmm1 );
1238 (~C).
store( i+IT::size , j , xmm2 );
1239 (~C).
store( i+IT::size*2UL, j , xmm3 );
1240 (~C).
store( i+IT::size*3UL, j , xmm4 );
1241 (~C).
store( i , j+1UL, xmm5 );
1242 (~C).
store( i+IT::size , j+1UL, xmm6 );
1243 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1244 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column part of tier 2 (its guard is on a line not shown).
1251 for(
size_t k=0UL; k<K; ++k ) {
1253 xmm1 = xmm1 + A.load(i ,k) * b1;
1254 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1255 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1256 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1258 (~C).
store( i , j, xmm1 );
1259 (~C).
store( i+IT::size , j, xmm2 );
1260 (~C).
store( i+IT::size*2UL, j, xmm3 );
1261 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Tier 3: two vectors of rows per step; column pairs first, then singly.
1264 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1266 for( ; (j+2UL) <= N; j+=2UL ) {
1271 for(
size_t k=0UL; k<K; ++k ) {
1276 xmm1 = xmm1 + a1 * b1;
1277 xmm2 = xmm2 + a2 * b1;
1278 xmm3 = xmm3 + a1 * b2;
1279 xmm4 = xmm4 + a2 * b2;
1281 (~C).
store( i , j , xmm1 );
1282 (~C).
store( i+IT::size, j , xmm2 );
1283 (~C).
store( i , j+1UL, xmm3 );
1284 (~C).
store( i+IT::size, j+1UL, xmm4 );
1289 for(
size_t k=0UL; k<K; ++k ) {
1291 xmm1 = xmm1 + A.load(i ,k) * b1;
1292 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1294 (~C).
store( i , j, xmm1 );
1295 (~C).
store( i+IT::size, j, xmm2 );
// Tier 4: a single vector of rows; each B element is broadcast with set().
1300 for( ; (j+2UL) <= N; j+=2UL ) {
1303 for(
size_t k=0UL; k<K; ++k ) {
1305 xmm1 = xmm1 + a1 *
set( B(k,j ) );
1306 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
1308 (~C).
store( i, j , xmm1 );
1309 (~C).
store( i, j+1UL, xmm2 );
1313 for(
size_t k=0UL; k<K; ++k ) {
1314 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
1316 (~C).
store( i, j, xmm1 );
// BLAS-stage fallback for C += A * B: enabled when UseDefaultKernel holds and
// simply forwards to selectDefaultAddAssignKernel.
1337 template<
typename MT3
1340 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1341 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1343 selectDefaultAddAssignKernel( C, A, B );
// BLAS kernel for C += A * B with float elements (enabled by
// UseSinglePrecisionKernel). Calls cblas_sgemm with alpha = 1.0F and
// beta = 1.0F, i.e. the existing content of C is kept and the product added.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1363 template<
typename MT3
1366 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1367 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1369 using boost::numeric_cast;
1375 const int M ( numeric_cast<int>( A.rows() ) );
1376 const int N ( numeric_cast<int>( B.columns() ) );
1377 const int K ( numeric_cast<int>( A.columns() ) );
1378 const int lda( numeric_cast<int>( A.spacing() ) );
1379 const int ldb( numeric_cast<int>( B.spacing() ) );
1380 const int ldc( numeric_cast<int>( C.spacing() ) );
1382 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1383 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1384 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1385 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS kernel for C += A * B with double elements (enabled by
// UseDoublePrecisionKernel). Calls cblas_dgemm with alpha = 1.0 and
// beta = 1.0, i.e. the existing content of C is kept and the product added.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1406 template<
typename MT3
1409 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1410 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1412 using boost::numeric_cast;
1418 const int M ( numeric_cast<int>( A.rows() ) );
1419 const int N ( numeric_cast<int>( B.columns() ) );
1420 const int K ( numeric_cast<int>( A.columns() ) );
1421 const int lda( numeric_cast<int>( A.spacing() ) );
1422 const int ldb( numeric_cast<int>( B.spacing() ) );
1423 const int ldc( numeric_cast<int>( C.spacing() ) );
1425 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1426 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1427 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1428 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS kernel for C += A * B with complex<float> elements (enabled by
// UseSinglePrecisionComplexKernel). Calls cblas_cgemm with alpha = (1,0) and
// beta = (1,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1449 template<
typename MT3
1452 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1453 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1455 using boost::numeric_cast;
1464 const int M ( numeric_cast<int>( A.rows() ) );
1465 const int N ( numeric_cast<int>( B.columns() ) );
1466 const int K ( numeric_cast<int>( A.columns() ) );
1467 const int lda( numeric_cast<int>( A.spacing() ) );
1468 const int ldb( numeric_cast<int>( B.spacing() ) );
1469 const int ldc( numeric_cast<int>( C.spacing() ) );
1470 const complex<float> alpha( 1.0F, 0.0F );
1471 const complex<float> beta ( 1.0F, 0.0F );
1473 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1474 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1475 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1476 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C += A * B with complex<double> elements (enabled by
// UseDoublePrecisionComplexKernel). Calls cblas_zgemm with alpha = (1,0) and
// beta = (1,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1497 template<
typename MT3
1500 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1501 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1503 using boost::numeric_cast;
1512 const int M ( numeric_cast<int>( A.rows() ) );
1513 const int N ( numeric_cast<int>( B.columns() ) );
1514 const int K ( numeric_cast<int>( A.columns() ) );
1515 const int lda( numeric_cast<int>( A.spacing() ) );
1516 const int ldb( numeric_cast<int>( B.spacing() ) );
1517 const int ldc( numeric_cast<int>( C.spacing() ) );
1518 const complex<double> alpha( 1.0, 0.0 );
1519 const complex<double> beta ( 1.0, 0.0 );
1521 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1522 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1523 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1524 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of the product to a target matrix (fragment;
// signature and the definitions of A and B are not shown).
1547 template<
typename MT
// Guard: empty target or zero inner dimension (branch body not shown).
1556 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1570 TDMatTDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Kernel dispatcher for C -= A * B, disabled when UseSMPAssignKernel holds.
// Forwards either to the default kernel or to the BLAS kernel selection; the
// condition choosing between the two calls is on lines not shown.
1586 template<
typename MT3
1589 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1590 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1593 TDMatTDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1595 TDMatTDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Overload of the C -= A * B dispatcher that participates in overload
// resolution only when UseSMPAssignKernel<MT3,MT4,MT5> holds (body not shown).
1611 template<
typename MT3
1614 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5> >::Type
1615 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
// Non-vectorized default kernel for C -= A * B; disabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The column loop is unrolled by two; `end` is N rounded down to an even
// number, and the column at index `end` is updated separately.
1636 template<
typename MT3
1639 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1640 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1642 const size_t M( A.rows() );
1643 const size_t N( B.columns() );
1644 const size_t K( A.columns() );
1647 const size_t end( N &
size_t(-2) );
1649 for(
size_t i=0UL; i<M; ++i ) {
1650 for(
size_t k=0UL; k<K; ++k ) {
1651 for(
size_t j=0UL; j<end; j+=2UL ) {
1652 C(i,j ) -= A(i,k) * B(k,j );
1653 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Leftover column; NOTE(review): the guard for odd N is presumably on a line
// not shown here - confirm.
1656 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default kernel for C -= A * B, overload for targets of type
// DenseMatrix<MT3,false>. Only the branch conditions are visible here: the
// choice depends on which operand type is resizable and, failing that, on
// which operand has fewer elements. The branch bodies are on lines not shown.
1678 template<
typename MT3
1681 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1682 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1687 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1691 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1695 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default kernel for C -= A * B, overload for targets of type
// DenseMatrix<MT3,true>. Rows are processed in tiers of 8, 4, 2 and 1
// intrinsic vectors (IT::size elements each); accumulators xmm1..xmm8 have
// A.load(...) * b products subtracted over k and are then stored into C.
// The accumulator declarations (presumably preloaded from C - confirm) and
// the definitions of i, j, a1..a4, b1, b2 are on lines not shown here.
1721 template<
typename MT3
1724 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1725 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1727 typedef IntrinsicTrait<ElementType> IT;
1729 const size_t M( A.rows() );
1730 const size_t N( B.columns() );
1731 const size_t K( A.columns() );
// Tier 1: eight vectors of rows per step, one column of C at a time.
1735 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1736 for(
size_t j=0UL; j<N; ++j ) {
1745 for(
size_t k=0UL; k<K; ++k ) {
1747 xmm1 = xmm1 - A.load(i ,k) * b1;
1748 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1749 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1750 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1751 xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1752 xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1753 xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1754 xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1756 (~C).
store( i , j, xmm1 );
1757 (~C).
store( i+IT::size , j, xmm2 );
1758 (~C).
store( i+IT::size*2UL, j, xmm3 );
1759 (~C).
store( i+IT::size*3UL, j, xmm4 );
1760 (~C).
store( i+IT::size*4UL, j, xmm5 );
1761 (~C).
store( i+IT::size*5UL, j, xmm6 );
1762 (~C).
store( i+IT::size*6UL, j, xmm7 );
1763 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Tier 2: four vectors of rows per step; columns are taken in pairs first
// (xmm1..4 for column j, xmm5..8 for column j+1), then singly.
1766 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1768 for( ; (j+2UL) <= N; j+=2UL ) {
1777 for(
size_t k=0UL; k<K; ++k ) {
1784 xmm1 = xmm1 - a1 * b1;
1785 xmm2 = xmm2 - a2 * b1;
1786 xmm3 = xmm3 - a3 * b1;
1787 xmm4 = xmm4 - a4 * b1;
1788 xmm5 = xmm5 - a1 * b2;
1789 xmm6 = xmm6 - a2 * b2;
1790 xmm7 = xmm7 - a3 * b2;
1791 xmm8 = xmm8 - a4 * b2;
1793 (~C).
store( i , j , xmm1 );
1794 (~C).
store( i+IT::size , j , xmm2 );
1795 (~C).
store( i+IT::size*2UL, j , xmm3 );
1796 (~C).
store( i+IT::size*3UL, j , xmm4 );
1797 (~C).
store( i , j+1UL, xmm5 );
1798 (~C).
store( i+IT::size , j+1UL, xmm6 );
1799 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1800 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column part of tier 2 (its guard is on a line not shown).
1807 for(
size_t k=0UL; k<K; ++k ) {
1809 xmm1 = xmm1 - A.load(i ,k) * b1;
1810 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1811 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1812 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1814 (~C).
store( i , j, xmm1 );
1815 (~C).
store( i+IT::size , j, xmm2 );
1816 (~C).
store( i+IT::size*2UL, j, xmm3 );
1817 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Tier 3: two vectors of rows per step; column pairs first, then singly.
1820 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1822 for( ; (j+2UL) <= N; j+=2UL ) {
1827 for(
size_t k=0UL; k<K; ++k ) {
1832 xmm1 = xmm1 - a1 * b1;
1833 xmm2 = xmm2 - a2 * b1;
1834 xmm3 = xmm3 - a1 * b2;
1835 xmm4 = xmm4 - a2 * b2;
1837 (~C).
store( i , j , xmm1 );
1838 (~C).
store( i+IT::size, j , xmm2 );
1839 (~C).
store( i , j+1UL, xmm3 );
1840 (~C).
store( i+IT::size, j+1UL, xmm4 );
1845 for(
size_t k=0UL; k<K; ++k ) {
1847 xmm1 = xmm1 - A.load(i ,k) * b1;
1848 xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1850 (~C).
store( i , j, xmm1 );
1851 (~C).
store( i+IT::size, j, xmm2 );
// Tier 4: a single vector of rows; each B element is broadcast with set().
1856 for( ; (j+2UL) <= N; j+=2UL ) {
1859 for(
size_t k=0UL; k<K; ++k ) {
1861 xmm1 = xmm1 - a1 *
set( B(k,j ) );
1862 xmm2 = xmm2 - a1 *
set( B(k,j+1UL) );
1864 (~C).
store( i, j , xmm1 );
1865 (~C).
store( i, j+1UL, xmm2 );
1869 for(
size_t k=0UL; k<K; ++k ) {
1870 xmm1 = xmm1 - A.load(i,k) *
set( B(k,j) );
1872 (~C).
store( i, j, xmm1 );
// BLAS-stage fallback for C -= A * B: enabled when UseDefaultKernel holds and
// simply forwards to selectDefaultSubAssignKernel.
1893 template<
typename MT3
1896 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1897 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1899 selectDefaultSubAssignKernel( C, A, B );
// BLAS kernel for C -= A * B with float elements (enabled by
// UseSinglePrecisionKernel). Calls cblas_sgemm with alpha = -1.0F and
// beta = 1.0F, i.e. the product is subtracted from the existing content of C.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1919 template<
typename MT3
1922 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1923 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1925 using boost::numeric_cast;
1931 const int M ( numeric_cast<int>( A.rows() ) );
1932 const int N ( numeric_cast<int>( B.columns() ) );
1933 const int K ( numeric_cast<int>( A.columns() ) );
1934 const int lda( numeric_cast<int>( A.spacing() ) );
1935 const int ldb( numeric_cast<int>( B.spacing() ) );
1936 const int ldc( numeric_cast<int>( C.spacing() ) );
1938 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1939 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1940 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1941 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS kernel for C -= A * B with double elements (enabled by
// UseDoublePrecisionKernel). Calls cblas_dgemm with alpha = -1.0 and
// beta = 1.0, i.e. the product is subtracted from the existing content of C.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
1962 template<
typename MT3
1965 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1966 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1968 using boost::numeric_cast;
1974 const int M ( numeric_cast<int>( A.rows() ) );
1975 const int N ( numeric_cast<int>( B.columns() ) );
1976 const int K ( numeric_cast<int>( A.columns() ) );
1977 const int lda( numeric_cast<int>( A.spacing() ) );
1978 const int ldb( numeric_cast<int>( B.spacing() ) );
1979 const int ldc( numeric_cast<int>( C.spacing() ) );
1981 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1982 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1983 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1984 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS kernel for C -= A * B with complex<float> elements (enabled by
// UseSinglePrecisionComplexKernel). Calls cblas_cgemm with alpha = (-1,0) and
// beta = (1,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
2005 template<
typename MT3
2008 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2009 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2011 using boost::numeric_cast;
2020 const int M ( numeric_cast<int>( A.rows() ) );
2021 const int N ( numeric_cast<int>( B.columns() ) );
2022 const int K ( numeric_cast<int>( A.columns() ) );
2023 const int lda( numeric_cast<int>( A.spacing() ) );
2024 const int ldb( numeric_cast<int>( B.spacing() ) );
2025 const int ldc( numeric_cast<int>( C.spacing() ) );
2026 const complex<float> alpha( -1.0F, 0.0F );
2027 const complex<float> beta ( 1.0F, 0.0F );
2029 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2030 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2031 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2032 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C -= A * B with complex<double> elements (enabled by
// UseDoublePrecisionComplexKernel). Calls cblas_zgemm with alpha = (-1,0) and
// beta = (1,0), both passed by address.
// Layout and transposition flags follow IsRowMajorMatrix<MT3>.
2053 template<
typename MT3
2056 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2057 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
2059 using boost::numeric_cast;
2068 const int M ( numeric_cast<int>( A.rows() ) );
2069 const int N ( numeric_cast<int>( B.columns() ) );
2070 const int K ( numeric_cast<int>( A.columns() ) );
2071 const int lda( numeric_cast<int>( A.spacing() ) );
2072 const int ldb( numeric_cast<int>( B.spacing() ) );
2073 const int ldc( numeric_cast<int>( C.spacing() ) );
2074 const complex<double> alpha( -1.0, 0.0 );
2075 const complex<double> beta ( 1.0, 0.0 );
2077 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2078 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2079 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2080 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Specialization of DMatScalarMultExpr for a TDMatTDMatMultExpr<MT1,MT2>
// operand scaled by a scalar of type ST (fragment; the remaining template
// parameters and the class-head line are not shown).
// It derives publicly from DenseMatrix parameterised on itself and privately
// from the MatScalarMultExpr and Computation bases.
2126 template<
typename MT1
2130 :
public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
2131 ,
private MatScalarMultExpr
2132 ,
private Computation
// MMM: shorthand for the wrapped matrix/matrix multiplication expression type.
2136 typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
// evaluateLeft: set when the left operand type MT1 is a computation or
// requires evaluation.
2148 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
// evaluateRight: the same test applied to the right operand type MT2.
2153 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time switch for the SMP assignment kernels: `value` is nonzero when
// either evaluateLeft or evaluateRight is set. The template parameters T1..T4
// do not appear in the visible expression.
2160 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2161 struct UseSMPAssignKernel {
2162 enum { value = evaluateLeft || evaluateRight };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 all satisfy IsFloat and the scalar type T4 is not complex.
2171 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2172 struct UseSinglePrecisionKernel {
2173 enum { value = IsFloat<typename T1::ElementType>::value &&
2174 IsFloat<typename T2::ElementType>::value &&
2175 IsFloat<typename T3::ElementType>::value &&
2176 !IsComplex<T4>::value };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 all satisfy IsDouble and the scalar type T4 is not complex.
2185 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2186 struct UseDoublePrecisionKernel {
2187 enum { value = IsDouble<typename T1::ElementType>::value &&
2188 IsDouble<typename T2::ElementType>::value &&
2189 IsDouble<typename T3::ElementType>::value &&
2190 !IsComplex<T4>::value };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 are all exactly complex<float>. Unlike the real-valued
// switches of this class, no scalar type parameter is taken.
2199 template<
typename T1,
typename T2,
typename T3 >
2200 struct UseSinglePrecisionComplexKernel {
2201 typedef complex<float> Type;
2202 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2203 IsSame<typename T2::ElementType,Type>::value &&
2204 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch: `value` is nonzero only when the element types of
// T1, T2 and T3 are all exactly complex<double>. Unlike the real-valued
// switches of this class, no scalar type parameter is taken.
2213 template<
typename T1,
typename T2,
typename T3 >
2214 struct UseDoublePrecisionComplexKernel {
2215 typedef complex<double> Type;
2216 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2217 IsSame<typename T2::ElementType,Type>::value &&
2218 IsSame<typename T3::ElementType,Type>::value };
// Compile-time switch for the non-BLAS fallback: `value` is nonzero when
// BLAZE_BLAS_MODE is off, or when none of the four precision-specific kernel
// switches applies. The scalar type T4 is forwarded only to the two
// real-valued switches.
2226 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2227 struct UseDefaultKernel {
2228 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2229 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2230 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2231 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time switch for the vectorized default kernel. `value` is nonzero
// only when all three matrix types are flagged vectorizable, the three element
// types and the scalar type T4 are identical, and IntrinsicTrait reports
// addition, subtraction and multiplication for that element type.
2239 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2240 struct UseVectorizedDefaultKernel {
2241 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2242 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2243 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2244 IsSame<typename T1::ElementType,T4>::value &&
2245 IntrinsicTrait<typename T1::ElementType>::addition &&
2246 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2247 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type aliases of the scaled-product expression (several of the types they
// refer to - RES, ElementType, RT1, CT1, RT2, CT2 - are declared on lines not
// shown here).
// This: the type of this expression.
2253 typedef DMatScalarMultExpr<MMM,ST,true>
This;
// ResultType: multiplication result of RES and the scalar type ST.
2254 typedef typename MultTrait<RES,ST>::Type
ResultType;
// IntrinsicType: intrinsic (SIMD) type associated with ElementType.
2258 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// LeftOperand: the wrapped product expression, const-qualified.
2263 typedef const TDMatTDMatMultExpr<MT1,MT2>
LeftOperand;
// LT / RT: operand types used by the kernels - `const RT1` / `const RT2` when
// the corresponding evaluate flag is set, otherwise CT1 / CT2.
2269 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2272 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
// vectorizable: both operand types are vectorizable, ET1, ET2 and the scalar
// type ST are identical, and IntrinsicTrait reports addition and
// multiplication for ET1.
2277 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2278 IsSame<ET1,ET2>::value &&
2279 IsSame<ET1,ST>::value &&
2280 IntrinsicTrait<ET1>::addition &&
2281 IntrinsicTrait<ET1>::multiplication };
// smpAssignable: set only when neither operand needs to be evaluated first.
2284 enum { smpAssignable = !evaluateLeft && !evaluateRight };
// Constructor taking the wrapped product expression and the scalar factor
// (initialiser list and body are on lines not shown).
2293 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
2309 return matrix_(i,j) * scalar_;
// Returns the number of rows of the expression, forwarded from the wrapped
// matrix operand (scaling does not change the dimensions).
2318 inline size_t rows()
const {
2319 return matrix_.rows();
// Returns the number of columns of the expression, forwarded from the wrapped
// matrix operand.
2328 inline size_t columns()
const {
2329 return matrix_.columns();
// Returns whether the expression can alias with the object at the given
// address. The query is delegated unchanged to the wrapped matrix operand.
//   alias : address of the object to be checked
2359 template<
typename T >
2360 inline bool canAlias(
const T* alias )
const {
2361 return matrix_.canAlias( alias );
// Returns whether the expression is aliased with the object at the given
// address. The query is delegated unchanged to the wrapped matrix operand.
//   alias : address of the object to be checked
2371 template<
typename T >
2372 inline bool isAliased(
const T* alias )
const {
2373 return matrix_.isAliased( alias );
2383 return matrix_.isAligned();
2393 typename MMM::RightOperand B( matrix_.rightOperand() );
2415 template<
typename MT3
2417 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2424 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2425 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2427 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2430 else if( left.columns() == 0UL ) {
2445 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
2460 template<
typename MT3
2464 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2465 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2468 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2470 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
2485 template<
typename MT3
2489 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
2490 selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2510 template<
typename MT3
2514 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2515 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2517 for(
size_t i=0UL; i<A.rows(); ++i ) {
2518 for(
size_t k=0UL; k<B.columns(); ++k ) {
2519 C(i,k) = A(i,0UL) * B(0UL,k);
2521 for(
size_t j=1UL; j<A.columns(); ++j ) {
2522 for(
size_t k=0UL; k<B.columns(); ++k ) {
2523 C(i,k) += A(i,j) * B(j,k);
2526 for(
size_t k=0UL; k<B.columns(); ++k ) {
2547 template<
typename MT3
2551 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2552 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2557 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2561 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2565 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2590 template<
typename MT3
2594 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2595 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2597 typedef IntrinsicTrait<ElementType> IT;
2599 const size_t M( A.rows() );
2600 const size_t N( B.columns() );
2601 const size_t K( A.columns() );
2607 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2608 for(
size_t j=0UL; j<N; ++j ) {
2609 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2610 for(
size_t k=0UL; k<K; ++k ) {
2612 xmm1 = xmm1 + A.load(i ,k) * b1;
2613 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2614 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2615 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2616 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2617 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2618 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2619 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2621 (~C).
store( i , j, xmm1 * factor );
2622 (~C).
store( i+IT::size , j, xmm2 * factor );
2623 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2624 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
2625 (~C).
store( i+IT::size*4UL, j, xmm5 * factor );
2626 (~C).
store( i+IT::size*5UL, j, xmm6 * factor );
2627 (~C).
store( i+IT::size*6UL, j, xmm7 * factor );
2628 (~C).
store( i+IT::size*7UL, j, xmm8 * factor );
2631 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2633 for( ; (j+2UL) <= N; j+=2UL ) {
2634 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2635 for(
size_t k=0UL; k<K; ++k ) {
2642 xmm1 = xmm1 + a1 * b1;
2643 xmm2 = xmm2 + a2 * b1;
2644 xmm3 = xmm3 + a3 * b1;
2645 xmm4 = xmm4 + a4 * b1;
2646 xmm5 = xmm5 + a1 * b2;
2647 xmm6 = xmm6 + a2 * b2;
2648 xmm7 = xmm7 + a3 * b2;
2649 xmm8 = xmm8 + a4 * b2;
2651 (~C).
store( i , j , xmm1 * factor );
2652 (~C).
store( i+IT::size , j , xmm2 * factor );
2653 (~C).
store( i+IT::size*2UL, j , xmm3 * factor );
2654 (~C).
store( i+IT::size*3UL, j , xmm4 * factor );
2655 (~C).
store( i , j+1UL, xmm5 * factor );
2656 (~C).
store( i+IT::size , j+1UL, xmm6 * factor );
2657 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2658 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 * factor );
2662 for(
size_t k=0UL; k<K; ++k ) {
2664 xmm1 = xmm1 + A.load(i ,k) * b1;
2665 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2666 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2667 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2669 (~C).
store( i , j, xmm1 * factor );
2670 (~C).
store( i+IT::size , j, xmm2 * factor );
2671 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2672 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
2675 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2677 for( ; (j+2UL) <= N; j+=2UL ) {
2679 for(
size_t k=0UL; k<K; ++k ) {
2684 xmm1 = xmm1 + a1 * b1;
2685 xmm2 = xmm2 + a2 * b1;
2686 xmm3 = xmm3 + a1 * b2;
2687 xmm4 = xmm4 + a2 * b2;
2689 (~C).
store( i , j , xmm1 * factor );
2690 (~C).
store( i+IT::size, j , xmm2 * factor );
2691 (~C).
store( i , j+1UL, xmm3 * factor );
2692 (~C).
store( i+IT::size, j+1UL, xmm4 * factor );
2696 for(
size_t k=0UL; k<K; ++k ) {
2698 xmm1 = xmm1 + A.load(i ,k) * b1;
2699 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2701 (~C).
store( i , j, xmm1 * factor );
2702 (~C).
store( i+IT::size, j, xmm2 * factor );
2707 for( ; (j+2UL) <= N; j+=2UL ) {
2709 for(
size_t k=0UL; k<K; ++k ) {
2711 xmm1 = xmm1 + a1 *
set( B(k,j ) );
2712 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
2714 (~C).
store( i, j , xmm1 * factor );
2715 (~C).
store( i, j+1UL, xmm2 * factor );
2719 for(
size_t k=0UL; k<K; ++k ) {
2720 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
2722 (~C).
store( i, j, xmm1 * factor );
// Fallback overload of the BLAS assignment kernel.
// Enabled (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5,ST2> holds, i.e. when
// no BLAS routine applies; it simply forwards all arguments to
// selectDefaultAssignKernel.
2742 template<
typename MT3
2746 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2747 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2749 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assignment kernel for single precision operands (cblas_sgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, written through C.data() with leading dimension C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, passed to BLAS as alpha
// beta is 0.0F, so the previous contents of C do not contribute to the result.
// Dimensions and spacings are converted to the int arguments expected by CBLAS
// with boost::numeric_cast (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
2768 template<
typename MT3
2772 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2773 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2775 using boost::numeric_cast;
2781 const int M ( numeric_cast<int>( A.rows() ) );
2782 const int N ( numeric_cast<int>( B.columns() ) );
2783 const int K ( numeric_cast<int>( A.columns() ) );
2784 const int lda( numeric_cast<int>( A.spacing() ) );
2785 const int ldb( numeric_cast<int>( B.spacing() ) );
2786 const int ldc( numeric_cast<int>( C.spacing() ) );
2788 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2789 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2790 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2791 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands (cblas_dgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, written through C.data() with leading dimension C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, passed to BLAS as alpha
// beta is 0.0, so the previous contents of C do not contribute to the result.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
2811 template<
typename MT3
2815 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2816 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2818 using boost::numeric_cast;
2824 const int M ( numeric_cast<int>( A.rows() ) );
2825 const int N ( numeric_cast<int>( B.columns() ) );
2826 const int K ( numeric_cast<int>( A.columns() ) );
2827 const int lda( numeric_cast<int>( A.spacing() ) );
2828 const int ldb( numeric_cast<int>( B.spacing() ) );
2829 const int ldc( numeric_cast<int>( C.spacing() ) );
2831 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2832 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2833 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2834 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for single precision complex operands (cblas_cgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, written through C.data() with leading dimension C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, converted to complex<float> and used as alpha
// alpha and beta are handed to CBLAS by address; beta is (0,0), so the previous
// contents of C do not contribute to the result.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
2854 template<
typename MT3
2858 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2859 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2861 using boost::numeric_cast;
2870 const int M ( numeric_cast<int>( A.rows() ) );
2871 const int N ( numeric_cast<int>( B.columns() ) );
2872 const int K ( numeric_cast<int>( A.columns() ) );
2873 const int lda( numeric_cast<int>( A.spacing() ) );
2874 const int ldb( numeric_cast<int>( B.spacing() ) );
2875 const int ldc( numeric_cast<int>( C.spacing() ) );
2876 const complex<float> alpha( scalar );
2877 const complex<float> beta ( 0.0F, 0.0F );
2879 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2880 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2881 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2882 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for double precision complex operands (cblas_zgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, written through C.data() with leading dimension C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, converted to complex<double> and used as alpha
// alpha and beta are handed to CBLAS by address; beta is (0,0), so the previous
// contents of C do not contribute to the result.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
2902 template<
typename MT3
2906 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2907 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2909 using boost::numeric_cast;
2918 const int M ( numeric_cast<int>( A.rows() ) );
2919 const int N ( numeric_cast<int>( B.columns() ) );
2920 const int K ( numeric_cast<int>( A.columns() ) );
2921 const int lda( numeric_cast<int>( A.spacing() ) );
2922 const int ldb( numeric_cast<int>( B.spacing() ) );
2923 const int ldc( numeric_cast<int>( C.spacing() ) );
2924 const complex<double> alpha( scalar );
2925 const complex<double> beta ( 0.0, 0.0 );
2927 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2928 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2929 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2930 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2947 template<
typename MT
2949 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
2953 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
2965 const TmpType tmp( rhs );
2982 template<
typename MT3
2984 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2991 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2992 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2994 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3008 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
3023 template<
typename MT3
3027 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3028 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3031 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3033 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
3048 template<
typename MT3
3052 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3053 selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3073 template<
typename MT3
3077 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3078 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3099 template<
typename MT3
3103 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3104 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3109 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3113 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3117 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3142 template<
typename MT3
3146 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3147 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3149 typedef IntrinsicTrait<ElementType> IT;
3151 const size_t M( A.rows() );
3152 const size_t N( B.columns() );
3153 const size_t K( A.columns() );
3159 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3160 for(
size_t j=0UL; j<N; ++j ) {
3161 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3162 for(
size_t k=0UL; k<K; ++k ) {
3164 xmm1 = xmm1 + A.load(i ,k) * b1;
3165 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3166 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3167 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3168 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3169 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3170 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3171 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3173 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3174 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3175 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3176 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3177 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
3178 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
3179 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
3180 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
3183 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3185 for( ; (j+2UL) <= N; j+=2UL ) {
3186 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3187 for(
size_t k=0UL; k<K; ++k ) {
3194 xmm1 = xmm1 + a1 * b1;
3195 xmm2 = xmm2 + a2 * b1;
3196 xmm3 = xmm3 + a3 * b1;
3197 xmm4 = xmm4 + a4 * b1;
3198 xmm5 = xmm5 + a1 * b2;
3199 xmm6 = xmm6 + a2 * b2;
3200 xmm7 = xmm7 + a3 * b2;
3201 xmm8 = xmm8 + a4 * b2;
3203 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3204 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
3205 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
3206 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
3207 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
3208 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
3209 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
3210 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
3214 for(
size_t k=0UL; k<K; ++k ) {
3216 xmm1 = xmm1 + A.load(i ,k) * b1;
3217 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3218 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3219 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3221 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3222 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
3223 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
3224 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
3227 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3229 for( ; (j+2UL) <= N; j+=2UL ) {
3231 for(
size_t k=0UL; k<K; ++k ) {
3236 xmm1 = xmm1 + a1 * b1;
3237 xmm2 = xmm2 + a2 * b1;
3238 xmm3 = xmm3 + a1 * b2;
3239 xmm4 = xmm4 + a2 * b2;
3241 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3242 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
3243 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
3244 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
3248 for(
size_t k=0UL; k<K; ++k ) {
3250 xmm1 = xmm1 + A.load(i ,k) * b1;
3251 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3253 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3254 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
3259 for( ; (j+2UL) <= N; j+=2UL ) {
3261 for(
size_t k=0UL; k<K; ++k ) {
3263 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3264 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3266 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3267 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
3271 for(
size_t k=0UL; k<K; ++k ) {
3272 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3274 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// Fallback overload of the BLAS addition assignment kernel.
// Enabled (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5,ST2> holds, i.e. when
// no BLAS routine applies; it simply forwards all arguments to
// selectDefaultAddAssignKernel.
3294 template<
typename MT3
3298 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3299 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3301 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS addition assignment kernel for single precision operands (cblas_sgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, passed to BLAS as alpha
// beta is 1.0F, so the scaled product is accumulated onto the existing
// contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3320 template<
typename MT3
3324 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3325 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3327 using boost::numeric_cast;
3333 const int M ( numeric_cast<int>( A.rows() ) );
3334 const int N ( numeric_cast<int>( B.columns() ) );
3335 const int K ( numeric_cast<int>( A.columns() ) );
3336 const int lda( numeric_cast<int>( A.spacing() ) );
3337 const int ldb( numeric_cast<int>( B.spacing() ) );
3338 const int ldc( numeric_cast<int>( C.spacing() ) );
3340 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3341 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3342 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3343 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands (cblas_dgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, passed to BLAS as alpha
// beta is 1.0, so the scaled product is accumulated onto the existing
// contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3363 template<
typename MT3
3367 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3368 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3370 using boost::numeric_cast;
3376 const int M ( numeric_cast<int>( A.rows() ) );
3377 const int N ( numeric_cast<int>( B.columns() ) );
3378 const int K ( numeric_cast<int>( A.columns() ) );
3379 const int lda( numeric_cast<int>( A.spacing() ) );
3380 const int ldb( numeric_cast<int>( B.spacing() ) );
3381 const int ldc( numeric_cast<int>( C.spacing() ) );
3383 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3384 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3385 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3386 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for single precision complex operands
// (cblas_cgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, converted to complex<float> and used as alpha
// alpha and beta are handed to CBLAS by address; beta is (1,0), so the scaled
// product is accumulated onto the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3406 template<
typename MT3
3410 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3411 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3413 using boost::numeric_cast;
3422 const int M ( numeric_cast<int>( A.rows() ) );
3423 const int N ( numeric_cast<int>( B.columns() ) );
3424 const int K ( numeric_cast<int>( A.columns() ) );
3425 const int lda( numeric_cast<int>( A.spacing() ) );
3426 const int ldb( numeric_cast<int>( B.spacing() ) );
3427 const int ldc( numeric_cast<int>( C.spacing() ) );
3428 const complex<float> alpha( scalar );
3429 const complex<float> beta ( 1.0F, 0.0F );
3431 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3432 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3433 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3434 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for double precision complex operands
// (cblas_zgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor, converted to complex<double> and used as alpha
// alpha and beta are handed to CBLAS by address; beta is (1,0), so the scaled
// product is accumulated onto the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3454 template<
typename MT3
3458 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3459 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3461 using boost::numeric_cast;
3470 const int M ( numeric_cast<int>( A.rows() ) );
3471 const int N ( numeric_cast<int>( B.columns() ) );
3472 const int K ( numeric_cast<int>( A.columns() ) );
3473 const int lda( numeric_cast<int>( A.spacing() ) );
3474 const int ldb( numeric_cast<int>( B.spacing() ) );
3475 const int ldc( numeric_cast<int>( C.spacing() ) );
3476 const complex<double> alpha( scalar );
3477 const complex<double> beta ( 1.0, 0.0 );
3479 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3480 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3481 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3482 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
3503 template<
typename MT3
3505 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
3512 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3513 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3515 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3529 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3544 template<
typename MT3
3548 static inline typename DisableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3549 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3552 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3554 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
3569 template<
typename MT3
3573 static inline typename EnableIf< UseSMPAssignKernel<MT3,MT4,MT5,ST2> >::Type
3574 selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3594 template<
typename MT3
3598 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3599 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3620 template<
typename MT3
3624 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3625 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3630 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
3634 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
3638 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
3663 template<
typename MT3
3667 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3668 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3670 typedef IntrinsicTrait<ElementType> IT;
3672 const size_t M( A.rows() );
3673 const size_t N( B.columns() );
3674 const size_t K( A.columns() );
3680 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3681 for(
size_t j=0UL; j<N; ++j ) {
3682 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3683 for(
size_t k=0UL; k<K; ++k ) {
3685 xmm1 = xmm1 + A.load(i ,k) * b1;
3686 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3687 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3688 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3689 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3690 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3691 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3692 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3694 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3695 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3696 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3697 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3698 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3699 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3700 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3701 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
3704 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3706 for( ; (j+2UL) <= N; j+=2UL ) {
3707 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3708 for(
size_t k=0UL; k<K; ++k ) {
3715 xmm1 = xmm1 + a1 * b1;
3716 xmm2 = xmm2 + a2 * b1;
3717 xmm3 = xmm3 + a3 * b1;
3718 xmm4 = xmm4 + a4 * b1;
3719 xmm5 = xmm5 + a1 * b2;
3720 xmm6 = xmm6 + a2 * b2;
3721 xmm7 = xmm7 + a3 * b2;
3722 xmm8 = xmm8 + a4 * b2;
3724 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3725 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3726 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3727 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3728 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3729 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3730 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3731 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
3735 for(
size_t k=0UL; k<K; ++k ) {
3737 xmm1 = xmm1 + A.load(i ,k) * b1;
3738 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3739 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3740 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3742 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3743 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3744 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3745 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3748 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3750 for( ; (j+2UL) <= N; j+=2UL ) {
3752 for(
size_t k=0UL; k<K; ++k ) {
3757 xmm1 = xmm1 + a1 * b1;
3758 xmm2 = xmm2 + a2 * b1;
3759 xmm3 = xmm3 + a1 * b2;
3760 xmm4 = xmm4 + a2 * b2;
3762 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3763 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3764 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3765 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
3769 for(
size_t k=0UL; k<K; ++k ) {
3771 xmm1 = xmm1 + A.load(i ,k) * b1;
3772 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3774 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3775 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
3780 for( ; (j+2UL) <= N; j+=2UL ) {
3782 for(
size_t k=0UL; k<K; ++k ) {
3784 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3785 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3787 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3788 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
3792 for(
size_t k=0UL; k<K; ++k ) {
3793 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3795 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// Fallback overload of the BLAS subtraction assignment kernel.
// Enabled (via EnableIf) when UseDefaultKernel<MT3,MT4,MT5,ST2> holds, i.e. when
// no BLAS routine applies; it simply forwards all arguments to
// selectDefaultSubAssignKernel.
3815 template<
typename MT3
3819 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3820 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3822 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS subtraction assignment kernel for single precision operands (cblas_sgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor; its negation is passed to BLAS as alpha
// With alpha = -scalar and beta = 1.0F the scaled product is subtracted from
// the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3841 template<
typename MT3
3845 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3846 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3848 using boost::numeric_cast;
3854 const int M ( numeric_cast<int>( A.rows() ) );
3855 const int N ( numeric_cast<int>( B.columns() ) );
3856 const int K ( numeric_cast<int>( A.columns() ) );
3857 const int lda( numeric_cast<int>( A.spacing() ) );
3858 const int ldb( numeric_cast<int>( B.spacing() ) );
3859 const int ldc( numeric_cast<int>( C.spacing() ) );
3861 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3862 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3863 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3864 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands (cblas_dgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor; its negation is passed to BLAS as alpha
// With alpha = -scalar and beta = 1.0 the scaled product is subtracted from
// the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3884 template<
typename MT3
3888 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3889 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3891 using boost::numeric_cast;
3897 const int M ( numeric_cast<int>( A.rows() ) );
3898 const int N ( numeric_cast<int>( B.columns() ) );
3899 const int K ( numeric_cast<int>( A.columns() ) );
3900 const int lda( numeric_cast<int>( A.spacing() ) );
3901 const int ldb( numeric_cast<int>( B.spacing() ) );
3902 const int ldc( numeric_cast<int>( C.spacing() ) );
3904 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3905 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3906 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3907 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for single precision complex operands
// (cblas_cgemm).
// Enabled (via EnableIf) only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor; its negation is converted to complex<float> and
//            used as alpha
// alpha and beta are handed to CBLAS by address; with beta = (1,0) the scaled
// product is subtracted from the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3927 template<
typename MT3
3931 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3932 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3934 using boost::numeric_cast;
3943 const int M ( numeric_cast<int>( A.rows() ) );
3944 const int N ( numeric_cast<int>( B.columns() ) );
3945 const int K ( numeric_cast<int>( A.columns() ) );
3946 const int lda( numeric_cast<int>( A.spacing() ) );
3947 const int ldb( numeric_cast<int>( B.spacing() ) );
3948 const int ldc( numeric_cast<int>( C.spacing() ) );
3949 const complex<float> alpha( -scalar );
3950 const complex<float> beta ( 1.0F, 0.0F );
3952 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3953 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3954 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3955 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision complex operands
// (cblas_zgemm).
// Enabled (via EnableIf) only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
//   C      : target matrix, read and written through C.data()/C.spacing()
//   A, B   : left and right operand, read through data()/spacing()
//   scalar : scaling factor; its negation is converted to complex<double> and
//            used as alpha
// alpha and beta are handed to CBLAS by address; with beta = (1,0) the scaled
// product is subtracted from the existing contents of C.
// Dimensions and spacings are converted to int with boost::numeric_cast
// (a range-checked conversion).
// NOTE(review): both operands are flagged CblasTrans when MT3 is row-major --
// presumably because A and B are stored column-major; confirm against the caller.
3975 template<
typename MT3
3979 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3980 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3982 using boost::numeric_cast;
3991 const int M ( numeric_cast<int>( A.rows() ) );
3992 const int N ( numeric_cast<int>( B.columns() ) );
3993 const int K ( numeric_cast<int>( A.columns() ) );
3994 const int lda( numeric_cast<int>( A.spacing() ) );
3995 const int ldb( numeric_cast<int>( B.spacing() ) );
3996 const int ldc( numeric_cast<int>( C.spacing() ) );
3997 const complex<double> alpha( -scalar );
3998 const complex<double> beta ( 1.0, 0.0 );
4000 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4001 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4002 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
4003 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
4069 template<
typename T1
4071 inline const TDMatTDMatMultExpr<T1,T2>
4077 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression trait fragment (the enclosing struct header is not visible in this
// excerpt). Condition: MT1 and MT2 are column-major dense matrices and VT is a
// dense column vector. The type offered for that case nests two
// TDMatDVecMultExprTrait lookups, i.e. MT1 * ( MT2 * VT ); the alternative is
// INVALID_TYPE.
// NOTE(review): assumes SelectType yields the first type when the condition is
// true -- confirm against its definition.
4094 template<
typename MT1,
typename MT2,
typename VT >
4099 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4100 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4101 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4102 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4103 , INVALID_TYPE >::Type Type;
// Expression trait fragment (the enclosing struct header is not visible in this
// excerpt). Condition: MT1 and MT2 are column-major dense matrices and VT is a
// sparse column vector. The type offered for that case applies
// TDMatSVecMultExprTrait to ( MT2, VT ) first and TDMatDVecMultExprTrait to MT1
// and that result second; the alternative is INVALID_TYPE.
// NOTE(review): assumes SelectType yields the first type when the condition is
// true -- confirm against its definition.
4112 template<
typename MT1,
typename MT2,
typename VT >
4117 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4118 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4119 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4120 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4121 , INVALID_TYPE >::Type Type;
// Expression trait fragment (the enclosing struct header is not visible in this
// excerpt). Condition: VT is a dense row vector and MT1 and MT2 are column-major
// dense matrices. The type offered for that case nests two
// TDVecTDMatMultExprTrait lookups, i.e. ( VT * MT1 ) * MT2; the alternative is
// INVALID_TYPE.
// NOTE(review): assumes SelectType yields the first type when the condition is
// true -- confirm against its definition.
4130 template<
typename VT,
typename MT1,
typename MT2 >
4135 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4136 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4137 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4138 ,
typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4139 , INVALID_TYPE >::Type Type;
// Expression trait fragment (the enclosing struct header is not visible in this
// excerpt). Condition: VT is a sparse row vector and MT1 and MT2 are
// column-major dense matrices. The type offered for that case applies
// TSVecTDMatMultExprTrait to ( VT, MT1 ) first and TDVecTDMatMultExprTrait to
// that result and MT2 second; the alternative is INVALID_TYPE.
// NOTE(review): assumes SelectType yields the first type when the condition is
// true -- confirm against its definition.
4148 template<
typename VT,
typename MT1,
typename MT2 >
4153 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4154 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
4155 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4156 ,
typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4157 , INVALID_TYPE >::Type Type;
// Expression trait fragment (the enclosing struct header is not visible in this
// excerpt). Type is the MultExprTrait result for the submatrix types of
// 'const MT1' and 'const MT2', both obtained from SubmatrixExprTrait with the
// same flag AF.
// NOTE(review): presumably the submatrix trait of the matrix product, expressed
// as the product of the operands' submatrices -- confirm against the full header.
4166 template<
typename MT1,
typename MT2,
bool AF >
4171 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4172 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
4181 template<
typename MT1,
typename MT2 >
4186 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
4195 template<
typename MT1,
typename MT2 >
4200 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:253
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:124
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4579
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:4075
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:249
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:176
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:151
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:197
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:410
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:123
Header file for the IsColumnMajorMatrix type trait.
Header file for the sparse matrix SMP implementation.
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: TDMatTDMatMultExpr.h:391
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2384
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:249
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:244
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:297
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:247
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:127
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:369
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2380
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:347
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:246
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:243
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:121
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:256
Header file for the TDMatSVecMultExprTrait class template.
Header file for the dense matrix SMP implementation.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:357
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2382
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:250
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:381
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:91
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
const size_t SMP_TDMATTDMATMULT_THRESHOLD
SMP column-major dense matrix/column-major dense matrix multiplication threshold. This threshold repre...
Definition: Thresholds.h:472
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:245
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:247
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:327
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:259
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:282
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:248
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2379
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:262
Header file for the IsColumnVector type trait.
Header file for the IsResizable type trait.
Constraint on the data type.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: TDMatTDMatMultExpr.h:401
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:337
Header file for the IsExpression type trait class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:248
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:411