35 #ifndef _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_TDMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
// Expression class template TDMatTDMatMultExpr. It derives publicly from
// DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true > and privately from
// MatMatMultExpr and Computation.
// NOTE(review): the second template parameter (MT2, used in the base class
// below) and the class body are not visible in this excerpt.
109 template<
typename MT1
111 class TDMatTDMatMultExpr :
public DenseMatrix< TDMatTDMatMultExpr<MT1,MT2>, true >
112 ,
private MatMatMultExpr
113 ,
private Computation
// Compile-time predicate over three types (T1, T2, T3). It is used through
// EnableIf to gate the cblas_sgemm-based selectBlas*Kernel overloads.
// NOTE(review): the member list is not visible in this excerpt; presumably it
// tests that all three element types are float -- confirm.
130 template<
typename T1,
typename T2,
typename T3 >
131 struct UseSinglePrecisionKernel {
// Compile-time predicate over three types (T1, T2, T3). It is used through
// EnableIf to gate the cblas_dgemm-based selectBlas*Kernel overloads.
// NOTE(review): the member list is not visible in this excerpt; presumably it
// tests that all three element types are double -- confirm.
144 template<
typename T1,
typename T2,
typename T3 >
145 struct UseDoublePrecisionKernel {
// Compile-time predicate: 'value' is non-zero only if the ElementType of all
// three types T1, T2 and T3 is exactly complex<float>.
159 template<
typename T1,
typename T2,
typename T3 >
160 struct UseSinglePrecisionComplexKernel {
// Element type all three operands are compared against.
161 typedef complex<float> Type;
162 enum { value = IsSame<typename T1::ElementType,Type>::value &&
163 IsSame<typename T2::ElementType,Type>::value &&
164 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate: 'value' is non-zero only if the ElementType of all
// three types T1, T2 and T3 is exactly complex<double>.
175 template<
typename T1,
typename T2,
typename T3 >
176 struct UseDoublePrecisionComplexKernel {
// Element type all three operands are compared against.
177 typedef complex<double> Type;
178 enum { value = IsSame<typename T1::ElementType,Type>::value &&
179 IsSame<typename T2::ElementType,Type>::value &&
180 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback: 'value' is non-zero
// if BLAZE_BLAS_MODE evaluates to false, or if none of the four BLAS kernel
// predicates (single, double, complex single, complex double) applies to the
// type triple.
190 template<
typename T1,
typename T2,
typename T3 >
191 struct UseDefaultKernel {
192 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
193 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
194 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
195 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the vectorized default kernels: 'value' is
// non-zero only if all three types report 'vectorizable', all three share the
// same ElementType, and IntrinsicTrait of that element type reports both
// 'addition' and 'multiplication'.
205 template<
typename T1,
typename T2,
typename T3 >
206 struct UseVectorizedDefaultKernel {
207 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
208 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
209 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
210 IntrinsicTrait<typename T1::ElementType>::addition &&
211 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Compile-time flag, fixed to 0 for this expression type. The kernel selectors
// in this file read a 'vectorizable' member of their operand types.
242 enum { vectorizable = 0 };
// Fragment of an element computation that accumulates products of lhs_ and
// rhs_ entries into 'tmp', taking the inner index k two at a time.
// NOTE(review): the enclosing function signature, the initialisation of 'tmp'
// and the first statement of the loop body are not visible in this excerpt.
272 if(
lhs_.columns() != 0UL ) {
// size_t(-2) is the all-ones mask with the lowest bit cleared, so 'end' is
// columns() when columns() is odd and columns()-1 when it is even.
273 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
// The loop starts at k=1; index 0 is presumably consumed by the (not
// visible) initialisation of 'tmp' -- confirm.
275 for(
size_t k=1UL; k<end; k+=2UL ) {
277 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
// True only for an even column count, where index 'end' is still unprocessed.
279 if( end <
lhs_.columns() ) {
// Returns the column count of the right-hand operand rhs_.
// NOTE(review): the enclosing function signature is not visible in this excerpt.
307 return rhs_.columns();
// Alias query: yields true if either operand (lhs_ or rhs_) reports being
// aliased with the given address.
// NOTE(review): the function signature is not visible in this excerpt; by
// position this is presumably canAlias() -- confirm.
337 template<
typename T >
339 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Alias query: yields true if either operand (lhs_ or rhs_) reports being
// aliased with the given address.
// NOTE(review): the function signature is not visible in this excerpt; by
// position this is presumably isAliased() -- confirm.
349 template<
typename T >
351 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Assignment of the product expression 'rhs' to the dense matrix 'lhs'.
// Handles the degenerate sizes first and then dispatches to either the default
// or the BLAS kernel selection.
// NOTE(review): the signature, the bodies of the two size checks and the
// condition choosing between the two kernels are not visible in this excerpt.
371 template<
typename MT
// Target matrix has no elements.
380 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Inner dimension is zero (the kernels below read index 0 of that dimension).
383 else if( rhs.lhs_.columns() == 0UL ) {
399 TDMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
401 TDMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
// Default (element-wise) kernel computing C = A * B.
//
// C: target matrix, written through C(i,j).
// A: left operand, read through A(i,k).
// B: right operand, read through B(k,j).
//
// Reads A(i,0) and B(0,j) unconditionally, so it needs A.columns() >= 1; the
// assign() dispatcher tests the zero case separately before calling a kernel.
// NOTE(review): the remaining template parameters and the return type are not
// visible in this excerpt.
420 template<
typename MT3
424 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
426 const size_t M( A.rows() );
427 const size_t N( B.columns() );
428 const size_t K( A.columns() );
430 for(
size_t i=0UL; i<M; ++i ) {
// Initialise row i of C with the k=0 products ...
431 for(
size_t j=0UL; j<N; ++j ) {
432 C(i,j) = A(i,0UL) * B(0UL,j);
// ... then accumulate the products for the remaining k.
434 for(
size_t k=1UL; k<K; ++k ) {
435 for(
size_t j=0UL; j<N; ++j ) {
436 C(i,j) += A(i,k) * B(k,j);
// Vectorized default assignment kernel for a target of type
// DenseMatrix<MT3,false>; enabled when UseVectorizedDefaultKernel holds.
// The visible conditions pick a branch from the resizability of the operand
// types and, as a tie-breaker, from the element counts of A and B.
// NOTE(review): the branch bodies are not visible in this excerpt; presumably
// each converts one operand into a temporary and re-dispatches -- confirm.
458 template<
typename MT3
461 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
462 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// Only A's type is resizable.
467 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Only B's type is resizable.
471 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Otherwise decide by size: B has no more elements than A.
475 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default assignment kernel (C = A * B) for a target of type
// DenseMatrix<MT3,true>; enabled when UseVectorizedDefaultKernel holds.
//
// Rows are processed in panels of 8, 4, 2 and 1 packed values (IT::size rows
// each, presumably the number of elements per intrinsic vector -- confirm).
// Results are accumulated in xmm registers over k and written with store().
//
// NOTE(review): several lines are not visible in this excerpt (declaration of
// i and j, the loads producing a1..a4 / b1 / b2, some accumulator declarations
// and the guards around the single-column tails).
// NOTE(review): the accumulators are default-constructed and then used in
// 'xmm = xmm + ...'; this relies on IntrinsicType's default constructor for
// the starting value (presumably zero) -- confirm.
// NOTE(review): the row-panel bounds only guarantee that the last vector of a
// panel starts below M; full-width load/store presumably relies on padded
// column storage -- confirm.
501 template<
typename MT3
504 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
505 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
507 typedef IntrinsicTrait<ElementType> IT;
509 const size_t M( A.rows() );
510 const size_t N( B.columns() );
511 const size_t K( A.columns() );
// Panels of eight packed values, one column of C at a time.
515 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
516 for(
size_t j=0UL; j<N; ++j ) {
517 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
518 for(
size_t k=0UL; k<K; ++k ) {
520 xmm1 = xmm1 + A.load(i ,k) * b1;
521 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
522 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
523 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
524 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
525 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
526 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
527 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
529 (~C).
store( i , j, xmm1 );
530 (~C).
store( i+IT::size , j, xmm2 );
531 (~C).
store( i+IT::size*2UL, j, xmm3 );
532 (~C).
store( i+IT::size*3UL, j, xmm4 );
533 (~C).
store( i+IT::size*4UL, j, xmm5 );
534 (~C).
store( i+IT::size*5UL, j, xmm6 );
535 (~C).
store( i+IT::size*6UL, j, xmm7 );
536 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Panels of four packed values; columns of C are taken in pairs first.
539 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
541 for( ; (j+2UL) <= N; j+=2UL ) {
542 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
543 for(
size_t k=0UL; k<K; ++k ) {
// xmm1..xmm4 belong to column j, xmm5..xmm8 to column j+1 (see the stores).
550 xmm1 = xmm1 + a1 * b1;
551 xmm2 = xmm2 + a2 * b1;
552 xmm3 = xmm3 + a3 * b1;
553 xmm4 = xmm4 + a4 * b1;
554 xmm5 = xmm5 + a1 * b2;
555 xmm6 = xmm6 + a2 * b2;
556 xmm7 = xmm7 + a3 * b2;
557 xmm8 = xmm8 + a4 * b2;
559 (~C).
store( i , j , xmm1 );
560 (~C).
store( i+IT::size , j , xmm2 );
561 (~C).
store( i+IT::size*2UL, j , xmm3 );
562 (~C).
store( i+IT::size*3UL, j , xmm4 );
563 (~C).
store( i , j+1UL, xmm5 );
564 (~C).
store( i+IT::size , j+1UL, xmm6 );
565 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
566 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column accumulation for the four-wide panel (its guard is not
// visible in this excerpt; presumably the odd last column).
570 for(
size_t k=0UL; k<K; ++k ) {
572 xmm1 = xmm1 + A.load(i ,k) * b1;
573 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
574 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
575 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
577 (~C).
store( i , j, xmm1 );
578 (~C).
store( i+IT::size , j, xmm2 );
579 (~C).
store( i+IT::size*2UL, j, xmm3 );
580 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Panels of two packed values; columns of C are taken in pairs first.
583 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
585 for( ; (j+2UL) <= N; j+=2UL ) {
587 for(
size_t k=0UL; k<K; ++k ) {
592 xmm1 = xmm1 + a1 * b1;
593 xmm2 = xmm2 + a2 * b1;
594 xmm3 = xmm3 + a1 * b2;
595 xmm4 = xmm4 + a2 * b2;
597 (~C).
store( i , j , xmm1 );
598 (~C).
store( i+IT::size, j , xmm2 );
599 (~C).
store( i , j+1UL, xmm3 );
600 (~C).
store( i+IT::size, j+1UL, xmm4 );
// Single-column accumulation for the two-wide panel (guard not visible).
604 for(
size_t k=0UL; k<K; ++k ) {
606 xmm1 = xmm1 + A.load(i ,k) * b1;
607 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
609 (~C).
store( i , j, xmm1 );
610 (~C).
store( i+IT::size, j, xmm2 );
// Final single packed value of rows (enclosing row condition not visible):
// column pairs first, each B element expanded with set().
615 for( ; (j+2UL) <= N; j+=2UL ) {
617 for(
size_t k=0UL; k<K; ++k ) {
619 xmm1 = xmm1 + a1 *
set( B(k,j ) );
620 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
622 (~C).
store( i, j , xmm1 );
623 (~C).
store( i, j+1UL, xmm2 );
// Single-column accumulation for the one-wide case (guard not visible).
627 for(
size_t k=0UL; k<K; ++k ) {
628 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
630 (~C).
store( i, j, xmm1 );
// BLAS-selection overload used when UseDefaultKernel holds for the type
// triple: no BLAS routine is called, the work is forwarded unchanged to
// selectDefaultAssignKernel( C, A, B ).
// NOTE(review): the remaining template parameters are not visible here.
651 template<
typename MT3
654 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
655 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
657 selectDefaultAssignKernel( C, A, B );
// BLAS assignment kernel for the single-precision case: computes C = A * B
// with cblas_sgemm (alpha = 1.0F, beta = 0.0F, so C is overwritten).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
677 template<
typename MT3
680 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
681 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
683 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each size_t -> int
// conversion instead of silently truncating.
689 const int M ( numeric_cast<int>( A.rows() ) );
690 const int N ( numeric_cast<int>( B.columns() ) );
691 const int K ( numeric_cast<int>( A.columns() ) );
692 const int lda( numeric_cast<int>( A.spacing() ) );
693 const int ldb( numeric_cast<int>( B.spacing() ) );
694 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition (presumably
// because A and B are stored column-major -- confirm).
696 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
697 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
698 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
699 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for the double-precision case: computes C = A * B
// with cblas_dgemm (alpha = 1.0, beta = 0.0, so C is overwritten).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
720 template<
typename MT3
723 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
724 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
726 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each size_t -> int
// conversion instead of silently truncating.
732 const int M ( numeric_cast<int>( A.rows() ) );
733 const int N ( numeric_cast<int>( B.columns() ) );
734 const int K ( numeric_cast<int>( A.columns() ) );
735 const int lda( numeric_cast<int>( A.spacing() ) );
736 const int ldb( numeric_cast<int>( B.spacing() ) );
737 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
739 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
740 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
741 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
742 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for complex<float> operands: computes C = A * B with
// cblas_cgemm (alpha = (1,0), beta = (0,0), so C is overwritten).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
763 template<
typename MT3
766 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
767 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
769 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
778 const int M ( numeric_cast<int>( A.rows() ) );
779 const int N ( numeric_cast<int>( B.columns() ) );
780 const int K ( numeric_cast<int>( A.columns() ) );
781 const int lda( numeric_cast<int>( A.spacing() ) );
782 const int ldb( numeric_cast<int>( B.spacing() ) );
783 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
784 complex<float> alpha( 1.0F, 0.0F );
785 complex<float> beta ( 0.0F, 0.0F );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
787 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
788 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
789 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
790 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for complex<double> operands: computes C = A * B with
// cblas_zgemm (alpha = (1,0), beta = (0,0), so C is overwritten).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
811 template<
typename MT3
814 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
815 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
817 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
826 const int M ( numeric_cast<int>( A.rows() ) );
827 const int N ( numeric_cast<int>( B.columns() ) );
828 const int K ( numeric_cast<int>( A.columns() ) );
829 const int lda( numeric_cast<int>( A.spacing() ) );
830 const int ldb( numeric_cast<int>( B.spacing() ) );
831 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
832 complex<double> alpha( 1.0, 0.0 );
833 complex<double> beta ( 0.0, 0.0 );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
835 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
836 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
837 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
838 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Fragment of an assignment overload that first materialises the expression
// 'rhs' into a temporary of type TmpType.
// NOTE(review): the signature and the statement consuming 'tmp' are not
// visible in this excerpt; presumably this is the overload for non-dense
// targets -- confirm.
857 template<
typename MT
// TmpType is ResultType or OppositeType, selected by the flag SO.
863 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
875 const TmpType tmp( rhs );
// Addition assignment of the product expression 'rhs' to the dense matrix
// 'lhs'. A single check covers an empty target and a zero inner dimension;
// otherwise the work goes to the default or the BLAS add-assign kernels.
// NOTE(review): the signature, the body of the size check and the condition
// choosing between the two kernels are not visible in this excerpt.
894 template<
typename MT
903 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
918 TDMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
920 TDMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
// Default (element-wise) kernel computing C += A * B; used when
// UseVectorizedDefaultKernel does not hold for the type triple.
//
// C: target matrix, updated through C(i,j).
// A: left operand, read through A(i,k).
// B: right operand, read through B(k,j).
//
// NOTE(review): the remaining template parameters and the guard in front of
// the final statement are not visible in this excerpt.
939 template<
typename MT3
942 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
943 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
945 const size_t M( A.rows() );
946 const size_t N( B.columns() );
947 const size_t K( A.columns() );
// N with its lowest bit cleared: the largest even value not exceeding N.
950 const size_t end( N &
size_t(-2) );
952 for(
size_t i=0UL; i<M; ++i ) {
953 for(
size_t k=0UL; k<K; ++k ) {
// Columns are updated two at a time.
954 for(
size_t j=0UL; j<end; j+=2UL ) {
955 C(i,j ) += A(i,k) * B(k,j );
956 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update for column 'end', i.e. the last column when N is odd (presumably
// guarded by a test that is not visible here -- confirm).
959 C(i,end) += A(i,k) * B(k,end);
// Vectorized default add-assign kernel for a target of type
// DenseMatrix<MT3,false>; enabled when UseVectorizedDefaultKernel holds.
// The visible conditions pick a branch from the resizability of the operand
// types and, as a tie-breaker, from the element counts of A and B.
// NOTE(review): the branch bodies are not visible in this excerpt; presumably
// each converts one operand into a temporary and re-dispatches -- confirm.
981 template<
typename MT3
984 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
985 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// Only A's type is resizable.
990 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Only B's type is resizable.
994 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Otherwise decide by size: B has no more elements than A.
998 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default add-assign kernel (C += A * B) for a target of type
// DenseMatrix<MT3,true>; enabled when UseVectorizedDefaultKernel holds.
//
// Rows are processed in panels of 8, 4, 2 and 1 packed values (IT::size rows
// each, presumably the number of elements per intrinsic vector -- confirm).
// Products are added onto xmm accumulators over k and written with store().
//
// NOTE(review): the lines that declare and initialise the xmm accumulators
// are not visible in this excerpt; for an add-assign they presumably start
// from the current contents of C -- confirm. Also not visible: declaration of
// i and j, the loads producing a1..a4 / b1 / b2 and the guards around the
// single-column tails.
// NOTE(review): the row-panel bounds only guarantee that the last vector of a
// panel starts below M; full-width load/store presumably relies on padded
// column storage -- confirm.
1024 template<
typename MT3
1027 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1028 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1030 typedef IntrinsicTrait<ElementType> IT;
1032 const size_t M( A.rows() );
1033 const size_t N( B.columns() );
1034 const size_t K( A.columns() );
// Panels of eight packed values, one column of C at a time.
1038 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1039 for(
size_t j=0UL; j<N; ++j ) {
1048 for(
size_t k=0UL; k<K; ++k ) {
1050 xmm1 = xmm1 + A.load(i ,k) * b1;
1051 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1052 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1053 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1054 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
1055 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
1056 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
1057 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
1059 (~C).
store( i , j, xmm1 );
1060 (~C).
store( i+IT::size , j, xmm2 );
1061 (~C).
store( i+IT::size*2UL, j, xmm3 );
1062 (~C).
store( i+IT::size*3UL, j, xmm4 );
1063 (~C).
store( i+IT::size*4UL, j, xmm5 );
1064 (~C).
store( i+IT::size*5UL, j, xmm6 );
1065 (~C).
store( i+IT::size*6UL, j, xmm7 );
1066 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Panels of four packed values; columns of C are taken in pairs first.
1069 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1071 for( ; (j+2UL) <= N; j+=2UL ) {
1080 for(
size_t k=0UL; k<K; ++k ) {
// xmm1..xmm4 belong to column j, xmm5..xmm8 to column j+1 (see the stores).
1087 xmm1 = xmm1 + a1 * b1;
1088 xmm2 = xmm2 + a2 * b1;
1089 xmm3 = xmm3 + a3 * b1;
1090 xmm4 = xmm4 + a4 * b1;
1091 xmm5 = xmm5 + a1 * b2;
1092 xmm6 = xmm6 + a2 * b2;
1093 xmm7 = xmm7 + a3 * b2;
1094 xmm8 = xmm8 + a4 * b2;
1096 (~C).
store( i , j , xmm1 );
1097 (~C).
store( i+IT::size , j , xmm2 );
1098 (~C).
store( i+IT::size*2UL, j , xmm3 );
1099 (~C).
store( i+IT::size*3UL, j , xmm4 );
1100 (~C).
store( i , j+1UL, xmm5 );
1101 (~C).
store( i+IT::size , j+1UL, xmm6 );
1102 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1103 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column accumulation for the four-wide panel (guard not visible).
1110 for(
size_t k=0UL; k<K; ++k ) {
1112 xmm1 = xmm1 + A.load(i ,k) * b1;
1113 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
1114 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
1115 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
1117 (~C).
store( i , j, xmm1 );
1118 (~C).
store( i+IT::size , j, xmm2 );
1119 (~C).
store( i+IT::size*2UL, j, xmm3 );
1120 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Panels of two packed values; columns of C are taken in pairs first.
1123 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1125 for( ; (j+2UL) <= N; j+=2UL ) {
1130 for(
size_t k=0UL; k<K; ++k ) {
1135 xmm1 = xmm1 + a1 * b1;
1136 xmm2 = xmm2 + a2 * b1;
1137 xmm3 = xmm3 + a1 * b2;
1138 xmm4 = xmm4 + a2 * b2;
1140 (~C).
store( i , j , xmm1 );
1141 (~C).
store( i+IT::size, j , xmm2 );
1142 (~C).
store( i , j+1UL, xmm3 );
1143 (~C).
store( i+IT::size, j+1UL, xmm4 );
// Single-column accumulation for the two-wide panel (guard not visible).
1148 for(
size_t k=0UL; k<K; ++k ) {
1150 xmm1 = xmm1 + A.load(i ,k) * b1;
1151 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
1153 (~C).
store( i , j, xmm1 );
1154 (~C).
store( i+IT::size, j, xmm2 );
// Final single packed value of rows (enclosing row condition not visible):
// column pairs first, each B element expanded with set().
1159 for( ; (j+2UL) <= N; j+=2UL ) {
1162 for(
size_t k=0UL; k<K; ++k ) {
1164 xmm1 = xmm1 + a1 *
set( B(k,j ) );
1165 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
1167 (~C).
store( i, j , xmm1 );
1168 (~C).
store( i, j+1UL, xmm2 );
// Single-column accumulation for the one-wide case (guard not visible).
1172 for(
size_t k=0UL; k<K; ++k ) {
1173 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
1175 (~C).
store( i, j, xmm1 );
// BLAS-selection overload used when UseDefaultKernel holds for the type
// triple: no BLAS routine is called, the work is forwarded unchanged to
// selectDefaultAddAssignKernel( C, A, B ).
// NOTE(review): the remaining template parameters are not visible here.
1196 template<
typename MT3
1199 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1200 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1202 selectDefaultAddAssignKernel( C, A, B );
// BLAS add-assign kernel for the single-precision case: computes C += A * B
// with cblas_sgemm (alpha = 1.0F, beta = 1.0F, so the old C is kept and added).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1222 template<
typename MT3
1225 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1226 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1228 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1234 const int M ( numeric_cast<int>( A.rows() ) );
1235 const int N ( numeric_cast<int>( B.columns() ) );
1236 const int K ( numeric_cast<int>( A.columns() ) );
1237 const int lda( numeric_cast<int>( A.spacing() ) );
1238 const int ldb( numeric_cast<int>( B.spacing() ) );
1239 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1241 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1242 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1243 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1244 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS add-assign kernel for the double-precision case: computes C += A * B
// with cblas_dgemm (alpha = 1.0, beta = 1.0, so the old C is kept and added).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1265 template<
typename MT3
1268 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1269 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1271 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1277 const int M ( numeric_cast<int>( A.rows() ) );
1278 const int N ( numeric_cast<int>( B.columns() ) );
1279 const int K ( numeric_cast<int>( A.columns() ) );
1280 const int lda( numeric_cast<int>( A.spacing() ) );
1281 const int ldb( numeric_cast<int>( B.spacing() ) );
1282 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1284 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1285 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1286 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1287 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS add-assign kernel for complex<float> operands: computes C += A * B with
// cblas_cgemm (alpha = (1,0), beta = (1,0), so the old C is kept and added).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1308 template<
typename MT3
1311 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1312 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1314 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1323 const int M ( numeric_cast<int>( A.rows() ) );
1324 const int N ( numeric_cast<int>( B.columns() ) );
1325 const int K ( numeric_cast<int>( A.columns() ) );
1326 const int lda( numeric_cast<int>( A.spacing() ) );
1327 const int ldb( numeric_cast<int>( B.spacing() ) );
1328 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
1329 const complex<float> alpha( 1.0F, 0.0F );
1330 const complex<float> beta ( 1.0F, 0.0F );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1332 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1333 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1334 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1335 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS add-assign kernel for complex<double> operands: computes C += A * B
// with cblas_zgemm (alpha = (1,0), beta = (1,0), so the old C is kept and
// added).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1356 template<
typename MT3
1359 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1360 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1362 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1371 const int M ( numeric_cast<int>( A.rows() ) );
1372 const int N ( numeric_cast<int>( B.columns() ) );
1373 const int K ( numeric_cast<int>( A.columns() ) );
1374 const int lda( numeric_cast<int>( A.spacing() ) );
1375 const int ldb( numeric_cast<int>( B.spacing() ) );
1376 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
1377 const complex<double> alpha( 1.0, 0.0 );
1378 const complex<double> beta ( 1.0, 0.0 );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1380 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1381 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1382 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1383 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of the product expression 'rhs' to the dense matrix
// 'lhs'. A single check covers an empty target and a zero inner dimension;
// otherwise the work goes to the default or the BLAS sub-assign kernels.
// NOTE(review): the signature, the body of the size check and the condition
// choosing between the two kernels are not visible in this excerpt.
1406 template<
typename MT
1415 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1430 TDMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1432 TDMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
// Default (element-wise) kernel computing C -= A * B; used when
// UseVectorizedDefaultKernel does not hold for the type triple.
//
// C: target matrix, updated through C(i,j).
// A: left operand, read through A(i,k).
// B: right operand, read through B(k,j).
//
// NOTE(review): the remaining template parameters and the guard in front of
// the final statement are not visible in this excerpt.
1451 template<
typename MT3
1454 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1455 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1457 const size_t M( A.rows() );
1458 const size_t N( B.columns() );
1459 const size_t K( A.columns() );
// N with its lowest bit cleared: the largest even value not exceeding N.
1462 const size_t end( N &
size_t(-2) );
1464 for(
size_t i=0UL; i<M; ++i ) {
1465 for(
size_t k=0UL; k<K; ++k ) {
// Columns are updated two at a time.
1466 for(
size_t j=0UL; j<end; j+=2UL ) {
1467 C(i,j ) -= A(i,k) * B(k,j );
1468 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update for column 'end', i.e. the last column when N is odd (presumably
// guarded by a test that is not visible here -- confirm).
1471 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default sub-assign kernel for a target of type
// DenseMatrix<MT3,false>; enabled when UseVectorizedDefaultKernel holds.
// The visible conditions pick a branch from the resizability of the operand
// types and, as a tie-breaker, from the element counts of A and B.
// NOTE(review): the branch bodies are not visible in this excerpt; presumably
// each converts one operand into a temporary and re-dispatches -- confirm.
1493 template<
typename MT3
1496 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1497 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// Only A's type is resizable.
1502 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Only B's type is resizable.
1506 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Otherwise decide by size: B has no more elements than A.
1510 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// Vectorized default sub-assign kernel (C -= A * B) for a target of type
// DenseMatrix<MT3,true>; enabled when UseVectorizedDefaultKernel holds.
//
// Rows are processed in panels of 8, 4, 2 and 1 packed values (IT::size rows
// each, presumably the number of elements per intrinsic vector -- confirm).
// Products are subtracted from xmm accumulators over k and written with
// store().
//
// NOTE(review): the lines that declare and initialise the xmm accumulators
// are not visible in this excerpt; for a sub-assign they presumably start
// from the current contents of C -- confirm. Also not visible: declaration of
// i and j, the loads producing a1..a4 / b1 / b2 and the guards around the
// single-column tails.
// NOTE(review): the row-panel bounds only guarantee that the last vector of a
// panel starts below M; full-width load/store presumably relies on padded
// column storage -- confirm.
1536 template<
typename MT3
1539 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1540 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1542 typedef IntrinsicTrait<ElementType> IT;
1544 const size_t M( A.rows() );
1545 const size_t N( B.columns() );
1546 const size_t K( A.columns() );
// Panels of eight packed values, one column of C at a time.
1550 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
1551 for(
size_t j=0UL; j<N; ++j ) {
1560 for(
size_t k=0UL; k<K; ++k ) {
1562 xmm1 = xmm1 - A.load(i ,k) * b1;
1563 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1564 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1565 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1566 xmm5 = xmm5 - A.load(i+IT::size*4UL,k) * b1;
1567 xmm6 = xmm6 - A.load(i+IT::size*5UL,k) * b1;
1568 xmm7 = xmm7 - A.load(i+IT::size*6UL,k) * b1;
1569 xmm8 = xmm8 - A.load(i+IT::size*7UL,k) * b1;
1571 (~C).
store( i , j, xmm1 );
1572 (~C).
store( i+IT::size , j, xmm2 );
1573 (~C).
store( i+IT::size*2UL, j, xmm3 );
1574 (~C).
store( i+IT::size*3UL, j, xmm4 );
1575 (~C).
store( i+IT::size*4UL, j, xmm5 );
1576 (~C).
store( i+IT::size*5UL, j, xmm6 );
1577 (~C).
store( i+IT::size*6UL, j, xmm7 );
1578 (~C).
store( i+IT::size*7UL, j, xmm8 );
// Panels of four packed values; columns of C are taken in pairs first.
1581 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
1583 for( ; (j+2UL) <= N; j+=2UL ) {
1592 for(
size_t k=0UL; k<K; ++k ) {
// xmm1..xmm4 belong to column j, xmm5..xmm8 to column j+1 (see the stores).
1599 xmm1 = xmm1 - a1 * b1;
1600 xmm2 = xmm2 - a2 * b1;
1601 xmm3 = xmm3 - a3 * b1;
1602 xmm4 = xmm4 - a4 * b1;
1603 xmm5 = xmm5 - a1 * b2;
1604 xmm6 = xmm6 - a2 * b2;
1605 xmm7 = xmm7 - a3 * b2;
1606 xmm8 = xmm8 - a4 * b2;
1608 (~C).
store( i , j , xmm1 );
1609 (~C).
store( i+IT::size , j , xmm2 );
1610 (~C).
store( i+IT::size*2UL, j , xmm3 );
1611 (~C).
store( i+IT::size*3UL, j , xmm4 );
1612 (~C).
store( i , j+1UL, xmm5 );
1613 (~C).
store( i+IT::size , j+1UL, xmm6 );
1614 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 );
1615 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 );
// Single-column accumulation for the four-wide panel (guard not visible).
1622 for(
size_t k=0UL; k<K; ++k ) {
1624 xmm1 = xmm1 - A.load(i ,k) * b1;
1625 xmm2 = xmm2 - A.load(i+IT::size ,k) * b1;
1626 xmm3 = xmm3 - A.load(i+IT::size*2UL,k) * b1;
1627 xmm4 = xmm4 - A.load(i+IT::size*3UL,k) * b1;
1629 (~C).
store( i , j, xmm1 );
1630 (~C).
store( i+IT::size , j, xmm2 );
1631 (~C).
store( i+IT::size*2UL, j, xmm3 );
1632 (~C).
store( i+IT::size*3UL, j, xmm4 );
// Panels of two packed values; columns of C are taken in pairs first.
1635 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
1637 for( ; (j+2UL) <= N; j+=2UL ) {
1642 for(
size_t k=0UL; k<K; ++k ) {
1647 xmm1 = xmm1 - a1 * b1;
1648 xmm2 = xmm2 - a2 * b1;
1649 xmm3 = xmm3 - a1 * b2;
1650 xmm4 = xmm4 - a2 * b2;
1652 (~C).
store( i , j , xmm1 );
1653 (~C).
store( i+IT::size, j , xmm2 );
1654 (~C).
store( i , j+1UL, xmm3 );
1655 (~C).
store( i+IT::size, j+1UL, xmm4 );
// Single-column accumulation for the two-wide panel (guard not visible).
1660 for(
size_t k=0UL; k<K; ++k ) {
1662 xmm1 = xmm1 - A.load(i ,k) * b1;
1663 xmm2 = xmm2 - A.load(i+IT::size,k) * b1;
1665 (~C).
store( i , j, xmm1 );
1666 (~C).
store( i+IT::size, j, xmm2 );
// Final single packed value of rows (enclosing row condition not visible):
// column pairs first, each B element expanded with set().
1671 for( ; (j+2UL) <= N; j+=2UL ) {
1674 for(
size_t k=0UL; k<K; ++k ) {
1676 xmm1 = xmm1 - a1 *
set( B(k,j ) );
1677 xmm2 = xmm2 - a1 *
set( B(k,j+1UL) );
1679 (~C).
store( i, j , xmm1 );
1680 (~C).
store( i, j+1UL, xmm2 );
// Single-column accumulation for the one-wide case (guard not visible).
1684 for(
size_t k=0UL; k<K; ++k ) {
1685 xmm1 = xmm1 - A.load(i,k) *
set( B(k,j) );
1687 (~C).
store( i, j, xmm1 );
// BLAS-selection overload used when UseDefaultKernel holds for the type
// triple: no BLAS routine is called, the work is forwarded unchanged to
// selectDefaultSubAssignKernel( C, A, B ).
// NOTE(review): the remaining template parameters are not visible here.
1708 template<
typename MT3
1711 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1712 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1714 selectDefaultSubAssignKernel( C, A, B );
// BLAS sub-assign kernel for the single-precision case: computes C -= A * B
// with cblas_sgemm (alpha = -1.0F negates the product, beta = 1.0F keeps the
// old C).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1734 template<
typename MT3
1737 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1738 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1740 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1746 const int M ( numeric_cast<int>( A.rows() ) );
1747 const int N ( numeric_cast<int>( B.columns() ) );
1748 const int K ( numeric_cast<int>( A.columns() ) );
1749 const int lda( numeric_cast<int>( A.spacing() ) );
1750 const int ldb( numeric_cast<int>( B.spacing() ) );
1751 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1753 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1754 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1755 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1756 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS sub-assign kernel for the double-precision case: computes C -= A * B
// with cblas_dgemm (alpha = -1.0 negates the product, beta = 1.0 keeps the
// old C).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1777 template<
typename MT3
1780 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1781 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1783 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1789 const int M ( numeric_cast<int>( A.rows() ) );
1790 const int N ( numeric_cast<int>( B.columns() ) );
1791 const int K ( numeric_cast<int>( A.columns() ) );
1792 const int lda( numeric_cast<int>( A.spacing() ) );
1793 const int ldb( numeric_cast<int>( B.spacing() ) );
1794 const int ldc( numeric_cast<int>( C.spacing() ) );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1796 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1797 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1798 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1799 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS sub-assign kernel for complex<float> operands: computes C -= A * B with
// cblas_cgemm (alpha = (-1,0) negates the product, beta = (1,0) keeps the
// old C).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1820 template<
typename MT3
1823 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1824 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1826 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1835 const int M ( numeric_cast<int>( A.rows() ) );
1836 const int N ( numeric_cast<int>( B.columns() ) );
1837 const int K ( numeric_cast<int>( A.columns() ) );
1838 const int lda( numeric_cast<int>( A.spacing() ) );
1839 const int ldb( numeric_cast<int>( B.spacing() ) );
1840 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
1841 const complex<float> alpha( -1.0F, 0.0F );
1842 const complex<float> beta ( 1.0F, 0.0F );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1844 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1845 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1846 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1847 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS sub-assign kernel for complex<double> operands: computes C -= A * B
// with cblas_zgemm (alpha = (-1,0) negates the product, beta = (1,0) keeps the
// old C).
//
// C: target matrix; its data(), spacing() and row-major trait are used.
// A: left operand (data(), rows(), columns(), spacing()).
// B: right operand (data(), columns(), spacing()).
//
// NOTE(review): lines between the using-declaration and the dimension setup
// are not visible in this excerpt.
1868 template<
typename MT3
1871 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1872 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1874 using boost::numeric_cast;
// CBLAS takes int arguments; numeric_cast range-checks each conversion.
1883 const int M ( numeric_cast<int>( A.rows() ) );
1884 const int N ( numeric_cast<int>( B.columns() ) );
1885 const int K ( numeric_cast<int>( A.columns() ) );
1886 const int lda( numeric_cast<int>( A.spacing() ) );
1887 const int ldb( numeric_cast<int>( B.spacing() ) );
1888 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives its scaling factors by address.
1889 const complex<double> alpha( -1.0, 0.0 );
1890 const complex<double> beta ( 1.0, 0.0 );
// For a row-major C the call uses CblasRowMajor and flags both A and B as
// transposed; otherwise CblasColMajor without transposition.
1892 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1893 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1894 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1895 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Head of a class that derives publicly from
// DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
// and privately from MatScalarMultExpr and Computation, i.e. a
// DMatScalarMultExpr whose matrix operand is a TDMatTDMatMultExpr.
// NOTE(review): the remaining template parameters and the class-name line are
// not visible in this excerpt; presumably this is a partial specialization of
// DMatScalarMultExpr -- confirm.
1941 template<
typename MT1
1945 :
public DenseMatrix< DMatScalarMultExpr< TDMatTDMatMultExpr<MT1,MT2>, ST, true >, true >
1946 ,
private MatScalarMultExpr
1947 ,
private Computation
// Shorthand for the wrapped matrix-matrix multiplication expression type.
1951 typedef TDMatTDMatMultExpr<MT1,MT2> MMM;
// Compile-time predicate for the scaled product: 'value' is non-zero only if
// IsFloat holds for the ElementType of T1, T2 and T3 and the fourth type T4
// is not complex (IsComplex<T4> is false).
1964 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1965 struct UseSinglePrecisionKernel {
1966 enum { value = IsFloat<typename T1::ElementType>::value &&
1967 IsFloat<typename T2::ElementType>::value &&
1968 IsFloat<typename T3::ElementType>::value &&
1969 !IsComplex<T4>::value };
// Compile-time predicate for the scaled product: 'value' is non-zero only if
// IsDouble holds for the ElementType of T1, T2 and T3 and the fourth type T4
// is not complex (IsComplex<T4> is false).
1978 template<
typename T1,
typename T2,
typename T3,
typename T4 >
1979 struct UseDoublePrecisionKernel {
1980 enum { value = IsDouble<typename T1::ElementType>::value &&
1981 IsDouble<typename T2::ElementType>::value &&
1982 IsDouble<typename T3::ElementType>::value &&
1983 !IsComplex<T4>::value };
// Compile-time predicate: 'value' is non-zero only if the ElementType of all
// three types T1, T2 and T3 is exactly complex<float>. No scalar type takes
// part in this test.
1992 template<
typename T1,
typename T2,
typename T3 >
1993 struct UseSinglePrecisionComplexKernel {
// Element type all three operands are compared against.
1994 typedef complex<float> Type;
1995 enum { value = IsSame<typename T1::ElementType,Type>::value &&
1996 IsSame<typename T2::ElementType,Type>::value &&
1997 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate: 'value' is non-zero only if the ElementType of all
// three types T1, T2 and T3 is exactly complex<double>. No scalar type takes
// part in this test.
2006 template<
typename T1,
typename T2,
typename T3 >
2007 struct UseDoublePrecisionComplexKernel {
// Element type all three operands are compared against.
2008 typedef complex<double> Type;
2009 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2010 IsSame<typename T2::ElementType,Type>::value &&
2011 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback for the scaled
// product: 'value' is non-zero if BLAZE_BLAS_MODE evaluates to false, or if
// none of the four BLAS kernel predicates applies. The real-valued predicates
// also take the fourth type T4; the complex ones only take T1..T3.
2019 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2020 struct UseDefaultKernel {
2021 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2022 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2023 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2024 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the vectorized default kernels for the
// scaled product: 'value' is non-zero only if all three matrix types report
// 'vectorizable', they share one ElementType, the fourth type T4 is that same
// element type, and IntrinsicTrait of the element type reports both
// 'addition' and 'multiplication'.
2032 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2033 struct UseVectorizedDefaultKernel {
2034 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2035 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2036 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2037 IsSame<typename T1::ElementType,T4>::value &&
2038 IntrinsicTrait<typename T1::ElementType>::addition &&
2039 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Member type aliases of the scaled-product expression.
// NOTE(review): several aliases referenced here (RES, RT1, CT1, RT2, CT2,
// ElementType) are declared on lines not visible in this excerpt.

// Type of this expression itself.
2045 typedef DMatScalarMultExpr<MMM,ST,true>
This;
// Result type, obtained from MultTrait of RES and the scalar type ST.
2046 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Intrinsic (packed) type associated with ElementType via IntrinsicTrait.
2050 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// Type of the wrapped matrix-matrix product.
2055 typedef const TDMatTDMatMultExpr<MT1,MT2>
LeftOperand;
// Left matrix operand as used by the kernels: const RT1 if MT1 is itself a
// computation, CT1 otherwise.
2061 typedef typename SelectType< IsComputation<MT1>::value,
const RT1,
CT1 >::Type
LT;
// Right matrix operand as used by the kernels: const RT2 if MT2 is itself a
// computation, CT2 otherwise.
2064 typedef typename SelectType< IsComputation<MT2>::value,
const RT2,
CT2 >::Type
RT;
// Compile-time flag, fixed to 0 for this expression type.
2069 enum { vectorizable = 0 };
// Constructor taking the matrix product expression and the scalar factor.
// NOTE(review): the initialiser list and body are not visible in this excerpt.
2078 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// Element access: the (i,j) element of the wrapped product times the scalar.
// NOTE(review): the enclosing function signature is not visible here.
2094 return matrix_(i,j) * scalar_;
// Size query for the number of rows.
// NOTE(review): the function body is not visible in this excerpt.
2103 inline size_t rows()
const {
// Size query for the number of columns.
// NOTE(review): the function body is not visible in this excerpt.
2113 inline size_t columns()
const {
// Alias query: forwards to canAlias() of the wrapped product expression.
//
// alias: address to be checked.
// Returns whatever matrix_.canAlias( alias ) returns.
2144 template<
typename T >
2145 inline bool canAlias(
const T* alias )
const {
2146 return matrix_.canAlias( alias );
// Alias query: forwards to isAliased() of the wrapped product expression.
//
// alias: address to be checked.
// Returns whatever matrix_.isAliased( alias ) returns.
2156 template<
typename T >
2157 inline bool isAliased(
const T* alias )
const {
2158 return matrix_.isAliased( alias );
// Assignment of the scaled product expression 'rhs' to the dense matrix 'lhs'.
// Fetches both operands of the wrapped product, handles the degenerate sizes
// and then dispatches to the default or the BLAS kernel selection, passing the
// scalar along.
// NOTE(review): the remaining template parameters, the bodies of the two size
// checks, the evaluation of A and B and the condition choosing between the two
// kernels are not visible in this excerpt.
2177 template<
typename MT3
2179 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2186 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2187 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Target matrix has no elements.
2189 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Inner dimension is zero (the default kernel reads index 0 of it).
2192 else if( left.columns() == 0UL ) {
2208 DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2210 DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Default (element-wise) kernel for the scaled product; used when
// UseVectorizedDefaultKernel does not hold.
//
// C: target matrix, written through C(i,k).
// A: left operand, read through A(i,j).
// B: right operand, read through B(j,k).
// scalar: scaling factor.
//
// Note the index naming here: j runs over the inner dimension (A.columns()),
// k over the columns of B / C.
// Reads A(i,0) and B(0,k) unconditionally, so it needs A.columns() >= 1; the
// assign() dispatcher tests the zero case separately before calling a kernel.
// NOTE(review): the body of the last loop is not visible in this excerpt;
// presumably it applies 'scalar' to row i of C -- confirm.
2228 template<
typename MT3
2232 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2233 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2235 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialise row i of C with the products for inner index 0 ...
2236 for(
size_t k=0UL; k<B.columns(); ++k ) {
2237 C(i,k) = A(i,0UL) * B(0UL,k);
// ... then accumulate the products for the remaining inner indices.
2239 for(
size_t j=1UL; j<A.columns(); ++j ) {
2240 for(
size_t k=0UL; k<B.columns(); ++k ) {
2241 C(i,k) += A(i,j) * B(j,k);
// Final pass over the columns of row i (body not visible).
2244 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default kernel for the scaled product with a target of type
// DenseMatrix<MT3,false>; enabled when UseVectorizedDefaultKernel holds.
// Each branch re-dispatches through assign() with one operand replaced by a
// temporary 'tmp': 'A * tmp * scalar' replaces B, 'tmp * B * scalar' replaces A.
// The branch is chosen from the resizability of the operand types and, as a
// tie-breaker, from the element counts of A and B.
// NOTE(review): the declarations of 'tmp' are not visible in this excerpt;
// presumably they hold the operand in the opposite storage order -- confirm.
2265 template<
typename MT3
2269 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2270 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Only A's type is resizable: B is replaced by the temporary.
2275 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2277 assign( ~C, A * tmp * scalar );
// Only B's type is resizable: A is replaced by the temporary.
2279 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2281 assign( ~C, tmp * B * scalar );
// Otherwise the operand with fewer (or equal) elements is replaced.
2283 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
2285 assign( ~C, A * tmp * scalar );
2289 assign( ~C, tmp * B * scalar );
// selectDefaultAssignKernel(): vectorized-kernel overload (EnableIf on
// UseVectorizedDefaultKernel) for a target of type DenseMatrix<MT3,true>.
// C is processed in blocks of rows that are 8, 4, 2 and finally 1 intrinsic vector
// (IT::size elements) tall. Within a block the accumulators xmm1..xmm8 sum
// A.load(row,k) * (value taken from B) over the inner index k, and every
// accumulator is multiplied by `factor` when it is stored to C.
// NOTE(review): the declarations of i, j, factor, a1..a4 and b1/b2 are not visible
// in this listing; `factor` is presumably `scalar` broadcast to a vector -- confirm.
2308 template<
typename MT3
2312 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2313 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2315 typedef IntrinsicTrait<ElementType> IT;
// M = rows of the result, N = columns of the result, K = inner dimension.
2317 const size_t M( A.rows() );
2318 const size_t N( B.columns() );
2319 const size_t K( A.columns() );
// Blocks of 8 vectors of rows, one column of C at a time. The loop runs while the
// start of the 8th vector is still below M.
// NOTE(review): the last vector of a block may reach past row M; presumably this
// relies on padded storage -- confirm.
2325 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2326 for(
size_t j=0UL; j<N; ++j ) {
2327 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2328 for(
size_t k=0UL; k<K; ++k ) {
2330 xmm1 = xmm1 + A.load(i ,k) * b1;
2331 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2332 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2333 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2334 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2335 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2336 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2337 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2339 (~C).
store( i , j, xmm1 * factor );
2340 (~C).
store( i+IT::size , j, xmm2 * factor );
2341 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2342 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
2343 (~C).
store( i+IT::size*4UL, j, xmm5 * factor );
2344 (~C).
store( i+IT::size*5UL, j, xmm6 * factor );
2345 (~C).
store( i+IT::size*6UL, j, xmm7 * factor );
2346 (~C).
store( i+IT::size*7UL, j, xmm8 * factor );
// Blocks of 4 vectors of rows, two columns of C at a time:
// xmm1..xmm4 belong to column j (b1), xmm5..xmm8 to column j+1 (b2).
2349 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2351 for( ; (j+2UL) <= N; j+=2UL ) {
2352 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2353 for(
size_t k=0UL; k<K; ++k ) {
2360 xmm1 = xmm1 + a1 * b1;
2361 xmm2 = xmm2 + a2 * b1;
2362 xmm3 = xmm3 + a3 * b1;
2363 xmm4 = xmm4 + a4 * b1;
2364 xmm5 = xmm5 + a1 * b2;
2365 xmm6 = xmm6 + a2 * b2;
2366 xmm7 = xmm7 + a3 * b2;
2367 xmm8 = xmm8 + a4 * b2;
2369 (~C).
store( i , j , xmm1 * factor );
2370 (~C).
store( i+IT::size , j , xmm2 * factor );
2371 (~C).
store( i+IT::size*2UL, j , xmm3 * factor );
2372 (~C).
store( i+IT::size*3UL, j , xmm4 * factor );
2373 (~C).
store( i , j+1UL, xmm5 * factor );
2374 (~C).
store( i+IT::size , j+1UL, xmm6 * factor );
2375 (~C).
store( i+IT::size*2UL, j+1UL, xmm7 * factor );
2376 (~C).
store( i+IT::size*3UL, j+1UL, xmm8 * factor );
// Single-column variant of the 4-vector block (the guard that selects it is not
// visible in this listing).
2380 for(
size_t k=0UL; k<K; ++k ) {
2382 xmm1 = xmm1 + A.load(i ,k) * b1;
2383 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2384 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2385 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2387 (~C).
store( i , j, xmm1 * factor );
2388 (~C).
store( i+IT::size , j, xmm2 * factor );
2389 (~C).
store( i+IT::size*2UL, j, xmm3 * factor );
2390 (~C).
store( i+IT::size*3UL, j, xmm4 * factor );
// Blocks of 2 vectors of rows, two columns of C at a time:
// xmm1/xmm2 belong to column j, xmm3/xmm4 to column j+1.
2393 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2395 for( ; (j+2UL) <= N; j+=2UL ) {
2397 for(
size_t k=0UL; k<K; ++k ) {
2402 xmm1 = xmm1 + a1 * b1;
2403 xmm2 = xmm2 + a2 * b1;
2404 xmm3 = xmm3 + a1 * b2;
2405 xmm4 = xmm4 + a2 * b2;
2407 (~C).
store( i , j , xmm1 * factor );
2408 (~C).
store( i+IT::size, j , xmm2 * factor );
2409 (~C).
store( i , j+1UL, xmm3 * factor );
2410 (~C).
store( i+IT::size, j+1UL, xmm4 * factor );
// Single-column variant of the 2-vector block.
2414 for(
size_t k=0UL; k<K; ++k ) {
2416 xmm1 = xmm1 + A.load(i ,k) * b1;
2417 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2419 (~C).
store( i , j, xmm1 * factor );
2420 (~C).
store( i+IT::size, j, xmm2 * factor );
// Remaining single vector of rows, two columns of C at a time; the B element is
// broadcast with set().
2425 for( ; (j+2UL) <= N; j+=2UL ) {
2427 for(
size_t k=0UL; k<K; ++k ) {
2429 xmm1 = xmm1 + a1 *
set( B(k,j ) );
2430 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
2432 (~C).
store( i, j , xmm1 * factor );
2433 (~C).
store( i, j+1UL, xmm2 * factor );
// Single-column variant of the 1-vector block.
2437 for(
size_t k=0UL; k<K; ++k ) {
2438 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
2440 (~C).
store( i, j, xmm1 * factor );
// selectBlasAssignKernel(): fallback overload, enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> applies. No BLAS routine is called here;
// the arguments are forwarded unchanged to selectDefaultAssignKernel().
2460 template<
typename MT3
2464 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2465 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2467 selectDefaultAssignKernel( C, A, B, scalar );
// selectBlasAssignKernel(): single-precision BLAS overload, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C = scalar * A * B with one call to cblas_sgemm.
2486 template<
typename MT3
2490 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2491 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2493 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
2499 const int M ( numeric_cast<int>( A.rows() ) );
2500 const int N ( numeric_cast<int>( B.columns() ) );
2501 const int K ( numeric_cast<int>( A.columns() ) );
2502 const int lda( numeric_cast<int>( A.spacing() ) );
2503 const int ldb( numeric_cast<int>( B.spacing() ) );
2504 const int ldc( numeric_cast<int>( C.spacing() ) );
// The layout argument follows the storage order of the target type MT3; A and B
// are flagged as transposed exactly when the target is row-major.
// alpha = scalar, beta = 0.0F: the previous contents of C do not contribute.
2506 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2507 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2508 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2509 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// selectBlasAssignKernel(): double-precision BLAS overload, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C = scalar * A * B with one call to cblas_dgemm.
2529 template<
typename MT3
2533 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2534 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2536 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
2542 const int M ( numeric_cast<int>( A.rows() ) );
2543 const int N ( numeric_cast<int>( B.columns() ) );
2544 const int K ( numeric_cast<int>( A.columns() ) );
2545 const int lda( numeric_cast<int>( A.spacing() ) );
2546 const int ldb( numeric_cast<int>( B.spacing() ) );
2547 const int ldc( numeric_cast<int>( C.spacing() ) );
// The layout argument follows the storage order of the target type MT3; A and B
// are flagged as transposed exactly when the target is row-major.
// alpha = scalar, beta = 0.0: the previous contents of C do not contribute.
2549 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2550 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2551 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2552 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// selectBlasAssignKernel(): single-precision complex BLAS overload, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C = scalar * A * B with one call to cblas_cgemm.
2572 template<
typename MT3
2576 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2577 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2579 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
2588 const int M ( numeric_cast<int>( A.rows() ) );
2589 const int N ( numeric_cast<int>( B.columns() ) );
2590 const int K ( numeric_cast<int>( A.columns() ) );
2591 const int lda( numeric_cast<int>( A.spacing() ) );
2592 const int ldb( numeric_cast<int>( B.spacing() ) );
2593 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address, so both are materialised
// as complex<float> locals. beta = 0: the previous contents of C do not contribute.
2594 const complex<float> alpha( scalar );
2595 const complex<float> beta ( 0.0F, 0.0F );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
2597 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2598 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2599 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2600 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// selectBlasAssignKernel(): double-precision complex BLAS overload, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C = scalar * A * B with one call to cblas_zgemm.
2620 template<
typename MT3
2624 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2625 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2627 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
2636 const int M ( numeric_cast<int>( A.rows() ) );
2637 const int N ( numeric_cast<int>( B.columns() ) );
2638 const int K ( numeric_cast<int>( A.columns() ) );
2639 const int lda( numeric_cast<int>( A.spacing() ) );
2640 const int ldb( numeric_cast<int>( B.spacing() ) );
2641 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address, so both are materialised
// as complex<double> locals. beta = 0: the previous contents of C do not contribute.
2642 const complex<double> alpha( scalar );
2643 const complex<double> beta ( 0.0, 0.0 );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
2645 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2646 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2647 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2648 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// assign(): friend overload that assigns a DMatScalarMultExpr (`rhs`) to a sparse
// matrix target (`lhs`) of storage order SO.
// NOTE(review): only the temporary is visible in this listing; the statement that
// transfers `tmp` into `lhs` is not shown.
2665 template<
typename MT
2667 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type chosen by SO: either ResultType or OppositeType.
2671 typedef typename SelectType< SO, ResultType, OppositeType >::Type TmpType;
// Evaluate the whole expression into a temporary of that type.
2683 const TmpType tmp( rhs );
// addAssign(): friend overload that addition-assigns a DMatScalarMultExpr (`rhs`)
// to a dense matrix target (`lhs`) of storage order SO.
// NOTE(review): this listing is abridged; the branch bodies and the declarations
// of A and B are not visible here.
2700 template<
typename MT3
2702 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the matrix product wrapped in rhs.matrix_.
2709 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2710 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Single special case: empty target (zero rows or columns) or zero inner dimension.
2712 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Two dispatch targets are visible, the default kernel and the BLAS kernel; both
// receive rhs.scalar_. The condition that chooses between them is not visible.
2727 DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2729 DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// selectDefaultAddAssignKernel(): non-vectorized default overload. The DisableIf
// return type ties it to UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> NOT applying.
// NOTE(review): only the signature is visible in this listing; the body is not shown.
2747 template<
typename MT3
2751 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2752 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// selectDefaultAddAssignKernel(): vectorized-kernel overload (EnableIf on
// UseVectorizedDefaultKernel) for a target of type DenseMatrix<MT3,false>.
// NOTE(review): only the branch conditions are visible in this listing; the
// statements inside the branches are not shown.
2773 template<
typename MT3
2777 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2778 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Case 1: A is resizable and B is not.
2783 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Case 2: B is resizable and A is not.
2787 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Case 3: decide by element count -- B has no more elements than A.
2791 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// selectDefaultAddAssignKernel(): vectorized-kernel overload (EnableIf on
// UseVectorizedDefaultKernel) for a target of type DenseMatrix<MT3,true>.
// C is processed in blocks of rows that are 8, 4, 2 and finally 1 intrinsic vector
// (IT::size elements) tall. The accumulators xmm1..xmm8 sum A.load(row,k) * (value
// from B) over the inner index k; each store writes back the current contents of C
// PLUS accumulator * factor.
// NOTE(review): the declarations of i, j, factor, a1..a4 and b1/b2 are not visible
// in this listing; `factor` is presumably `scalar` broadcast to a vector -- confirm.
2816 template<
typename MT3
2820 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2821 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2823 typedef IntrinsicTrait<ElementType> IT;
// M = rows of the result, N = columns of the result, K = inner dimension.
2825 const size_t M( A.rows() );
2826 const size_t N( B.columns() );
2827 const size_t K( A.columns() );
// Blocks of 8 vectors of rows, one column of C at a time.
// NOTE(review): the last vector of a block may reach past row M; presumably this
// relies on padded storage -- confirm.
2833 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
2834 for(
size_t j=0UL; j<N; ++j ) {
2835 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2836 for(
size_t k=0UL; k<K; ++k ) {
2838 xmm1 = xmm1 + A.load(i ,k) * b1;
2839 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2840 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2841 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2842 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
2843 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
2844 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
2845 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
2847 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
2848 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
2849 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
2850 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
2851 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) + xmm5 * factor );
2852 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) + xmm6 * factor );
2853 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) + xmm7 * factor );
2854 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) + xmm8 * factor );
// Blocks of 4 vectors of rows, two columns of C at a time:
// xmm1..xmm4 belong to column j (b1), xmm5..xmm8 to column j+1 (b2).
2857 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
2859 for( ; (j+2UL) <= N; j+=2UL ) {
2860 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2861 for(
size_t k=0UL; k<K; ++k ) {
2868 xmm1 = xmm1 + a1 * b1;
2869 xmm2 = xmm2 + a2 * b1;
2870 xmm3 = xmm3 + a3 * b1;
2871 xmm4 = xmm4 + a4 * b1;
2872 xmm5 = xmm5 + a1 * b2;
2873 xmm6 = xmm6 + a2 * b2;
2874 xmm7 = xmm7 + a3 * b2;
2875 xmm8 = xmm8 + a4 * b2;
2877 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2878 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) + xmm2 * factor );
2879 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) + xmm3 * factor );
2880 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) + xmm4 * factor );
2881 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm5 * factor );
2882 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) + xmm6 * factor );
2883 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) + xmm7 * factor );
2884 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) + xmm8 * factor );
// Single-column variant of the 4-vector block (the guard that selects it is not
// visible in this listing).
2888 for(
size_t k=0UL; k<K; ++k ) {
2890 xmm1 = xmm1 + A.load(i ,k) * b1;
2891 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
2892 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
2893 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
2895 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
2896 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) + xmm2 * factor );
2897 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) + xmm3 * factor );
2898 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) + xmm4 * factor );
// Blocks of 2 vectors of rows, two columns of C at a time:
// xmm1/xmm2 belong to column j, xmm3/xmm4 to column j+1.
2901 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
2903 for( ; (j+2UL) <= N; j+=2UL ) {
2905 for(
size_t k=0UL; k<K; ++k ) {
2910 xmm1 = xmm1 + a1 * b1;
2911 xmm2 = xmm2 + a2 * b1;
2912 xmm3 = xmm3 + a1 * b2;
2913 xmm4 = xmm4 + a2 * b2;
2915 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
2916 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) + xmm2 * factor );
2917 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) + xmm3 * factor );
2918 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) + xmm4 * factor );
// Single-column variant of the 2-vector block.
2922 for(
size_t k=0UL; k<K; ++k ) {
2924 xmm1 = xmm1 + A.load(i ,k) * b1;
2925 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
2927 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
2928 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) + xmm2 * factor );
// Remaining single vector of rows, two columns of C at a time; the B element is
// broadcast with set().
2933 for( ; (j+2UL) <= N; j+=2UL ) {
2935 for(
size_t k=0UL; k<K; ++k ) {
2937 xmm1 = xmm1 + a1 *
set( B(k,j ) );
2938 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
2940 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
2941 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) + xmm2 * factor );
// Single-column variant of the 1-vector block.
2945 for(
size_t k=0UL; k<K; ++k ) {
2946 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
2948 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// selectBlasAddAssignKernel(): fallback overload, enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> applies. No BLAS routine is called here;
// the arguments are forwarded unchanged to selectDefaultAddAssignKernel().
2968 template<
typename MT3
2972 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2973 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2975 selectDefaultAddAssignKernel( C, A, B, scalar );
// selectBlasAddAssignKernel(): single-precision BLAS overload, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C += scalar * A * B with one call to cblas_sgemm.
2994 template<
typename MT3
2998 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2999 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3001 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3007 const int M ( numeric_cast<int>( A.rows() ) );
3008 const int N ( numeric_cast<int>( B.columns() ) );
3009 const int K ( numeric_cast<int>( A.columns() ) );
3010 const int lda( numeric_cast<int>( A.spacing() ) );
3011 const int ldb( numeric_cast<int>( B.spacing() ) );
3012 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
// alpha = scalar, beta = 1.0F: the product is added to the existing contents of C.
3014 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3015 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3016 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3017 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// selectBlasAddAssignKernel(): double-precision BLAS overload, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C += scalar * A * B with one call to cblas_dgemm.
3037 template<
typename MT3
3041 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3042 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3044 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3050 const int M ( numeric_cast<int>( A.rows() ) );
3051 const int N ( numeric_cast<int>( B.columns() ) );
3052 const int K ( numeric_cast<int>( A.columns() ) );
3053 const int lda( numeric_cast<int>( A.spacing() ) );
3054 const int ldb( numeric_cast<int>( B.spacing() ) );
3055 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
// alpha = scalar, beta = 1.0: the product is added to the existing contents of C.
3057 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3058 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3059 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3060 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// selectBlasAddAssignKernel(): single-precision complex BLAS overload, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C += scalar * A * B with one call to cblas_cgemm.
3080 template<
typename MT3
3084 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3085 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3087 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3096 const int M ( numeric_cast<int>( A.rows() ) );
3097 const int N ( numeric_cast<int>( B.columns() ) );
3098 const int K ( numeric_cast<int>( A.columns() ) );
3099 const int lda( numeric_cast<int>( A.spacing() ) );
3100 const int ldb( numeric_cast<int>( B.spacing() ) );
3101 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address. beta = (1,0): the product
// is added to the existing contents of C.
3102 const complex<float> alpha( scalar );
3103 const complex<float> beta ( 1.0F, 0.0F );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
3105 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3106 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3107 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3108 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// selectBlasAddAssignKernel(): double-precision complex BLAS overload, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C += scalar * A * B with one call to cblas_zgemm.
3128 template<
typename MT3
3132 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3133 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3135 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3144 const int M ( numeric_cast<int>( A.rows() ) );
3145 const int N ( numeric_cast<int>( B.columns() ) );
3146 const int K ( numeric_cast<int>( A.columns() ) );
3147 const int lda( numeric_cast<int>( A.spacing() ) );
3148 const int ldb( numeric_cast<int>( B.spacing() ) );
3149 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address. beta = (1,0): the product
// is added to the existing contents of C.
3150 const complex<double> alpha( scalar );
3151 const complex<double> beta ( 1.0, 0.0 );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
3153 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3154 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3155 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3156 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// subAssign(): friend overload that subtraction-assigns a DMatScalarMultExpr (`rhs`)
// to a dense matrix target (`lhs`) of storage order SO.
// NOTE(review): this listing is abridged; the branch bodies and the declarations
// of A and B are not visible here.
3177 template<
typename MT3
3179 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the matrix product wrapped in rhs.matrix_.
3186 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3187 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Single special case: empty target (zero rows or columns) or zero inner dimension.
3189 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Two dispatch targets are visible, the default kernel and the BLAS kernel; both
// receive rhs.scalar_. The condition that chooses between them is not visible.
3204 DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3206 DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// selectDefaultSubAssignKernel(): non-vectorized default overload. The DisableIf
// return type ties it to UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> NOT applying.
// NOTE(review): only the signature is visible in this listing; the body is not shown.
3224 template<
typename MT3
3228 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3229 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// selectDefaultSubAssignKernel(): vectorized-kernel overload (EnableIf on
// UseVectorizedDefaultKernel) for a target of type DenseMatrix<MT3,false>.
// NOTE(review): only the branch conditions are visible in this listing; the
// statements inside the branches are not shown.
3250 template<
typename MT3
3254 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3255 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Case 1: A is resizable and B is not.
3260 if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Case 2: B is resizable and A is not.
3264 else if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Case 3: decide by element count -- B has no more elements than A.
3268 else if( B.rows() * B.columns() <= A.rows() * A.columns() ) {
// selectDefaultSubAssignKernel(): vectorized-kernel overload (EnableIf on
// UseVectorizedDefaultKernel) for a target of type DenseMatrix<MT3,true>.
// C is processed in blocks of rows that are 8, 4, 2 and finally 1 intrinsic vector
// (IT::size elements) tall. The accumulators xmm1..xmm8 sum A.load(row,k) * (value
// from B) over the inner index k; each store writes back the current contents of C
// MINUS accumulator * factor.
// NOTE(review): the declarations of i, j, factor, a1..a4 and b1/b2 are not visible
// in this listing; `factor` is presumably `scalar` broadcast to a vector -- confirm.
3293 template<
typename MT3
3297 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3298 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3300 typedef IntrinsicTrait<ElementType> IT;
// M = rows of the result, N = columns of the result, K = inner dimension.
3302 const size_t M( A.rows() );
3303 const size_t N( B.columns() );
3304 const size_t K( A.columns() );
// Blocks of 8 vectors of rows, one column of C at a time.
// NOTE(review): the last vector of a block may reach past row M; presumably this
// relies on padded storage -- confirm.
3310 for( ; (i+IT::size*7UL) < M; i+=IT::size*8UL ) {
3311 for(
size_t j=0UL; j<N; ++j ) {
3312 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3313 for(
size_t k=0UL; k<K; ++k ) {
3315 xmm1 = xmm1 + A.load(i ,k) * b1;
3316 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3317 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3318 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3319 xmm5 = xmm5 + A.load(i+IT::size*4UL,k) * b1;
3320 xmm6 = xmm6 + A.load(i+IT::size*5UL,k) * b1;
3321 xmm7 = xmm7 + A.load(i+IT::size*6UL,k) * b1;
3322 xmm8 = xmm8 + A.load(i+IT::size*7UL,k) * b1;
3324 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3325 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3326 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3327 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
3328 (~C).
store( i+IT::size*4UL, j, (~C).load(i+IT::size*4UL,j) - xmm5 * factor );
3329 (~C).
store( i+IT::size*5UL, j, (~C).load(i+IT::size*5UL,j) - xmm6 * factor );
3330 (~C).
store( i+IT::size*6UL, j, (~C).load(i+IT::size*6UL,j) - xmm7 * factor );
3331 (~C).
store( i+IT::size*7UL, j, (~C).load(i+IT::size*7UL,j) - xmm8 * factor );
// Blocks of 4 vectors of rows, two columns of C at a time:
// xmm1..xmm4 belong to column j (b1), xmm5..xmm8 to column j+1 (b2).
3334 for( ; (i+IT::size*3UL) < M; i+=IT::size*4UL ) {
3336 for( ; (j+2UL) <= N; j+=2UL ) {
3337 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3338 for(
size_t k=0UL; k<K; ++k ) {
3345 xmm1 = xmm1 + a1 * b1;
3346 xmm2 = xmm2 + a2 * b1;
3347 xmm3 = xmm3 + a3 * b1;
3348 xmm4 = xmm4 + a4 * b1;
3349 xmm5 = xmm5 + a1 * b2;
3350 xmm6 = xmm6 + a2 * b2;
3351 xmm7 = xmm7 + a3 * b2;
3352 xmm8 = xmm8 + a4 * b2;
3354 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3355 (~C).
store( i+IT::size , j , (~C).load(i+IT::size ,j ) - xmm2 * factor );
3356 (~C).
store( i+IT::size*2UL, j , (~C).load(i+IT::size*2UL,j ) - xmm3 * factor );
3357 (~C).
store( i+IT::size*3UL, j , (~C).load(i+IT::size*3UL,j ) - xmm4 * factor );
3358 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm5 * factor );
3359 (~C).
store( i+IT::size , j+1UL, (~C).load(i+IT::size ,j+1UL) - xmm6 * factor );
3360 (~C).
store( i+IT::size*2UL, j+1UL, (~C).load(i+IT::size*2UL,j+1UL) - xmm7 * factor );
3361 (~C).
store( i+IT::size*3UL, j+1UL, (~C).load(i+IT::size*3UL,j+1UL) - xmm8 * factor );
// Single-column variant of the 4-vector block (the guard that selects it is not
// visible in this listing).
3365 for(
size_t k=0UL; k<K; ++k ) {
3367 xmm1 = xmm1 + A.load(i ,k) * b1;
3368 xmm2 = xmm2 + A.load(i+IT::size ,k) * b1;
3369 xmm3 = xmm3 + A.load(i+IT::size*2UL,k) * b1;
3370 xmm4 = xmm4 + A.load(i+IT::size*3UL,k) * b1;
3372 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3373 (~C).
store( i+IT::size , j, (~C).load(i+IT::size ,j) - xmm2 * factor );
3374 (~C).
store( i+IT::size*2UL, j, (~C).load(i+IT::size*2UL,j) - xmm3 * factor );
3375 (~C).
store( i+IT::size*3UL, j, (~C).load(i+IT::size*3UL,j) - xmm4 * factor );
// Blocks of 2 vectors of rows, two columns of C at a time:
// xmm1/xmm2 belong to column j, xmm3/xmm4 to column j+1.
3378 for( ; (i+IT::size) < M; i+=IT::size*2UL ) {
3380 for( ; (j+2UL) <= N; j+=2UL ) {
3382 for(
size_t k=0UL; k<K; ++k ) {
3387 xmm1 = xmm1 + a1 * b1;
3388 xmm2 = xmm2 + a2 * b1;
3389 xmm3 = xmm3 + a1 * b2;
3390 xmm4 = xmm4 + a2 * b2;
3392 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3393 (~C).
store( i+IT::size, j , (~C).load(i+IT::size,j ) - xmm2 * factor );
3394 (~C).
store( i , j+1UL, (~C).load(i ,j+1UL) - xmm3 * factor );
3395 (~C).
store( i+IT::size, j+1UL, (~C).load(i+IT::size,j+1UL) - xmm4 * factor );
// Single-column variant of the 2-vector block.
3399 for(
size_t k=0UL; k<K; ++k ) {
3401 xmm1 = xmm1 + A.load(i ,k) * b1;
3402 xmm2 = xmm2 + A.load(i+IT::size,k) * b1;
3404 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3405 (~C).
store( i+IT::size, j, (~C).load(i+IT::size,j) - xmm2 * factor );
// Remaining single vector of rows, two columns of C at a time; the B element is
// broadcast with set().
3410 for( ; (j+2UL) <= N; j+=2UL ) {
3412 for(
size_t k=0UL; k<K; ++k ) {
3414 xmm1 = xmm1 + a1 *
set( B(k,j ) );
3415 xmm2 = xmm2 + a1 *
set( B(k,j+1UL) );
3417 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3418 (~C).
store( i, j+1UL, (~C).load(i,j+1UL) - xmm2 * factor );
// Single-column variant of the 1-vector block.
3422 for(
size_t k=0UL; k<K; ++k ) {
3423 xmm1 = xmm1 + A.load(i,k) *
set( B(k,j) );
3425 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// selectBlasSubAssignKernel(): fallback overload, enabled when
// UseDefaultKernel<MT3,MT4,MT5,ST2> applies. No BLAS routine is called here;
// the arguments are forwarded unchanged to selectDefaultSubAssignKernel().
3445 template<
typename MT3
3449 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3450 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3452 selectDefaultSubAssignKernel( C, A, B, scalar );
// selectBlasSubAssignKernel(): single-precision BLAS overload, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C -= scalar * A * B with one call to cblas_sgemm.
3471 template<
typename MT3
3475 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3476 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3478 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3484 const int M ( numeric_cast<int>( A.rows() ) );
3485 const int N ( numeric_cast<int>( B.columns() ) );
3486 const int K ( numeric_cast<int>( A.columns() ) );
3487 const int lda( numeric_cast<int>( A.spacing() ) );
3488 const int ldb( numeric_cast<int>( B.spacing() ) );
3489 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
// The subtraction is expressed through the sign of alpha: alpha = -scalar, beta = 1.0F.
3491 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3492 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3493 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3494 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// selectBlasSubAssignKernel(): double-precision BLAS overload, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> applies.
// Performs C -= scalar * A * B with one call to cblas_dgemm.
3514 template<
typename MT3
3518 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3519 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3521 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3527 const int M ( numeric_cast<int>( A.rows() ) );
3528 const int N ( numeric_cast<int>( B.columns() ) );
3529 const int K ( numeric_cast<int>( A.columns() ) );
3530 const int lda( numeric_cast<int>( A.spacing() ) );
3531 const int ldb( numeric_cast<int>( B.spacing() ) );
3532 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
// The subtraction is expressed through the sign of alpha: alpha = -scalar, beta = 1.0.
3534 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3535 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3536 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3537 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// selectBlasSubAssignKernel(): single-precision complex BLAS overload, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C -= scalar * A * B with one call to cblas_cgemm.
3557 template<
typename MT3
3561 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3562 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3564 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3573 const int M ( numeric_cast<int>( A.rows() ) );
3574 const int N ( numeric_cast<int>( B.columns() ) );
3575 const int K ( numeric_cast<int>( A.columns() ) );
3576 const int lda( numeric_cast<int>( A.spacing() ) );
3577 const int ldb( numeric_cast<int>( B.spacing() ) );
3578 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address. The subtraction is
// expressed through the sign of alpha: alpha = -scalar, beta = (1,0).
3579 const complex<float> alpha( -scalar );
3580 const complex<float> beta ( 1.0F, 0.0F );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
3582 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3583 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3584 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3585 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// selectBlasSubAssignKernel(): double-precision complex BLAS overload, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> applies (the scalar type is not
// part of this condition).
// Performs C -= scalar * A * B with one call to cblas_zgemm.
3605 template<
typename MT3
3609 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3610 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3612 using boost::numeric_cast;
// CBLAS takes int arguments, so every size is converted with the range-checked
// numeric_cast. M x K times K x N; the leading dimensions come from spacing().
3621 const int M ( numeric_cast<int>( A.rows() ) );
3622 const int N ( numeric_cast<int>( B.columns() ) );
3623 const int K ( numeric_cast<int>( A.columns() ) );
3624 const int lda( numeric_cast<int>( A.spacing() ) );
3625 const int ldb( numeric_cast<int>( B.spacing() ) );
3626 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex routine takes alpha and beta by address. The subtraction is
// expressed through the sign of alpha: alpha = -scalar, beta = (1,0).
3627 const complex<double> alpha( -scalar );
3628 const complex<double> beta ( 1.0, 0.0 );
// Layout follows the storage order of MT3; A and B are flagged as transposed
// exactly when the target is row-major.
3630 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3631 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3632 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3633 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Free function returning a TDMatTDMatMultExpr<T1,T2> by const value.
// NOTE(review): the function name and parameter list are not visible in this
// listing; presumably this is the operator* for two column-major dense matrices -- confirm.
3699 template<
typename T1
3701 inline const TDMatTDMatMultExpr<T1,T2>
// Throws std::invalid_argument; the size check that guards the throw is not
// visible in this listing.
3707 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression-trait specialization over (MT1, MT2, VT).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes TDMatDVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> operand -- confirm.
3724 template<
typename MT1,
typename MT2,
typename VT >
// When MT1 and MT2 are column-major dense matrices and VT is a dense column
// vector, Type is the result type of MT1 * ( MT2 * VT ), i.e. two nested
// matrix/vector products; in every other case Type is INVALID_TYPE.
3729 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3730 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3731 IsDenseVector<VT>::value && IsColumnVector<VT>::value
3732 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
3733 , INVALID_TYPE >::Type Type;
// Expression-trait specialization over (MT1, MT2, VT).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes TDMatSVecMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> operand -- confirm.
3742 template<
typename MT1,
typename MT2,
typename VT >
// When MT1 and MT2 are column-major dense matrices and VT is a sparse column
// vector, Type is the result type of MT1 * ( MT2 * VT ): the inner product uses
// the sparse-vector trait, the outer one the dense-vector trait. In every other
// case Type is INVALID_TYPE.
3747 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3748 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
3749 IsSparseVector<VT>::value && IsColumnVector<VT>::value
3750 ,
typename TDMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
3751 , INVALID_TYPE >::Type Type;
// Expression-trait specialization over (VT, MT1, MT2).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes TDVecTDMatMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> operand -- confirm.
3760 template<
typename VT,
typename MT1,
typename MT2 >
// When VT is a dense row vector and MT1 and MT2 are column-major dense matrices,
// Type is the result type of ( VT * MT1 ) * MT2, i.e. two nested vector/matrix
// products; in every other case Type is INVALID_TYPE.
3765 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
3766 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3767 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3768 ,
typename TDVecTDMatMultExprTrait< typename TDVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3769 , INVALID_TYPE >::Type Type;
// Expression-trait specialization over (VT, MT1, MT2).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes TSVecTDMatMultExprTrait for a TDMatTDMatMultExpr<MT1,MT2> operand -- confirm.
3778 template<
typename VT,
typename MT1,
typename MT2 >
// When VT is a sparse row vector and MT1 and MT2 are column-major dense matrices,
// Type is the result type of ( VT * MT1 ) * MT2: the inner product uses the
// sparse-vector trait, the outer one the dense-vector trait. In every other case
// Type is INVALID_TYPE.
3783 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
3784 IsDenseMatrix<MT1>::value && IsColumnMajorMatrix<MT1>::value &&
3785 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
3786 ,
typename TDVecTDMatMultExprTrait< typename TSVecTDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
3787 , INVALID_TYPE >::Type Type;
// Expression-trait specialization over (MT1, MT2).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes SubmatrixExprTrait for TDMatTDMatMultExpr<MT1,MT2> -- confirm.
3796 template<
typename MT1,
typename MT2 >
// Type is the multiplication type of the submatrix types of both (const) operands.
3801 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1>::Type
3802 ,
typename SubmatrixExprTrait<const MT2>::Type >::Type Type;
// Expression-trait specialization over (MT1, MT2).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes RowExprTrait for TDMatTDMatMultExpr<MT1,MT2> -- confirm.
3811 template<
typename MT1,
typename MT2 >
// Type is the multiplication type of a row of the (const) left operand with the
// complete right operand.
3816 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Expression-trait specialization over (MT1, MT2).
// NOTE(review): the struct header is not visible in this listing; presumably this
// specializes ColumnExprTrait for TDMatTDMatMultExpr<MT1,MT2> -- confirm.
3825 template<
typename MT1,
typename MT2 >
// Type is the multiplication type of the complete left operand with a column of
// the (const) right operand.
3830 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:228
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:122
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A=B*C$).
Definition: DMatDMatMultExpr.h:3703
const ElementType ReturnType
Return type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:224
const size_t TDMATTDMATMULT_THRESHOLD
Column-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies t...
Definition: Thresholds.h:170
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:357
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:121
Header file for the IsColumnMajorMatrix type trait.
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:219
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: TDMatTDMatMultExpr.h:266
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:246
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Expression object for transpose dense matrix-transpose dense matrix multiplications. The TDMatTDMatMultExpr class represents the compile time expression for multiplications between two column-major dense matrices.
Definition: Forward.h:127
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: TDMatTDMatMultExpr.h:338
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2371
Header file for the IsFloat type trait.
LeftOperand leftOperand() const
Returns the left-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:316
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:221
TDMatTDMatMultExpr< MT1, MT2 > This
Type of this TDMatTDMatMultExpr instance.
Definition: TDMatTDMatMultExpr.h:218
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the multiplication trait.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:117
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
Header file for the TSVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:231
Header file for the TDMatSVecMultExprTrait class template.
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: TDMatTDMatMultExpr.h:326
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
MT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:120
SelectType< IsComputation< MT1 >::value, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:234
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
const ResultType CompositeType
Data type for composite expression templates.
Definition: TDMatTDMatMultExpr.h:225
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: TDMatTDMatMultExpr.h:350
Header file for the IsNumeric type trait.
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: TDMatTDMatMultExpr.h:220
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
ResultType::ElementType ElementType
Resulting element type.
Definition: TDMatTDMatMultExpr.h:222
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
size_t rows() const
Returns the current number of rows of the matrix.
Definition: TDMatTDMatMultExpr.h:296
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
MT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:119
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
TDMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the TDMatTDMatMultExpr class.
Definition: TDMatTDMatMultExpr.h:251
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: TDMatTDMatMultExpr.h:118
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the complex data type.
Header file for the IsColumnVector type trait.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Operand matrix_
The dense matrix containing the submatrix.
Definition: DenseSubmatrix.h:2792
Header file for the TDVecTDMatMultExprTrait class template.
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
void store(float *address, const sse_float_t &value)
Aligned store of a vector of 'float' values.
Definition: Store.h:242
size_t columns() const
Returns the current number of columns of the matrix.
Definition: TDMatTDMatMultExpr.h:306
Header file for the IsExpression type trait class.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: TDMatTDMatMultExpr.h:223
SelectType< IsComputation< MT2 >::value, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: TDMatTDMatMultExpr.h:237
Header file for the FunctionTrace class.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: TDMatTDMatMultExpr.h:358