22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
31 #include <boost/cast.hpp>
96 template<
typename MT1
104 typedef typename MT1::ResultType
RT1;
105 typedef typename MT2::ResultType
RT2;
106 typedef typename MT1::CompositeType
CT1;
107 typedef typename MT2::CompositeType
CT2;
115 template<
typename T1,
typename T2,
typename T3 >
116 struct UseSinglePrecisionKernel {
129 template<
typename T1,
typename T2,
typename T3 >
130 struct UseDoublePrecisionKernel {
// Compile-time predicate used for kernel selection: 'value' is nonzero only
// when the ElementType of each of T1, T2 and T3 is exactly complex<float>.
144 template<
typename T1,
typename T2,
typename T3 >
145 struct UseSinglePrecisionComplexKernel {
146 typedef complex<float> Type;
147 enum { value = IsSame<typename T1::ElementType,Type>::value &&
148 IsSame<typename T2::ElementType,Type>::value &&
149 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate used for kernel selection: 'value' is nonzero only
// when the ElementType of each of T1, T2 and T3 is exactly complex<double>.
160 template<
typename T1,
typename T2,
typename T3 >
161 struct UseDoublePrecisionComplexKernel {
162 typedef complex<double> Type;
163 enum { value = IsSame<typename T1::ElementType,Type>::value &&
164 IsSame<typename T2::ElementType,Type>::value &&
165 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback: 'value' is nonzero
// when BLAZE_BLAS_MODE evaluates to zero, or when none of the four BLAS
// predicates (single, double, single complex, double complex precision)
// holds for the type triple <T1,T2,T3>.
175 template<
typename T1,
typename T2,
typename T3 >
176 struct UseDefaultKernel {
177 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
178 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
179 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
180 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsic-based default kernel.
// 'value' is nonzero only if all of the following hold:
//  - T1, T2 and T3 each report 'vectorizable',
//  - T2 and T3 have the same ElementType as T1,
//  - IntrinsicTrait of that element type reports both 'addition' and
//    'multiplication'.
190 template<
typename T1,
typename T2,
typename T3 >
191 struct UseVectorizedDefaultKernel {
192 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
193 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
194 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
195 IntrinsicTrait<typename T1::ElementType>::addition &&
196 IntrinsicTrait<typename T1::ElementType>::multiplication };
227 enum { vectorizable = 0 };
257 if(
lhs_.columns() != 0UL ) {
258 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
260 for(
size_t k=1UL; k<end; k+=2UL ) {
262 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
264 if( end <
lhs_.columns() ) {
292 return rhs_.columns();
322 template<
typename T >
324 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
334 template<
typename T >
336 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
355 template<
typename MT
364 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
367 else if( rhs.
lhs_.columns() == 0UL ) {
383 DMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
385 DMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
// Plain-loop kernel computing C = A * B (M x K times K x N).
//   C  target matrix, written through C(i,j)
//   A  left operand, read through A(i,k)
//   B  right operand, read through B(k,j)
// NOTE(review): column 0 of A and row 0 of B are read unconditionally, so
// K must be at least 1 -- presumably guaranteed by the caller; confirm.
404 template<
typename MT3
408 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
410 const size_t M( A.rows() );
411 const size_t N( B.columns() );
412 const size_t K( A.columns() );
414 for(
size_t i=0UL; i<M; ++i ) {
// First pass over row i: initialise every C(i,j) with the k == 0 product,
// so C does not have to be cleared beforehand.
415 for(
size_t j=0UL; j<N; ++j ) {
416 C(i,j) = A(i,0UL) * B(0UL,j);
// Remaining passes: add the products for k = 1 .. K-1.
418 for(
size_t k=1UL; k<K; ++k ) {
419 for(
size_t j=0UL; j<N; ++j ) {
420 C(i,j) += A(i,k) * B(k,j);
// Intrinsic-based kernel for C = A * B with a DenseMatrix<MT3,false> target,
// enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// C is written tile by tile. For each tile the products are accumulated in
// xmm variables while k advances in steps of IT::size, and every accumulator
// is reduced with sum() into exactly one element of C.
// The operands a1, a2, b1, ... used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
442 template<
typename MT3
445 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
446 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
448 typedef IntrinsicTrait<ElementType> IT;
450 const size_t M( A.rows() );
451 const size_t N( B.columns() );
452 const size_t K( A.columns() );
// Two rows (i, i+1) at a time.
456 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: rows i..i+1, columns j..j+3, eight accumulators.
458 for( ; (j+4UL) <= N; j+=4UL ) {
459 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
460 for(
size_t k=0UL; k<K; k+=IT::size ) {
467 xmm1 = xmm1 + a1 * b1;
468 xmm2 = xmm2 + a1 * b2;
469 xmm3 = xmm3 + a1 * b3;
470 xmm4 = xmm4 + a1 * b4;
471 xmm5 = xmm5 + a2 * b1;
472 xmm6 = xmm6 + a2 * b2;
473 xmm7 = xmm7 + a2 * b3;
474 xmm8 = xmm8 + a2 * b4;
476 (~C)(i ,j ) =
sum( xmm1 );
477 (~C)(i ,j+1UL) =
sum( xmm2 );
478 (~C)(i ,j+2UL) =
sum( xmm3 );
479 (~C)(i ,j+3UL) =
sum( xmm4 );
480 (~C)(i+1UL,j ) =
sum( xmm5 );
481 (~C)(i+1UL,j+1UL) =
sum( xmm6 );
482 (~C)(i+1UL,j+2UL) =
sum( xmm7 );
483 (~C)(i+1UL,j+3UL) =
sum( xmm8 );
// 2x2 tile: rows i..i+1, columns j..j+1.
485 for( ; (j+2UL) <= N; j+=2UL ) {
487 for(
size_t k=0UL; k<K; k+=IT::size ) {
492 xmm1 = xmm1 + a1 * b1;
493 xmm2 = xmm2 + a1 * b2;
494 xmm3 = xmm3 + a2 * b1;
495 xmm4 = xmm4 + a2 * b2;
497 (~C)(i ,j ) =
sum( xmm1 );
498 (~C)(i ,j+1UL) =
sum( xmm2 );
499 (~C)(i+1UL,j ) =
sum( xmm3 );
500 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
// 2x1 tile: a single column j for both rows.
504 for(
size_t k=0UL; k<K; k+=IT::size ) {
506 xmm1 = xmm1 + A.get(i ,k) * b1;
507 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
509 (~C)(i ,j) =
sum( xmm1 );
510 (~C)(i+1UL,j) =
sum( xmm2 );
// From here on only row i is written. 1x4 tile: columns j..j+3.
515 for( ; (j+4UL) <= N; j+=4UL ) {
517 for(
size_t k=0UL; k<K; k+=IT::size ) {
519 xmm1 = xmm1 + a1 * B.get(k,j );
520 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
521 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
522 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
524 (~C)(i,j ) =
sum( xmm1 );
525 (~C)(i,j+1UL) =
sum( xmm2 );
526 (~C)(i,j+2UL) =
sum( xmm3 );
527 (~C)(i,j+3UL) =
sum( xmm4 );
// 1x2 tile: columns j..j+1.
529 for( ; (j+2UL) <= N; j+=2UL ) {
531 for(
size_t k=0UL; k<K; k+=IT::size ) {
533 xmm1 = xmm1 + a1 * B.get(k,j );
534 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
536 (~C)(i,j ) =
sum( xmm1 );
537 (~C)(i,j+1UL) =
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
541 for(
size_t k=0UL; k<K; k+=IT::size ) {
542 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
544 (~C)(i,j) =
sum( xmm1 );
// Intrinsic-based kernel for C = A * B with a DenseMatrix<MT3,true> target,
// enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Compared with the DenseMatrix<MT3,false> overload the tiles are taller
// than wide (4x2 instead of 2x4). For each tile the products are
// accumulated in xmm variables while k advances in steps of IT::size, and
// every accumulator is reduced with sum() into exactly one element of C.
// The operands a1..a4, b1, b2 used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
565 template<
typename MT3
568 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
569 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
571 typedef IntrinsicTrait<ElementType> IT;
573 const size_t M( A.rows() );
574 const size_t N( B.columns() );
575 const size_t K( A.columns() );
// Four rows (i .. i+3) at a time.
579 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: rows i..i+3, columns j..j+1, eight accumulators.
581 for( ; (j+2UL) <= N; j+=2UL ) {
582 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
583 for(
size_t k=0UL; k<K; k+=IT::size ) {
590 xmm1 = xmm1 + a1 * b1;
591 xmm2 = xmm2 + a1 * b2;
592 xmm3 = xmm3 + a2 * b1;
593 xmm4 = xmm4 + a2 * b2;
594 xmm5 = xmm5 + a3 * b1;
595 xmm6 = xmm6 + a3 * b2;
596 xmm7 = xmm7 + a4 * b1;
597 xmm8 = xmm8 + a4 * b2;
599 (~C)(i ,j ) =
sum( xmm1 );
600 (~C)(i ,j+1UL) =
sum( xmm2 );
601 (~C)(i+1UL,j ) =
sum( xmm3 );
602 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
603 (~C)(i+2UL,j ) =
sum( xmm5 );
604 (~C)(i+2UL,j+1UL) =
sum( xmm6 );
605 (~C)(i+3UL,j ) =
sum( xmm7 );
606 (~C)(i+3UL,j+1UL) =
sum( xmm8 );
// 4x1 tile: a single column j for the four rows.
610 for(
size_t k=0UL; k<K; k+=IT::size ) {
612 xmm1 = xmm1 + A.get(i ,k) * b1;
613 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
614 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
615 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
617 (~C)(i ,j) =
sum( xmm1 );
618 (~C)(i+1UL,j) =
sum( xmm2 );
619 (~C)(i+2UL,j) =
sum( xmm3 );
620 (~C)(i+3UL,j) =
sum( xmm4 );
// Two rows (i, i+1) at a time for what the four-row loop left over.
623 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
625 for( ; (j+2UL) <= N; j+=2UL ) {
627 for(
size_t k=0UL; k<K; k+=IT::size ) {
632 xmm1 = xmm1 + a1 * b1;
633 xmm2 = xmm2 + a1 * b2;
634 xmm3 = xmm3 + a2 * b1;
635 xmm4 = xmm4 + a2 * b2;
637 (~C)(i ,j ) =
sum( xmm1 );
638 (~C)(i ,j+1UL) =
sum( xmm2 );
639 (~C)(i+1UL,j ) =
sum( xmm3 );
640 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
// 2x1 tile.
644 for(
size_t k=0UL; k<K; k+=IT::size ) {
646 xmm1 = xmm1 + A.get(i ,k) * b1;
647 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
649 (~C)(i ,j) =
sum( xmm1 );
650 (~C)(i+1UL,j) =
sum( xmm2 );
// From here on only row i is written. 1x2 tile: columns j..j+1.
655 for( ; (j+2UL) <= N; j+=2UL ) {
657 for(
size_t k=0UL; k<K; k+=IT::size ) {
659 xmm1 = xmm1 + a1 * B.get(k,j );
660 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
662 (~C)(i,j ) =
sum( xmm1 );
663 (~C)(i,j+1UL) =
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
667 for(
size_t k=0UL; k<K; k+=IT::size ) {
668 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
670 (~C)(i,j) =
sum( xmm1 );
// Overload of the BLAS dispatch point that is enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the
// arguments are forwarded unchanged to selectDefaultAssignKernel.
691 template<
typename MT3
694 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
695 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
697 selectDefaultAssignKernel( C, A, B );
// BLAS kernel for C = A * B, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_sgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = 1.0F and beta = 0.0F, so the previous content of C does not
// contribute to the result.
717 template<
typename MT3
720 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
721 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
723 using boost::numeric_cast;
729 const int M ( numeric_cast<int>( A.rows() ) );
730 const int N ( numeric_cast<int>( B.columns() ) );
731 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
732 const int lda( numeric_cast<int>( A.spacing() ) );
733 const int ldb( numeric_cast<int>( B.spacing() ) );
734 const int ldc( numeric_cast<int>( C.spacing() ) );
736 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
737 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
738 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
739 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS kernel for C = A * B, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_dgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = 1.0 and beta = 0.0, so the previous content of C does not
// contribute to the result.
760 template<
typename MT3
763 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
764 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
766 using boost::numeric_cast;
772 const int M ( numeric_cast<int>( A.rows() ) );
773 const int N ( numeric_cast<int>( B.columns() ) );
774 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
775 const int lda( numeric_cast<int>( A.spacing() ) );
776 const int ldb( numeric_cast<int>( B.spacing() ) );
777 const int ldc( numeric_cast<int>( C.spacing() ) );
779 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
780 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
781 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
782 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS kernel for C = A * B, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_cgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (1,0) and beta = (0,0), so the previous content of C does not
// contribute to the result.
803 template<
typename MT3
806 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
807 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
809 using boost::numeric_cast;
818 const int M ( numeric_cast<int>( A.rows() ) );
819 const int N ( numeric_cast<int>( B.columns() ) );
820 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
821 const int lda( numeric_cast<int>( A.spacing() ) );
822 const int ldb( numeric_cast<int>( B.spacing() ) );
823 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
824 const complex<float> alpha( 1.0F, 0.0F );
825 const complex<float> beta ( 0.0F, 0.0F );
827 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
828 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
829 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
830 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C = A * B, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_zgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (1,0) and beta = (0,0), so the previous content of C does not
// contribute to the result.
851 template<
typename MT3
854 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
855 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
857 using boost::numeric_cast;
866 const int M ( numeric_cast<int>( A.rows() ) );
867 const int N ( numeric_cast<int>( B.columns() ) );
868 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
869 const int lda( numeric_cast<int>( A.spacing() ) );
870 const int ldb( numeric_cast<int>( B.spacing() ) );
871 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
872 const complex<double> alpha( 1.0, 0.0 );
873 const complex<double> beta ( 0.0, 0.0 );
875 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
876 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
877 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
878 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
896 template<
typename MT
902 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
914 const TmpType tmp( rhs );
933 template<
typename MT
942 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
957 DMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
959 DMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
// Plain-loop kernel computing C += A * B, used when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does NOT hold (DisableIf).
// The innermost loop over the columns of C is unrolled by a factor of two.
978 template<
typename MT3
981 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
982 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
984 const size_t M( A.rows() );
985 const size_t N( B.columns() );
986 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even value <= N.
989 const size_t end( N &
size_t(-2) );
991 for(
size_t i=0UL; i<M; ++i ) {
992 for(
size_t k=0UL; k<K; ++k ) {
// Columns are handled in pairs (j, j+1) up to the even bound 'end'.
993 for(
size_t j=0UL; j<end; j+=2UL ) {
994 C(i,j ) += A(i,k) * B(k,j );
995 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Column 'end' is the one left over when N is odd; the condition guarding
// this statement is on a line not shown in this excerpt.
998 C(i,end) += A(i,k) * B(k,end);
// Intrinsic-based kernel for C += A * B with a DenseMatrix<MT3,false>
// target, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// C is updated tile by tile. For each tile the products are accumulated in
// xmm variables while k advances in steps of IT::size, and every
// accumulator is reduced with sum() and added to one element of C.
// The operands a1, a2, b1, ... used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
1020 template<
typename MT3
1023 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1024 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1026 typedef IntrinsicTrait<ElementType> IT;
1028 const size_t M( A.rows() );
1029 const size_t N( B.columns() );
1030 const size_t K( A.columns() );
// Two rows (i, i+1) at a time.
1034 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: rows i..i+1, columns j..j+3, eight accumulators.
1036 for( ; (j+4UL) <= N; j+=4UL ) {
1037 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1038 for(
size_t k=0UL; k<K; k+=IT::size ) {
1045 xmm1 = xmm1 + a1 * b1;
1046 xmm2 = xmm2 + a1 * b2;
1047 xmm3 = xmm3 + a1 * b3;
1048 xmm4 = xmm4 + a1 * b4;
1049 xmm5 = xmm5 + a2 * b1;
1050 xmm6 = xmm6 + a2 * b2;
1051 xmm7 = xmm7 + a2 * b3;
1052 xmm8 = xmm8 + a2 * b4;
1054 (~C)(i ,j ) +=
sum( xmm1 );
1055 (~C)(i ,j+1UL) +=
sum( xmm2 );
1056 (~C)(i ,j+2UL) +=
sum( xmm3 );
1057 (~C)(i ,j+3UL) +=
sum( xmm4 );
1058 (~C)(i+1UL,j ) +=
sum( xmm5 );
1059 (~C)(i+1UL,j+1UL) +=
sum( xmm6 );
1060 (~C)(i+1UL,j+2UL) +=
sum( xmm7 );
1061 (~C)(i+1UL,j+3UL) +=
sum( xmm8 );
// 2x2 tile: rows i..i+1, columns j..j+1.
1063 for( ; (j+2UL) <= N; j+=2UL ) {
1065 for(
size_t k=0UL; k<K; k+=IT::size ) {
1070 xmm1 = xmm1 + a1 * b1;
1071 xmm2 = xmm2 + a1 * b2;
1072 xmm3 = xmm3 + a2 * b1;
1073 xmm4 = xmm4 + a2 * b2;
1075 (~C)(i ,j ) +=
sum( xmm1 );
1076 (~C)(i ,j+1UL) +=
sum( xmm2 );
1077 (~C)(i+1UL,j ) +=
sum( xmm3 );
1078 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
// 2x1 tile: a single column j for both rows.
1082 for(
size_t k=0UL; k<K; k+=IT::size ) {
1084 xmm1 = xmm1 + A.get(i ,k) * b1;
1085 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1087 (~C)(i ,j) +=
sum( xmm1 );
1088 (~C)(i+1UL,j) +=
sum( xmm2 );
// From here on only row i is updated. 1x4 tile: columns j..j+3.
1093 for( ; (j+4UL) <= N; j+=4UL ) {
1095 for(
size_t k=0UL; k<K; k+=IT::size ) {
1097 xmm1 = xmm1 + a1 * B.get(k,j );
1098 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1099 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
1100 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
1102 (~C)(i,j ) +=
sum( xmm1 );
1103 (~C)(i,j+1UL) +=
sum( xmm2 );
1104 (~C)(i,j+2UL) +=
sum( xmm3 );
1105 (~C)(i,j+3UL) +=
sum( xmm4 );
// 1x2 tile: columns j..j+1.
1107 for( ; (j+2UL) <= N; j+=2UL ) {
1109 for(
size_t k=0UL; k<K; k+=IT::size ) {
1111 xmm1 = xmm1 + a1 * B.get(k,j );
1112 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1114 (~C)(i,j ) +=
sum( xmm1 );
1115 (~C)(i,j+1UL) +=
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
1119 for(
size_t k=0UL; k<K; k+=IT::size ) {
1120 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1122 (~C)(i,j) +=
sum( xmm1 );
// Intrinsic-based kernel for C += A * B with a DenseMatrix<MT3,true>
// target, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Compared with the DenseMatrix<MT3,false> overload the tiles are taller
// than wide (4x2 instead of 2x4). For each tile the products are
// accumulated in xmm variables while k advances in steps of IT::size, and
// every accumulator is reduced with sum() and added to one element of C.
// The operands a1..a4, b1, b2 used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
1143 template<
typename MT3
1146 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1147 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1149 typedef IntrinsicTrait<ElementType> IT;
1151 const size_t M( A.rows() );
1152 const size_t N( B.columns() );
1153 const size_t K( A.columns() );
// Four rows (i .. i+3) at a time.
1157 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: rows i..i+3, columns j..j+1, eight accumulators.
1159 for( ; (j+2UL) <= N; j+=2UL ) {
1160 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1161 for(
size_t k=0UL; k<K; k+=IT::size ) {
1168 xmm1 = xmm1 + a1 * b1;
1169 xmm2 = xmm2 + a1 * b2;
1170 xmm3 = xmm3 + a2 * b1;
1171 xmm4 = xmm4 + a2 * b2;
1172 xmm5 = xmm5 + a3 * b1;
1173 xmm6 = xmm6 + a3 * b2;
1174 xmm7 = xmm7 + a4 * b1;
1175 xmm8 = xmm8 + a4 * b2;
1177 (~C)(i ,j ) +=
sum( xmm1 );
1178 (~C)(i ,j+1UL) +=
sum( xmm2 );
1179 (~C)(i+1UL,j ) +=
sum( xmm3 );
1180 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1181 (~C)(i+2UL,j ) +=
sum( xmm5 );
1182 (~C)(i+2UL,j+1UL) +=
sum( xmm6 );
1183 (~C)(i+3UL,j ) +=
sum( xmm7 );
1184 (~C)(i+3UL,j+1UL) +=
sum( xmm8 );
// 4x1 tile: a single column j for the four rows.
1188 for(
size_t k=0UL; k<K; k+=IT::size ) {
1190 xmm1 = xmm1 + A.get(i ,k) * b1;
1191 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1192 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
1193 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
1195 (~C)(i ,j) +=
sum( xmm1 );
1196 (~C)(i+1UL,j) +=
sum( xmm2 );
1197 (~C)(i+2UL,j) +=
sum( xmm3 );
1198 (~C)(i+3UL,j) +=
sum( xmm4 );
// Two rows (i, i+1) at a time for what the four-row loop left over.
1201 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
1203 for( ; (j+2UL) <= N; j+=2UL ) {
1205 for(
size_t k=0UL; k<K; k+=IT::size ) {
1210 xmm1 = xmm1 + a1 * b1;
1211 xmm2 = xmm2 + a1 * b2;
1212 xmm3 = xmm3 + a2 * b1;
1213 xmm4 = xmm4 + a2 * b2;
1215 (~C)(i ,j ) +=
sum( xmm1 );
1216 (~C)(i ,j+1UL) +=
sum( xmm2 );
1217 (~C)(i+1UL,j ) +=
sum( xmm3 );
1218 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
// 2x1 tile.
1222 for(
size_t k=0UL; k<K; k+=IT::size ) {
1224 xmm1 = xmm1 + A.get(i ,k) * b1;
1225 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1227 (~C)(i ,j) +=
sum( xmm1 );
1228 (~C)(i+1UL,j) +=
sum( xmm2 );
// From here on only row i is updated. 1x2 tile: columns j..j+1.
1233 for( ; (j+2UL) <= N; j+=2UL ) {
1235 for(
size_t k=0UL; k<K; k+=IT::size ) {
1237 xmm1 = xmm1 + a1 * B.get(k,j );
1238 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1240 (~C)(i,j ) +=
sum( xmm1 );
1241 (~C)(i,j+1UL) +=
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
1245 for(
size_t k=0UL; k<K; k+=IT::size ) {
1246 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1248 (~C)(i,j) +=
sum( xmm1 );
// Overload of the BLAS dispatch point that is enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the
// arguments are forwarded unchanged to selectDefaultAddAssignKernel.
1269 template<
typename MT3
1272 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1273 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1275 selectDefaultAddAssignKernel( C, A, B );
// BLAS kernel for C += A * B, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_sgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = 1.0F and beta = 1.0F: the product is added to the existing C.
1295 template<
typename MT3
1298 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1299 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1301 using boost::numeric_cast;
1307 const int M ( numeric_cast<int>( A.rows() ) );
1308 const int N ( numeric_cast<int>( B.columns() ) );
1309 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1310 const int lda( numeric_cast<int>( A.spacing() ) );
1311 const int ldb( numeric_cast<int>( B.spacing() ) );
1312 const int ldc( numeric_cast<int>( C.spacing() ) );
1314 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1315 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1316 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1317 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS kernel for C += A * B, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_dgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = 1.0 and beta = 1.0: the product is added to the existing C.
1338 template<
typename MT3
1341 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1342 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1344 using boost::numeric_cast;
1350 const int M ( numeric_cast<int>( A.rows() ) );
1351 const int N ( numeric_cast<int>( B.columns() ) );
1352 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1353 const int lda( numeric_cast<int>( A.spacing() ) );
1354 const int ldb( numeric_cast<int>( B.spacing() ) );
1355 const int ldc( numeric_cast<int>( C.spacing() ) );
1357 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1358 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1359 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1360 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS kernel for C += A * B, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_cgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (1,0) and beta = (1,0): the product is added to the existing C.
1381 template<
typename MT3
1384 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1385 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1387 using boost::numeric_cast;
1396 const int M ( numeric_cast<int>( A.rows() ) );
1397 const int N ( numeric_cast<int>( B.columns() ) );
1398 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1399 const int lda( numeric_cast<int>( A.spacing() ) );
1400 const int ldb( numeric_cast<int>( B.spacing() ) );
1401 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1402 const complex<float> alpha( 1.0F, 0.0F );
1403 const complex<float> beta ( 1.0F, 0.0F );
1405 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1406 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1407 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1408 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C += A * B, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_zgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (1,0) and beta = (1,0): the product is added to the existing C.
1429 template<
typename MT3
1432 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1433 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1435 using boost::numeric_cast;
1444 const int M ( numeric_cast<int>( A.rows() ) );
1445 const int N ( numeric_cast<int>( B.columns() ) );
1446 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1447 const int lda( numeric_cast<int>( A.spacing() ) );
1448 const int ldb( numeric_cast<int>( B.spacing() ) );
1449 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1450 const complex<double> alpha( 1.0, 0.0 );
1451 const complex<double> beta ( 1.0, 0.0 );
1453 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1454 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1455 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1456 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1479 template<
typename MT
1488 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1503 DMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1505 DMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
// Plain-loop kernel computing C -= A * B, used when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does NOT hold (DisableIf).
// The innermost loop over the columns of C is unrolled by a factor of two.
1524 template<
typename MT3
1527 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1528 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1530 const size_t M( A.rows() );
1531 const size_t N( B.columns() );
1532 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even value <= N.
1535 const size_t end( N &
size_t(-2) );
1537 for(
size_t i=0UL; i<M; ++i ) {
1538 for(
size_t k=0UL; k<K; ++k ) {
// Columns are handled in pairs (j, j+1) up to the even bound 'end'.
1539 for(
size_t j=0UL; j<end; j+=2UL ) {
1540 C(i,j ) -= A(i,k) * B(k,j );
1541 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Column 'end' is the one left over when N is odd; the condition guarding
// this statement is on a line not shown in this excerpt.
1544 C(i,end) -= A(i,k) * B(k,end);
// Intrinsic-based kernel for C -= A * B with a DenseMatrix<MT3,false>
// target, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// C is updated tile by tile. For each tile the products are accumulated
// (with a positive sign) in xmm variables while k advances in steps of
// IT::size; every accumulator is then reduced with sum() and subtracted
// from one element of C.
// The operands a1, a2, b1, ... used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
1566 template<
typename MT3
1569 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1570 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1572 typedef IntrinsicTrait<ElementType> IT;
1574 const size_t M( A.rows() );
1575 const size_t N( B.columns() );
1576 const size_t K( A.columns() );
// Two rows (i, i+1) at a time.
1580 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: rows i..i+1, columns j..j+3, eight accumulators.
1582 for( ; (j+4UL) <= N; j+=4UL ) {
1583 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1584 for(
size_t k=0UL; k<K; k+=IT::size ) {
1591 xmm1 = xmm1 + a1 * b1;
1592 xmm2 = xmm2 + a1 * b2;
1593 xmm3 = xmm3 + a1 * b3;
1594 xmm4 = xmm4 + a1 * b4;
1595 xmm5 = xmm5 + a2 * b1;
1596 xmm6 = xmm6 + a2 * b2;
1597 xmm7 = xmm7 + a2 * b3;
1598 xmm8 = xmm8 + a2 * b4;
1600 (~C)(i ,j ) -=
sum( xmm1 );
1601 (~C)(i ,j+1UL) -=
sum( xmm2 );
1602 (~C)(i ,j+2UL) -=
sum( xmm3 );
1603 (~C)(i ,j+3UL) -=
sum( xmm4 );
1604 (~C)(i+1UL,j ) -=
sum( xmm5 );
1605 (~C)(i+1UL,j+1UL) -=
sum( xmm6 );
1606 (~C)(i+1UL,j+2UL) -=
sum( xmm7 );
1607 (~C)(i+1UL,j+3UL) -=
sum( xmm8 );
// 2x2 tile: rows i..i+1, columns j..j+1.
1609 for( ; (j+2UL) <= N; j+=2UL ) {
1611 for(
size_t k=0UL; k<K; k+=IT::size ) {
1616 xmm1 = xmm1 + a1 * b1;
1617 xmm2 = xmm2 + a1 * b2;
1618 xmm3 = xmm3 + a2 * b1;
1619 xmm4 = xmm4 + a2 * b2;
1621 (~C)(i ,j ) -=
sum( xmm1 );
1622 (~C)(i ,j+1UL) -=
sum( xmm2 );
1623 (~C)(i+1UL,j ) -=
sum( xmm3 );
1624 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
// 2x1 tile: a single column j for both rows.
1628 for(
size_t k=0UL; k<K; k+=IT::size ) {
1630 xmm1 = xmm1 + A.get(i ,k) * b1;
1631 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1633 (~C)(i ,j) -=
sum( xmm1 );
1634 (~C)(i+1UL,j) -=
sum( xmm2 );
// From here on only row i is updated. 1x4 tile: columns j..j+3.
1639 for( ; (j+4UL) <= N; j+=4UL ) {
1641 for(
size_t k=0UL; k<K; k+=IT::size ) {
1643 xmm1 = xmm1 + a1 * B.get(k,j );
1644 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1645 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
1646 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
1648 (~C)(i,j ) -=
sum( xmm1 );
1649 (~C)(i,j+1UL) -=
sum( xmm2 );
1650 (~C)(i,j+2UL) -=
sum( xmm3 );
1651 (~C)(i,j+3UL) -=
sum( xmm4 );
// 1x2 tile: columns j..j+1.
1653 for( ; (j+2UL) <= N; j+=2UL ) {
1655 for(
size_t k=0UL; k<K; k+=IT::size ) {
1657 xmm1 = xmm1 + a1 * B.get(k,j );
1658 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1660 (~C)(i,j ) -=
sum( xmm1 );
1661 (~C)(i,j+1UL) -=
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
1665 for(
size_t k=0UL; k<K; k+=IT::size ) {
1666 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1668 (~C)(i,j) -=
sum( xmm1 );
// Intrinsic-based kernel for C -= A * B with a DenseMatrix<MT3,true>
// target, enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Compared with the DenseMatrix<MT3,false> overload the tiles are taller
// than wide (4x2 instead of 2x4). For each tile the products are
// accumulated (with a positive sign) in xmm variables while k advances in
// steps of IT::size; every accumulator is then reduced with sum() and
// subtracted from one element of C.
// The operands a1..a4, b1, b2 used below are defined on lines not shown in
// this excerpt.
// NOTE(review): k advances by IT::size while the loop bound is K; this
// presumably relies on the operands' storage being padded -- confirm.
1689 template<
typename MT3
1692 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1693 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1695 typedef IntrinsicTrait<ElementType> IT;
1697 const size_t M( A.rows() );
1698 const size_t N( B.columns() );
1699 const size_t K( A.columns() );
// Four rows (i .. i+3) at a time.
1703 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: rows i..i+3, columns j..j+1, eight accumulators.
1705 for( ; (j+2UL) <= N; j+=2UL ) {
1706 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1707 for(
size_t k=0UL; k<K; k+=IT::size ) {
1714 xmm1 = xmm1 + a1 * b1;
1715 xmm2 = xmm2 + a1 * b2;
1716 xmm3 = xmm3 + a2 * b1;
1717 xmm4 = xmm4 + a2 * b2;
1718 xmm5 = xmm5 + a3 * b1;
1719 xmm6 = xmm6 + a3 * b2;
1720 xmm7 = xmm7 + a4 * b1;
1721 xmm8 = xmm8 + a4 * b2;
1723 (~C)(i ,j ) -=
sum( xmm1 );
1724 (~C)(i ,j+1UL) -=
sum( xmm2 );
1725 (~C)(i+1UL,j ) -=
sum( xmm3 );
1726 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1727 (~C)(i+2UL,j ) -=
sum( xmm5 );
1728 (~C)(i+2UL,j+1UL) -=
sum( xmm6 );
1729 (~C)(i+3UL,j ) -=
sum( xmm7 );
1730 (~C)(i+3UL,j+1UL) -=
sum( xmm8 );
// 4x1 tile: a single column j for the four rows.
1734 for(
size_t k=0UL; k<K; k+=IT::size ) {
1736 xmm1 = xmm1 + A.get(i ,k) * b1;
1737 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1738 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
1739 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
1741 (~C)(i ,j) -=
sum( xmm1 );
1742 (~C)(i+1UL,j) -=
sum( xmm2 );
1743 (~C)(i+2UL,j) -=
sum( xmm3 );
1744 (~C)(i+3UL,j) -=
sum( xmm4 );
// Two rows (i, i+1) at a time for what the four-row loop left over.
1747 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
1749 for( ; (j+2UL) <= N; j+=2UL ) {
1751 for(
size_t k=0UL; k<K; k+=IT::size ) {
1756 xmm1 = xmm1 + a1 * b1;
1757 xmm2 = xmm2 + a1 * b2;
1758 xmm3 = xmm3 + a2 * b1;
1759 xmm4 = xmm4 + a2 * b2;
1761 (~C)(i ,j ) -=
sum( xmm1 );
1762 (~C)(i ,j+1UL) -=
sum( xmm2 );
1763 (~C)(i+1UL,j ) -=
sum( xmm3 );
1764 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
// 2x1 tile.
1768 for(
size_t k=0UL; k<K; k+=IT::size ) {
1770 xmm1 = xmm1 + A.get(i ,k) * b1;
1771 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1773 (~C)(i ,j) -=
sum( xmm1 );
1774 (~C)(i+1UL,j) -=
sum( xmm2 );
// From here on only row i is updated. 1x2 tile: columns j..j+1.
1779 for( ; (j+2UL) <= N; j+=2UL ) {
1781 for(
size_t k=0UL; k<K; k+=IT::size ) {
1783 xmm1 = xmm1 + a1 * B.get(k,j );
1784 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1786 (~C)(i,j ) -=
sum( xmm1 );
1787 (~C)(i,j+1UL) -=
sum( xmm2 );
// 1x1 tile: the single element C(i,j).
1791 for(
size_t k=0UL; k<K; k+=IT::size ) {
1792 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1794 (~C)(i,j) -=
sum( xmm1 );
// Overload of the BLAS dispatch point that is enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds: no BLAS routine is called, the
// arguments are forwarded unchanged to selectDefaultSubAssignKernel.
1815 template<
typename MT3
1818 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1819 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1821 selectDefaultSubAssignKernel( C, A, B );
// BLAS kernel for C -= A * B, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_sgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = -1.0F and beta = 1.0F: the product is subtracted from the
// existing C.
1841 template<
typename MT3
1844 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1845 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1847 using boost::numeric_cast;
1853 const int M ( numeric_cast<int>( A.rows() ) );
1854 const int N ( numeric_cast<int>( B.columns() ) );
1855 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1856 const int lda( numeric_cast<int>( A.spacing() ) );
1857 const int ldb( numeric_cast<int>( B.spacing() ) );
1858 const int ldc( numeric_cast<int>( C.spacing() ) );
1860 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1861 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1862 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1863 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS kernel for C -= A * B, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds; calls cblas_dgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = -1.0 and beta = 1.0: the product is subtracted from the
// existing C.
1884 template<
typename MT3
1887 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1888 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1890 using boost::numeric_cast;
1896 const int M ( numeric_cast<int>( A.rows() ) );
1897 const int N ( numeric_cast<int>( B.columns() ) );
1898 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1899 const int lda( numeric_cast<int>( A.spacing() ) );
1900 const int ldb( numeric_cast<int>( B.spacing() ) );
1901 const int ldc( numeric_cast<int>( C.spacing() ) );
1903 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1904 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1905 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1906 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS kernel for C -= A * B, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_cgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (-1,0) and beta = (1,0): the product is subtracted from the
// existing C.
1927 template<
typename MT3
1930 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1931 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1933 using boost::numeric_cast;
1942 const int M ( numeric_cast<int>( A.rows() ) );
1943 const int N ( numeric_cast<int>( B.columns() ) );
1944 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1945 const int lda( numeric_cast<int>( A.spacing() ) );
1946 const int ldb( numeric_cast<int>( B.spacing() ) );
1947 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1948 const complex<float> alpha( -1.0F, 0.0F );
1949 const complex<float> beta ( 1.0F, 0.0F );
1951 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1952 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1953 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1954 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS kernel for C -= A * B, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds; calls cblas_zgemm.
// Sizes and spacings are narrowed to int with boost::numeric_cast (a
// range-checked conversion) before being handed to the C interface.
// The storage-order flag and the two transpose flags are derived from
// IsRowMajorMatrix<MT3>:
//   row-major C : CblasRowMajor, A as CblasNoTrans, B as CblasTrans
//   otherwise   : CblasColMajor, A as CblasTrans,   B as CblasNoTrans
// alpha = (-1,0) and beta = (1,0): the product is subtracted from the
// existing C.
1975 template<
typename MT3
1978 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1979 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1981 using boost::numeric_cast;
1990 const int M ( numeric_cast<int>( A.rows() ) );
1991 const int N ( numeric_cast<int>( B.columns() ) );
1992 const int K ( numeric_cast<int>( A.columns() ) );
// The spacing() of each matrix is passed as its leading dimension.
1993 const int lda( numeric_cast<int>( A.spacing() ) );
1994 const int ldb( numeric_cast<int>( B.spacing() ) );
1995 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to the routine by address.
1996 const complex<double> alpha( -1.0, 0.0 );
1997 const complex<double> beta ( 1.0, 0.0 );
1999 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2000 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2001 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2002 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2048 template<
typename MT1
2052 :
public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2053 ,
private MatScalarMultExpr
2054 ,
private Computation
2058 typedef DMatTDMatMultExpr<MT1,MT2> MMM;
2059 typedef typename MMM::ResultType RES;
2060 typedef typename MT1::ResultType
RT1;
2061 typedef typename MT2::ResultType
RT2;
2062 typedef typename MT1::CompositeType
CT1;
2063 typedef typename MT2::CompositeType
CT2;
// Compile-time predicate for the scaled product: 'value' is nonzero only
// when IsFloat holds for the ElementType of each of T1, T2 and T3 and
// IsComplex does not hold for T4.
// NOTE(review): T4 is presumably the type of the scalar factor -- confirm
// against the call sites.
2071 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2072 struct UseSinglePrecisionKernel {
2073 enum { value = IsFloat<typename T1::ElementType>::value &&
2074 IsFloat<typename T2::ElementType>::value &&
2075 IsFloat<typename T3::ElementType>::value &&
2076 !IsComplex<T4>::value };
// Compile-time predicate for the scaled product: 'value' is nonzero only
// when IsDouble holds for the ElementType of each of T1, T2 and T3 and
// IsComplex does not hold for T4.
// NOTE(review): T4 is presumably the type of the scalar factor -- confirm
// against the call sites.
2085 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2086 struct UseDoublePrecisionKernel {
2087 enum { value = IsDouble<typename T1::ElementType>::value &&
2088 IsDouble<typename T2::ElementType>::value &&
2089 IsDouble<typename T3::ElementType>::value &&
2090 !IsComplex<T4>::value };
// Compile-time predicate for the scaled product: 'value' is nonzero only
// when the ElementType of each of T1, T2 and T3 is exactly complex<float>.
// Unlike the real-valued predicates of this class it takes no fourth type.
2099 template<
typename T1,
typename T2,
typename T3 >
2100 struct UseSinglePrecisionComplexKernel {
2101 typedef complex<float> Type;
2102 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2103 IsSame<typename T2::ElementType,Type>::value &&
2104 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate for the scaled product: 'value' is nonzero only
// when the ElementType of each of T1, T2 and T3 is exactly complex<double>.
// Unlike the real-valued predicates of this class it takes no fourth type.
2113 template<
typename T1,
typename T2,
typename T3 >
2114 struct UseDoublePrecisionComplexKernel {
2115 typedef complex<double> Type;
2116 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2117 IsSame<typename T2::ElementType,Type>::value &&
2118 IsSame<typename T3::ElementType,Type>::value };
// Compile-time predicate selecting the non-BLAS fallback for the scaled
// product: 'value' is nonzero when BLAZE_BLAS_MODE evaluates to zero, or
// when none of the four BLAS predicates holds. The two real-valued
// predicates are queried with <T1,T2,T3,T4>, the two complex ones with
// <T1,T2,T3> only.
2126 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2127 struct UseDefaultKernel {
2128 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2129 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2130 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2131 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time predicate selecting the intrinsic-based default kernel for
// the scaled product. 'value' is nonzero only if all of the following hold:
//  - T1, T2 and T3 each report 'vectorizable',
//  - T2 and T3 have the same ElementType as T1,
//  - T4 is exactly the ElementType of T1,
//  - IntrinsicTrait of that element type reports both 'addition' and
//    'multiplication'.
2139 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2140 struct UseVectorizedDefaultKernel {
2141 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2142 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2143 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2144 IsSame<typename T1::ElementType,T4>::value &&
2145 IntrinsicTrait<typename T1::ElementType>::addition &&
2146 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Public type aliases of the scaled matrix-product expression.
// NOTE(review): MMM, RES and ST are declared outside this view; MMM appears to
// be the wrapped product expression and ST the scalar type - confirm.

// Type of this expression specialisation.
2152 typedef DMatScalarMultExpr<MMM,ST,false>
This;
// Result type: MultTrait of RES and the scalar type.
2153 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Counterpart of ResultType as exposed by ResultType::OppositeType.
2154 typedef typename ResultType::OppositeType
OppositeType;
// Element type of the result.
2156 typedef typename ResultType::ElementType
ElementType;
// Intrinsic (SIMD) type associated with ElementType through IntrinsicTrait.
2157 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left operand of the scaling is the (const) matrix product expression.
2162 typedef const DMatTDMatMultExpr<MT1,MT2>
LeftOperand;
// Operand types used by the kernels: chosen between const RT1 and CT1 based on
// IsComputation<MT1> (presumably RT1 when MT1 is itself a computation, so that
// it is evaluated first - confirm SelectType's argument order).
2168 typedef typename SelectType< IsComputation<MT1>::value,
const RT1,
CT1 >::Type
LT;
// Same selection for the right-hand side operand MT2.
2171 typedef typename SelectType< IsComputation<MT2>::value,
const RT2,
CT2 >::Type
RT;
// This expression advertises itself as not vectorizable.
2176 enum { vectorizable = 0 };
// Constructor taking the wrapped matrix expression and the scalar factor.
// NOTE(review): the member initialisation is not visible in this view.
2185 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// NOTE(review): the statement below comes from a later original line and
// appears to belong to an element-access function whose signature is not
// visible here; it returns element (i,j) of the wrapped matrix times the scalar.
2201 return matrix_(i,j) * scalar_;
// Returns the number of rows, forwarded from the wrapped matrix expression.
2210 inline size_t rows()
const {
2211 return matrix_.rows();
// Returns the number of columns, forwarded from the wrapped matrix expression.
2220 inline size_t columns()
const {
2221 return matrix_.columns();
// Alias query: forwards the given address to matrix_.canAlias() and returns
// its answer unchanged. The scalar is not consulted.
2251 template<
typename T >
2252 inline bool canAlias(
const T* alias )
const {
2253 return matrix_.canAlias( alias );
// Alias query: forwards the given address to matrix_.isAliased() and returns
// its answer unchanged. The scalar is not consulted.
2263 template<
typename T >
2264 inline bool isAliased(
const T* alias )
const {
2265 return matrix_.isAliased( alias );
// Assignment of the scaled matrix product to a dense matrix.
// Fetches the two operands of the wrapped product, special-cases an empty
// target and a product with zero inner dimension (branch bodies not visible
// here), and finally hands over to either the default or the BLAS assign
// kernel, passing the scalar along.
// NOTE(review): the definitions of A and B and the condition that chooses
// between the two kernels are not visible in this view.
2284 template<
typename MT3
2286 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2293 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2294 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target matrix.
2296 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Zero inner dimension of the product.
2299 else if( left.columns() == 0UL ) {
2315 DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2317 DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default assign kernel, selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold.
// For every row i of A, C(i,k) is first initialised with the j=0 product, the
// remaining inner indices j=1..A.columns()-1 are accumulated, and a final loop
// over k follows.
// NOTE(review): the body of that final k-loop is not visible here; given the
// 'scalar' parameter it presumably scales C(i,k) - confirm.
// NOTE(review): A(i,0UL) and B(0UL,k) are read unconditionally, so the inner
// dimension must be at least one; the caller's zero-inner-dimension branch
// appears to guarantee this - confirm.
2335 template<
typename MT3
2339 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2340 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2342 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialise row i of C with the first term of each dot product.
2343 for(
size_t k=0UL; k<B.columns(); ++k ) {
2344 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the remaining terms.
2346 for(
size_t j=1UL; j<A.columns(); ++j ) {
2347 for(
size_t k=0UL; k<B.columns(); ++k ) {
2348 C(i,k) += A(i,j) * B(j,k);
2351 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default assign kernel for a target of type DenseMatrix<MT3,false>
// (storage-order flag false - presumably row-major; confirm).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) = sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 2x4, 2x2 and 2x1 for pairs of rows, followed
// by 1x4, 1x2 and 1x1 tiles that use a single row index.
// NOTE(review): the loads of a1/a2/b1..b4, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
2372 template<
typename MT3
2376 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2377 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2379 typedef IntrinsicTrait<ElementType> IT;
2381 const size_t M( A.rows() );
2382 const size_t N( B.columns() );
2383 const size_t K( A.columns() );
// Two rows of C per iteration.
2387 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, one per target element.
2389 for( ; (j+4UL) <= N; j+=4UL ) {
2390 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2391 for(
size_t k=0UL; k<K; k+=IT::size ) {
2398 xmm1 = xmm1 + a1 * b1;
2399 xmm2 = xmm2 + a1 * b2;
2400 xmm3 = xmm3 + a1 * b3;
2401 xmm4 = xmm4 + a1 * b4;
2402 xmm5 = xmm5 + a2 * b1;
2403 xmm6 = xmm6 + a2 * b2;
2404 xmm7 = xmm7 + a2 * b3;
2405 xmm8 = xmm8 + a2 * b4;
2407 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2408 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2409 (~C)(i ,j+2UL) =
sum( xmm3 ) * scalar;
2410 (~C)(i ,j+3UL) =
sum( xmm4 ) * scalar;
2411 (~C)(i+1UL,j ) =
sum( xmm5 ) * scalar;
2412 (~C)(i+1UL,j+1UL) =
sum( xmm6 ) * scalar;
2413 (~C)(i+1UL,j+2UL) =
sum( xmm7 ) * scalar;
2414 (~C)(i+1UL,j+3UL) =
sum( xmm8 ) * scalar;
// 2x2 tile for the remaining column pairs.
2416 for( ; (j+2UL) <= N; j+=2UL ) {
2418 for(
size_t k=0UL; k<K; k+=IT::size ) {
2423 xmm1 = xmm1 + a1 * b1;
2424 xmm2 = xmm2 + a1 * b2;
2425 xmm3 = xmm3 + a2 * b1;
2426 xmm4 = xmm4 + a2 * b2;
2428 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2429 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2430 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2431 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// 2x1 tile for a single remaining column.
2435 for(
size_t k=0UL; k<K; k+=IT::size ) {
2437 xmm1 = xmm1 + A.get(i ,k) * b1;
2438 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2440 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2441 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x4, then 1x2, then 1x1.
2446 for( ; (j+4UL) <= N; j+=4UL ) {
2448 for(
size_t k=0UL; k<K; k+=IT::size ) {
2450 xmm1 = xmm1 + a1 * B.get(k,j );
2451 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2452 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
2453 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
2455 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2456 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
2457 (~C)(i,j+2UL) =
sum( xmm3 ) * scalar;
2458 (~C)(i,j+3UL) =
sum( xmm4 ) * scalar;
2460 for( ; (j+2UL) <= N; j+=2UL ) {
2462 for(
size_t k=0UL; k<K; k+=IT::size ) {
2464 xmm1 = xmm1 + a1 * B.get(k,j );
2465 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2467 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2468 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
2472 for(
size_t k=0UL; k<K; k+=IT::size ) {
2473 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
2475 (~C)(i,j) =
sum( xmm1 ) * scalar;
// Vectorized default assign kernel for a target of type DenseMatrix<MT3,true>
// (storage-order flag true - presumably column-major; confirm).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) = sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 4x2 and 4x1 for groups of four rows, 2x2 and
// 2x1 for pairs of rows, followed by 1x2 and 1x1 tiles using a single row index.
// NOTE(review): the loads of a1..a4/b1/b2, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
2495 template<
typename MT3
2499 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2500 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2502 typedef IntrinsicTrait<ElementType> IT;
2504 const size_t M( A.rows() );
2505 const size_t N( B.columns() );
2506 const size_t K( A.columns() );
// Four rows of C per iteration.
2510 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, one per target element.
2512 for( ; (j+2UL) <= N; j+=2UL ) {
2513 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2514 for(
size_t k=0UL; k<K; k+=IT::size ) {
2521 xmm1 = xmm1 + a1 * b1;
2522 xmm2 = xmm2 + a1 * b2;
2523 xmm3 = xmm3 + a2 * b1;
2524 xmm4 = xmm4 + a2 * b2;
2525 xmm5 = xmm5 + a3 * b1;
2526 xmm6 = xmm6 + a3 * b2;
2527 xmm7 = xmm7 + a4 * b1;
2528 xmm8 = xmm8 + a4 * b2;
2530 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2531 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2532 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2533 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
2534 (~C)(i+2UL,j ) =
sum( xmm5 ) * scalar;
2535 (~C)(i+2UL,j+1UL) =
sum( xmm6 ) * scalar;
2536 (~C)(i+3UL,j ) =
sum( xmm7 ) * scalar;
2537 (~C)(i+3UL,j+1UL) =
sum( xmm8 ) * scalar;
// 4x1 tile for a single remaining column.
2541 for(
size_t k=0UL; k<K; k+=IT::size ) {
2543 xmm1 = xmm1 + A.get(i ,k) * b1;
2544 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2545 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
2546 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
2548 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2549 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
2550 (~C)(i+2UL,j) =
sum( xmm3 ) * scalar;
2551 (~C)(i+3UL,j) =
sum( xmm4 ) * scalar;
// Two rows of C per iteration: 2x2 tiles, then a 2x1 tile.
2554 for( ; (i+2UL) <= M; i+=2UL ) {
2556 for( ; (j+2UL) <= N; j+=2UL ) {
2558 for(
size_t k=0UL; k<K; k+=IT::size ) {
2563 xmm1 = xmm1 + a1 * b1;
2564 xmm2 = xmm2 + a1 * b2;
2565 xmm3 = xmm3 + a2 * b1;
2566 xmm4 = xmm4 + a2 * b2;
2568 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2569 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2570 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2571 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
2575 for(
size_t k=0UL; k<K; k+=IT::size ) {
2577 xmm1 = xmm1 + A.get(i ,k) * b1;
2578 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2580 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2581 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x2, then 1x1.
2586 for( ; (j+2UL) <= N; j+=2UL ) {
2588 for(
size_t k=0UL; k<K; k+=IT::size ) {
2590 xmm1 = xmm1 + a1 * B.get(k,j );
2591 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2593 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2594 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
2598 for(
size_t k=0UL; k<K; k+=IT::size ) {
2599 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
2601 (~C)(i,j) =
sum( xmm1 ) * scalar;
// BLAS assign dispatch, fallback case: when UseDefaultKernel<MT3,MT4,MT5,ST2>
// holds, no BLAS routine is used and the call is forwarded unchanged to
// selectDefaultAssignKernel.
2621 template<
typename MT3
2625 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2626 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2628 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assign kernel for single precision: computes C = scalar * A * B with
// cblas_sgemm. Enabled only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. beta is 0.0F, so gemm does not accumulate into the
// previous contents of C.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
2647 template<
typename MT3
2651 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2652 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2654 using boost::numeric_cast;
2660 const int M ( numeric_cast<int>( A.rows() ) );
2661 const int N ( numeric_cast<int>( B.columns() ) );
2662 const int K ( numeric_cast<int>( A.columns() ) );
2663 const int lda( numeric_cast<int>( A.spacing() ) );
2664 const int ldb( numeric_cast<int>( B.spacing() ) );
2665 const int ldc( numeric_cast<int>( C.spacing() ) );
2667 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2668 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2669 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2670 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assign kernel for double precision: computes C = scalar * A * B with
// cblas_dgemm. Enabled only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. beta is 0.0, so gemm does not accumulate into the
// previous contents of C.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
2690 template<
typename MT3
2694 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2695 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2697 using boost::numeric_cast;
2703 const int M ( numeric_cast<int>( A.rows() ) );
2704 const int N ( numeric_cast<int>( B.columns() ) );
2705 const int K ( numeric_cast<int>( A.columns() ) );
2706 const int lda( numeric_cast<int>( A.spacing() ) );
2707 const int ldb( numeric_cast<int>( B.spacing() ) );
2708 const int ldc( numeric_cast<int>( C.spacing() ) );
2710 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2711 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2712 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2713 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assign kernel for complex<float>: computes C = scalar * A * B with
// cblas_cgemm. Enabled only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Sizes and leading dimensions are converted to int with
// boost::numeric_cast. The complex routine takes alpha and beta by address;
// alpha is built from the scalar and beta is (0,0), so the previous contents
// of C are not accumulated.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
2733 template<
typename MT3
2737 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2738 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2740 using boost::numeric_cast;
2749 const int M ( numeric_cast<int>( A.rows() ) );
2750 const int N ( numeric_cast<int>( B.columns() ) );
2751 const int K ( numeric_cast<int>( A.columns() ) );
2752 const int lda( numeric_cast<int>( A.spacing() ) );
2753 const int ldb( numeric_cast<int>( B.spacing() ) );
2754 const int ldc( numeric_cast<int>( C.spacing() ) );
2755 const complex<float> alpha( scalar );
2756 const complex<float> beta ( 0.0F, 0.0F );
2758 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2759 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2760 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2761 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assign kernel for complex<double>: computes C = scalar * A * B with
// cblas_zgemm. Enabled only when UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Sizes and leading dimensions are converted to int with
// boost::numeric_cast. The complex routine takes alpha and beta by address;
// alpha is built from the scalar and beta is (0,0), so the previous contents
// of C are not accumulated.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
2781 template<
typename MT3
2785 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2786 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2788 using boost::numeric_cast;
2797 const int M ( numeric_cast<int>( A.rows() ) );
2798 const int N ( numeric_cast<int>( B.columns() ) );
2799 const int K ( numeric_cast<int>( A.columns() ) );
2800 const int lda( numeric_cast<int>( A.spacing() ) );
2801 const int ldb( numeric_cast<int>( B.spacing() ) );
2802 const int ldc( numeric_cast<int>( C.spacing() ) );
2803 const complex<double> alpha( scalar );
2804 const complex<double> beta ( 0.0, 0.0 );
2806 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2807 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2808 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2809 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of the scaled matrix product to a sparse matrix.
// The expression is first evaluated into a temporary whose type depends on the
// storage-order flag SO of the target (OppositeType or ResultType).
// NOTE(review): the statement that transfers the temporary into lhs is not
// visible in this view.
2826 template<
typename MT
2828 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
2832 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2844 const TmpType tmp( rhs );
// Addition assignment of the scaled matrix product to a dense matrix.
// Fetches the two operands of the wrapped product, has a single early branch
// for an empty target or a zero inner dimension (body not visible here), and
// then hands over to either the default or the BLAS add-assign kernel.
// NOTE(review): the definitions of A and B and the condition that chooses
// between the two kernels are not visible in this view.
2861 template<
typename MT3
2863 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2870 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2871 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
2873 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
2888 DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2890 DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default add-assign kernel, selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold.
// NOTE(review): the function body is not visible in this view.
2908 template<
typename MT3
2912 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2913 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default add-assign kernel for a target of type
// DenseMatrix<MT3,false> (storage-order flag false - presumably row-major;
// confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) += sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 2x4, 2x2 and 2x1 for pairs of rows, followed
// by 1x4, 1x2 and 1x1 tiles that use a single row index.
// NOTE(review): the loads of a1/a2/b1..b4, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
2934 template<
typename MT3
2938 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2939 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2941 typedef IntrinsicTrait<ElementType> IT;
2943 const size_t M( A.rows() );
2944 const size_t N( B.columns() );
2945 const size_t K( A.columns() );
// Two rows of C per iteration.
2949 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, one per target element.
2951 for( ; (j+4UL) <= N; j+=4UL ) {
2952 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2953 for(
size_t k=0UL; k<K; k+=IT::size ) {
2960 xmm1 = xmm1 + a1 * b1;
2961 xmm2 = xmm2 + a1 * b2;
2962 xmm3 = xmm3 + a1 * b3;
2963 xmm4 = xmm4 + a1 * b4;
2964 xmm5 = xmm5 + a2 * b1;
2965 xmm6 = xmm6 + a2 * b2;
2966 xmm7 = xmm7 + a2 * b3;
2967 xmm8 = xmm8 + a2 * b4;
2969 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
2970 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
2971 (~C)(i ,j+2UL) +=
sum( xmm3 ) * scalar;
2972 (~C)(i ,j+3UL) +=
sum( xmm4 ) * scalar;
2973 (~C)(i+1UL,j ) +=
sum( xmm5 ) * scalar;
2974 (~C)(i+1UL,j+1UL) +=
sum( xmm6 ) * scalar;
2975 (~C)(i+1UL,j+2UL) +=
sum( xmm7 ) * scalar;
2976 (~C)(i+1UL,j+3UL) +=
sum( xmm8 ) * scalar;
// 2x2 tile for the remaining column pairs.
2978 for( ; (j+2UL) <= N; j+=2UL ) {
2980 for(
size_t k=0UL; k<K; k+=IT::size ) {
2985 xmm1 = xmm1 + a1 * b1;
2986 xmm2 = xmm2 + a1 * b2;
2987 xmm3 = xmm3 + a2 * b1;
2988 xmm4 = xmm4 + a2 * b2;
2990 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
2991 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
2992 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
2993 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// 2x1 tile for a single remaining column.
2997 for(
size_t k=0UL; k<K; k+=IT::size ) {
2999 xmm1 = xmm1 + A.get(i ,k) * b1;
3000 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3002 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3003 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x4, then 1x2, then 1x1.
3008 for( ; (j+4UL) <= N; j+=4UL ) {
3010 for(
size_t k=0UL; k<K; k+=IT::size ) {
3012 xmm1 = xmm1 + a1 * B.get(k,j );
3013 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3014 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
3015 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
3017 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3018 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3019 (~C)(i,j+2UL) +=
sum( xmm3 ) * scalar;
3020 (~C)(i,j+3UL) +=
sum( xmm4 ) * scalar;
3022 for( ; (j+2UL) <= N; j+=2UL ) {
3024 for(
size_t k=0UL; k<K; k+=IT::size ) {
3026 xmm1 = xmm1 + a1 * B.get(k,j );
3027 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3029 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3030 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3034 for(
size_t k=0UL; k<K; k+=IT::size ) {
3035 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3037 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// Vectorized default add-assign kernel for a target of type
// DenseMatrix<MT3,true> (storage-order flag true - presumably column-major;
// confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) += sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 4x2 and 4x1 for groups of four rows, 2x2 and
// 2x1 for pairs of rows, followed by 1x2 and 1x1 tiles using a single row index.
// NOTE(review): the loads of a1..a4/b1/b2, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
3057 template<
typename MT3
3061 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3062 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3064 typedef IntrinsicTrait<ElementType> IT;
3066 const size_t M( A.rows() );
3067 const size_t N( B.columns() );
3068 const size_t K( A.columns() );
// Four rows of C per iteration.
3072 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, one per target element.
3074 for( ; (j+2UL) <= N; j+=2UL ) {
3075 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3076 for(
size_t k=0UL; k<K; k+=IT::size ) {
3083 xmm1 = xmm1 + a1 * b1;
3084 xmm2 = xmm2 + a1 * b2;
3085 xmm3 = xmm3 + a2 * b1;
3086 xmm4 = xmm4 + a2 * b2;
3087 xmm5 = xmm5 + a3 * b1;
3088 xmm6 = xmm6 + a3 * b2;
3089 xmm7 = xmm7 + a4 * b1;
3090 xmm8 = xmm8 + a4 * b2;
3092 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3093 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3094 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3095 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
3096 (~C)(i+2UL,j ) +=
sum( xmm5 ) * scalar;
3097 (~C)(i+2UL,j+1UL) +=
sum( xmm6 ) * scalar;
3098 (~C)(i+3UL,j ) +=
sum( xmm7 ) * scalar;
3099 (~C)(i+3UL,j+1UL) +=
sum( xmm8 ) * scalar;
// 4x1 tile for a single remaining column.
3103 for(
size_t k=0UL; k<K; k+=IT::size ) {
3105 xmm1 = xmm1 + A.get(i ,k) * b1;
3106 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3107 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
3108 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
3110 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3111 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
3112 (~C)(i+2UL,j) +=
sum( xmm3 ) * scalar;
3113 (~C)(i+3UL,j) +=
sum( xmm4 ) * scalar;
// Two rows of C per iteration: 2x2 tiles, then a 2x1 tile.
3116 for( ; (i+2UL) <= M; i+=2UL ) {
3118 for( ; (j+2UL) <= N; j+=2UL ) {
3120 for(
size_t k=0UL; k<K; k+=IT::size ) {
3125 xmm1 = xmm1 + a1 * b1;
3126 xmm2 = xmm2 + a1 * b2;
3127 xmm3 = xmm3 + a2 * b1;
3128 xmm4 = xmm4 + a2 * b2;
3130 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3131 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3132 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3133 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
3137 for(
size_t k=0UL; k<K; k+=IT::size ) {
3139 xmm1 = xmm1 + A.get(i ,k) * b1;
3140 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3142 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3143 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x2, then 1x1.
3148 for( ; (j+2UL) <= N; j+=2UL ) {
3150 for(
size_t k=0UL; k<K; k+=IT::size ) {
3152 xmm1 = xmm1 + a1 * B.get(k,j );
3153 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3155 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3156 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3160 for(
size_t k=0UL; k<K; k+=IT::size ) {
3161 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3163 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// BLAS add-assign dispatch, fallback case: when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds, no BLAS routine is used and the
// call is forwarded unchanged to selectDefaultAddAssignKernel.
3183 template<
typename MT3
3187 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3188 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3190 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS add-assign kernel for single precision: computes C += scalar * A * B
// with cblas_sgemm. Enabled only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds. Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. beta is 1.0F, so gemm adds the product to the existing
// contents of C.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
3209 template<
typename MT3
3213 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3214 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3216 using boost::numeric_cast;
3222 const int M ( numeric_cast<int>( A.rows() ) );
3223 const int N ( numeric_cast<int>( B.columns() ) );
3224 const int K ( numeric_cast<int>( A.columns() ) );
3225 const int lda( numeric_cast<int>( A.spacing() ) );
3226 const int ldb( numeric_cast<int>( B.spacing() ) );
3227 const int ldc( numeric_cast<int>( C.spacing() ) );
3229 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3230 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3231 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3232 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS add-assign kernel for double precision: computes C += scalar * A * B
// with cblas_dgemm. Enabled only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>
// holds. Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. beta is 1.0, so gemm adds the product to the existing
// contents of C.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
3252 template<
typename MT3
3256 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3257 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3259 using boost::numeric_cast;
3265 const int M ( numeric_cast<int>( A.rows() ) );
3266 const int N ( numeric_cast<int>( B.columns() ) );
3267 const int K ( numeric_cast<int>( A.columns() ) );
3268 const int lda( numeric_cast<int>( A.spacing() ) );
3269 const int ldb( numeric_cast<int>( B.spacing() ) );
3270 const int ldc( numeric_cast<int>( C.spacing() ) );
3272 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3273 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3274 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3275 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS add-assign kernel for complex<float>: computes C += scalar * A * B with
// cblas_cgemm. Enabled only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Sizes and leading dimensions are converted to int with
// boost::numeric_cast. The complex routine takes alpha and beta by address;
// alpha is built from the scalar and beta is (1,0), so the product is added to
// the existing contents of C.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
3295 template<
typename MT3
3299 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3300 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3302 using boost::numeric_cast;
3311 const int M ( numeric_cast<int>( A.rows() ) );
3312 const int N ( numeric_cast<int>( B.columns() ) );
3313 const int K ( numeric_cast<int>( A.columns() ) );
3314 const int lda( numeric_cast<int>( A.spacing() ) );
3315 const int ldb( numeric_cast<int>( B.spacing() ) );
3316 const int ldc( numeric_cast<int>( C.spacing() ) );
3317 const complex<float> alpha( scalar );
3318 const complex<float> beta ( 1.0F, 0.0F );
3320 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3321 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3322 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3323 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS add-assign kernel for complex<double>: computes C += scalar * A * B
// with cblas_zgemm. Enabled only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Sizes and leading
// dimensions are converted to int with boost::numeric_cast. The complex
// routine takes alpha and beta by address; alpha is built from the scalar and
// beta is (1,0), so the product is added to the existing contents of C.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
3343 template<
typename MT3
3347 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3348 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3350 using boost::numeric_cast;
3359 const int M ( numeric_cast<int>( A.rows() ) );
3360 const int N ( numeric_cast<int>( B.columns() ) );
3361 const int K ( numeric_cast<int>( A.columns() ) );
3362 const int lda( numeric_cast<int>( A.spacing() ) );
3363 const int ldb( numeric_cast<int>( B.spacing() ) );
3364 const int ldc( numeric_cast<int>( C.spacing() ) );
3365 const complex<double> alpha( scalar );
3366 const complex<double> beta ( 1.0, 0.0 );
3368 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3369 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3370 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3371 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of the scaled matrix product to a dense matrix.
// Fetches the two operands of the wrapped product, has a single early branch
// for an empty target or a zero inner dimension (body not visible here), and
// then hands over to either the default or the BLAS sub-assign kernel.
// NOTE(review): the definitions of A and B and the condition that chooses
// between the two kernels are not visible in this view.
3392 template<
typename MT3
3394 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
3401 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3402 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
3404 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3419 DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3421 DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default sub-assign kernel, selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold.
// NOTE(review): the function body is not visible in this view.
3439 template<
typename MT3
3443 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3444 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default sub-assign kernel for a target of type
// DenseMatrix<MT3,false> (storage-order flag false - presumably row-major;
// confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) -= sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 2x4, 2x2 and 2x1 for pairs of rows, followed
// by 1x4, 1x2 and 1x1 tiles that use a single row index.
// NOTE(review): the loads of a1/a2/b1..b4, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
3465 template<
typename MT3
3469 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3470 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3472 typedef IntrinsicTrait<ElementType> IT;
3474 const size_t M( A.rows() );
3475 const size_t N( B.columns() );
3476 const size_t K( A.columns() );
// Two rows of C per iteration.
3480 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, one per target element.
3482 for( ; (j+4UL) <= N; j+=4UL ) {
3483 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3484 for(
size_t k=0UL; k<K; k+=IT::size ) {
3491 xmm1 = xmm1 + a1 * b1;
3492 xmm2 = xmm2 + a1 * b2;
3493 xmm3 = xmm3 + a1 * b3;
3494 xmm4 = xmm4 + a1 * b4;
3495 xmm5 = xmm5 + a2 * b1;
3496 xmm6 = xmm6 + a2 * b2;
3497 xmm7 = xmm7 + a2 * b3;
3498 xmm8 = xmm8 + a2 * b4;
3500 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3501 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3502 (~C)(i ,j+2UL) -=
sum( xmm3 ) * scalar;
3503 (~C)(i ,j+3UL) -=
sum( xmm4 ) * scalar;
3504 (~C)(i+1UL,j ) -=
sum( xmm5 ) * scalar;
3505 (~C)(i+1UL,j+1UL) -=
sum( xmm6 ) * scalar;
3506 (~C)(i+1UL,j+2UL) -=
sum( xmm7 ) * scalar;
3507 (~C)(i+1UL,j+3UL) -=
sum( xmm8 ) * scalar;
// 2x2 tile for the remaining column pairs.
3509 for( ; (j+2UL) <= N; j+=2UL ) {
3511 for(
size_t k=0UL; k<K; k+=IT::size ) {
3516 xmm1 = xmm1 + a1 * b1;
3517 xmm2 = xmm2 + a1 * b2;
3518 xmm3 = xmm3 + a2 * b1;
3519 xmm4 = xmm4 + a2 * b2;
3521 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3522 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3523 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3524 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// 2x1 tile for a single remaining column.
3528 for(
size_t k=0UL; k<K; k+=IT::size ) {
3530 xmm1 = xmm1 + A.get(i ,k) * b1;
3531 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3533 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3534 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x4, then 1x2, then 1x1.
3539 for( ; (j+4UL) <= N; j+=4UL ) {
3541 for(
size_t k=0UL; k<K; k+=IT::size ) {
3543 xmm1 = xmm1 + a1 * B.get(k,j );
3544 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3545 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
3546 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
3548 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3549 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
3550 (~C)(i,j+2UL) -=
sum( xmm3 ) * scalar;
3551 (~C)(i,j+3UL) -=
sum( xmm4 ) * scalar;
3553 for( ; (j+2UL) <= N; j+=2UL ) {
3555 for(
size_t k=0UL; k<K; k+=IT::size ) {
3557 xmm1 = xmm1 + a1 * B.get(k,j );
3558 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3560 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3561 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
3565 for(
size_t k=0UL; k<K; k+=IT::size ) {
3566 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3568 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// Vectorized default sub-assign kernel for a target of type
// DenseMatrix<MT3,true> (storage-order flag true - presumably column-major;
// confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// Every visible store has the form C(i,j) -= sum( xmm ) * scalar, where xmm
// accumulates products of intrinsic vectors of A and B over k in steps of
// IT::size. Visible tiling of C: 4x2 and 4x1 for groups of four rows, 2x2 and
// 2x1 for pairs of rows, followed by 1x2 and 1x1 tiles using a single row index.
// NOTE(review): the loads of a1..a4/b1/b2, the declarations of i and j and the
// accumulator initialisation are not visible in this view. The k-loops have no
// remainder handling, so this presumably relies on padded operand storage and
// on IntrinsicType default-constructing to zero - confirm.
3588 template<
typename MT3
3592 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3593 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3595 typedef IntrinsicTrait<ElementType> IT;
3597 const size_t M( A.rows() );
3598 const size_t N( B.columns() );
3599 const size_t K( A.columns() );
// Four rows of C per iteration.
3603 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, one per target element.
3605 for( ; (j+2UL) <= N; j+=2UL ) {
3606 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3607 for(
size_t k=0UL; k<K; k+=IT::size ) {
3614 xmm1 = xmm1 + a1 * b1;
3615 xmm2 = xmm2 + a1 * b2;
3616 xmm3 = xmm3 + a2 * b1;
3617 xmm4 = xmm4 + a2 * b2;
3618 xmm5 = xmm5 + a3 * b1;
3619 xmm6 = xmm6 + a3 * b2;
3620 xmm7 = xmm7 + a4 * b1;
3621 xmm8 = xmm8 + a4 * b2;
3623 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3624 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3625 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3626 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
3627 (~C)(i+2UL,j ) -=
sum( xmm5 ) * scalar;
3628 (~C)(i+2UL,j+1UL) -=
sum( xmm6 ) * scalar;
3629 (~C)(i+3UL,j ) -=
sum( xmm7 ) * scalar;
3630 (~C)(i+3UL,j+1UL) -=
sum( xmm8 ) * scalar;
// 4x1 tile for a single remaining column.
3634 for(
size_t k=0UL; k<K; k+=IT::size ) {
3636 xmm1 = xmm1 + A.get(i ,k) * b1;
3637 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3638 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
3639 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
3641 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3642 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
3643 (~C)(i+2UL,j) -=
sum( xmm3 ) * scalar;
3644 (~C)(i+3UL,j) -=
sum( xmm4 ) * scalar;
// Two rows of C per iteration: 2x2 tiles, then a 2x1 tile.
3647 for( ; (i+2UL) <= M; i+=2UL ) {
3649 for( ; (j+2UL) <= N; j+=2UL ) {
3651 for(
size_t k=0UL; k<K; k+=IT::size ) {
3656 xmm1 = xmm1 + a1 * b1;
3657 xmm2 = xmm2 + a1 * b2;
3658 xmm3 = xmm3 + a2 * b1;
3659 xmm4 = xmm4 + a2 * b2;
3661 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3662 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3663 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3664 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
3668 for(
size_t k=0UL; k<K; k+=IT::size ) {
3670 xmm1 = xmm1 + A.get(i ,k) * b1;
3671 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3673 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3674 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single-row tiles (apparently for a leftover row): 1x2, then 1x1.
3679 for( ; (j+2UL) <= N; j+=2UL ) {
3681 for(
size_t k=0UL; k<K; k+=IT::size ) {
3683 xmm1 = xmm1 + a1 * B.get(k,j );
3684 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3686 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3687 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
3691 for(
size_t k=0UL; k<K; k+=IT::size ) {
3692 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3694 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// BLAS sub-assign dispatch, fallback case: when
// UseDefaultKernel<MT3,MT4,MT5,ST2> holds, no BLAS routine is used and the
// call is forwarded unchanged to selectDefaultSubAssignKernel.
3714 template<
typename MT3
3718 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3719 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3721 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS sub-assign kernel for single precision: computes C -= scalar * A * B
// with cblas_sgemm. Enabled only when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2>
// holds. Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. The subtraction is expressed by passing the negated
// scalar as alpha together with beta = 1.0F.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
3740 template<
typename MT3
3744 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3745 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3747 using boost::numeric_cast;
3753 const int M ( numeric_cast<int>( A.rows() ) );
3754 const int N ( numeric_cast<int>( B.columns() ) );
3755 const int K ( numeric_cast<int>( A.columns() ) );
3756 const int lda( numeric_cast<int>( A.spacing() ) );
3757 const int ldb( numeric_cast<int>( B.spacing() ) );
3758 const int ldc( numeric_cast<int>( C.spacing() ) );
3760 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3761 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3762 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3763 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS sub-assign kernel for double precision: computes C -= scalar * A * B
// with cblas_dgemm. Enabled only when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2>
// holds. Sizes and leading dimensions (spacing()) are converted to int with
// boost::numeric_cast. The subtraction is expressed by passing the negated
// scalar as alpha together with beta = 1.0.
// The layout and transpose flags follow the storage order of C: for a
// row-major C the right operand B is passed as transposed, otherwise the left
// operand A is passed as transposed.
3783 template<
typename MT3
3787 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3788 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3790 using boost::numeric_cast;
3796 const int M ( numeric_cast<int>( A.rows() ) );
3797 const int N ( numeric_cast<int>( B.columns() ) );
3798 const int K ( numeric_cast<int>( A.columns() ) );
3799 const int lda( numeric_cast<int>( A.spacing() ) );
3800 const int ldb( numeric_cast<int>( B.spacing() ) );
3801 const int ldc( numeric_cast<int>( C.spacing() ) );
3803 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3804 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3805 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3806 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS sub-assign kernel for complex<float>: computes C -= scalar * A * B with
// cblas_cgemm. Enabled only when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Sizes and leading dimensions are converted to int with
// boost::numeric_cast. The complex routine takes alpha and beta by address;
// alpha is built from the negated scalar and beta is (1,0), which together
// express the subtraction.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
3826 template<
typename MT3
3830 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3831 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3833 using boost::numeric_cast;
3842 const int M ( numeric_cast<int>( A.rows() ) );
3843 const int N ( numeric_cast<int>( B.columns() ) );
3844 const int K ( numeric_cast<int>( A.columns() ) );
3845 const int lda( numeric_cast<int>( A.spacing() ) );
3846 const int ldb( numeric_cast<int>( B.spacing() ) );
3847 const int ldc( numeric_cast<int>( C.spacing() ) );
3848 const complex<float> alpha( -scalar );
3849 const complex<float> beta ( 1.0F, 0.0F );
3851 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3852 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3853 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3854 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS sub-assign kernel for complex<double>: computes C -= scalar * A * B
// with cblas_zgemm. Enabled only when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Sizes and leading
// dimensions are converted to int with boost::numeric_cast. The complex
// routine takes alpha and beta by address; alpha is built from the negated
// scalar and beta is (1,0), which together express the subtraction.
// The layout and transpose flags follow the storage order of C, exactly as in
// the real-valued kernels.
3874 template<
typename MT3
3878 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3879 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3881 using boost::numeric_cast;
3890 const int M ( numeric_cast<int>( A.rows() ) );
3891 const int N ( numeric_cast<int>( B.columns() ) );
3892 const int K ( numeric_cast<int>( A.columns() ) );
3893 const int lda( numeric_cast<int>( A.spacing() ) );
3894 const int ldb( numeric_cast<int>( B.spacing() ) );
3895 const int ldc( numeric_cast<int>( C.spacing() ) );
3896 const complex<double> alpha( -scalar );
3897 const complex<double> beta ( 1.0, 0.0 );
3899 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3900 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3901 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3902 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Free function returning a DMatTDMatMultExpr<T1,T2> expression object.
// Throws std::invalid_argument with the message below when the operand sizes
// are incompatible.
// NOTE(review): the function name, its parameters and the exact size check are
// not visible in this view; presumably this is the multiplication operator and
// the check compares lhs.columns() with rhs.rows() - confirm.
3971 template<
typename T1
3973 inline const DMatTDMatMultExpr<T1,T2>
3979 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression-trait fragment for multiplying the matrix product by a dense,
// non-transpose vector. If MT1 is a dense row-major matrix, MT2 a dense
// column-major matrix and VT a dense non-transpose vector, Type is the type of
// MT1 * ( MT2 * VT ), i.e. the right matrix is applied to the vector first;
// otherwise Type is INVALID_TYPE.
// NOTE(review): the header of the trait specialisation is not visible here.
3996 template<
typename MT1,
typename MT2,
typename VT >
4001 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4002 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4003 IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
4004 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4005 , INVALID_TYPE >::Type Type;
// Expression-trait fragment for multiplying the matrix product by a sparse,
// non-transpose vector. If MT1 is a dense row-major matrix, MT2 a dense
// column-major matrix and VT a sparse non-transpose vector, Type is the type
// of MT1 * ( MT2 * VT ); the outer product uses the dense-vector trait because
// its operand is the result of the inner trait. Otherwise Type is INVALID_TYPE.
// NOTE(review): the header of the trait specialisation is not visible here.
4014 template<
typename MT1,
typename MT2,
typename VT >
4019 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4020 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4021 IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
4022 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4023 , INVALID_TYPE >::Type Type;
// Expression-trait fragment for multiplying a dense transpose vector by the
// matrix product. If VT is a dense transpose vector, MT1 a dense row-major
// matrix and MT2 a dense column-major matrix, Type is the type of
// ( VT * MT1 ) * MT2, i.e. the vector is applied to the left matrix first;
// otherwise Type is INVALID_TYPE.
// NOTE(review): the header of the trait specialisation is not visible here.
4032 template<
typename VT,
typename MT1,
typename MT2 >
4037 typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
4038 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4039 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4040 ,
typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4041 , INVALID_TYPE >::Type Type;
// Expression-trait fragment for multiplying a sparse transpose vector by the
// matrix product. If VT is a sparse transpose vector, MT1 a dense row-major
// matrix and MT2 a dense column-major matrix, Type is the type of
// ( VT * MT1 ) * MT2; the outer product uses the dense-vector trait because
// its operand is the result of the inner trait. Otherwise Type is INVALID_TYPE.
// NOTE(review): the header of the trait specialisation is not visible here.
4050 template<
typename VT,
typename MT1,
typename MT2 >
4055 typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
4056 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4057 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4058 ,
typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4059 , INVALID_TYPE >::Type Type;
// Expression-trait fragment for taking a row of the matrix product: Type is
// the type of ( row of const MT1 ) * MT2, so only one row of the left operand
// takes part in the multiplication.
// NOTE(review): the header of the trait specialisation is not visible here.
4068 template<
typename MT1,
typename MT2 >
4073 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Expression-trait fragment for taking a column of the matrix product: Type is
// the type of MT1 * ( column of const MT2 ), so only one column of the right
// operand takes part in the multiplication.
// NOTE(review): the header of the trait specialisation is not visible here.
4082 template<
typename MT1,
typename MT2 >
4087 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;