22 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
23 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
31 #include <boost/cast.hpp>
94 template<
typename MT1
102 typedef typename MT1::ResultType
RT1;
103 typedef typename MT2::ResultType
RT2;
104 typedef typename MT1::CompositeType
CT1;
105 typedef typename MT2::CompositeType
CT2;
113 template<
typename T1,
typename T2,
typename T3 >
114 struct UseSinglePrecisionKernel {
127 template<
typename T1,
typename T2,
typename T3 >
128 struct UseDoublePrecisionKernel {
// Compile-time trait: `value` is nonzero only when the element types of T1, T2 and
// T3 are all exactly complex<float> (each one is compared with IsSame).
142 template<
typename T1,
typename T2,
typename T3 >
143 struct UseSinglePrecisionComplexKernel {
144 typedef complex<float> Type;
145 enum { value = IsSame<typename T1::ElementType,Type>::value &&
146 IsSame<typename T2::ElementType,Type>::value &&
147 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: `value` is nonzero only when the element types of T1, T2 and
// T3 are all exactly complex<double> (each one is compared with IsSame).
158 template<
typename T1,
typename T2,
typename T3 >
159 struct UseDoublePrecisionComplexKernel {
160 typedef complex<double> Type;
161 enum { value = IsSame<typename T1::ElementType,Type>::value &&
162 IsSame<typename T2::ElementType,Type>::value &&
163 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the non-BLAS path: `value` is nonzero when
// BLAZE_BLAS_MODE evaluates to false, or when none of the four BLAS kernel traits
// (single/double precision, real/complex) is satisfied for <T1,T2,T3>.
173 template<
typename T1,
typename T2,
typename T3 >
174 struct UseDefaultKernel {
175 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
176 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
177 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
178 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait for the intrinsic-based default kernel. `value` is nonzero only
// when all three types report `vectorizable`, T2 and T3 share T1's element type, and
// IntrinsicTrait for that element type reports both `addition` and `multiplication`.
188 template<
typename T1,
typename T2,
typename T3 >
189 struct UseVectorizedDefaultKernel {
190 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
191 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
192 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
193 IntrinsicTrait<typename T1::ElementType>::addition &&
194 IntrinsicTrait<typename T1::ElementType>::multiplication };
225 enum { vectorizable = 0 };
255 if(
lhs_.columns() != 0UL ) {
256 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
258 for(
size_t k=1UL; k<end; k+=2UL ) {
260 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
262 if( end <
lhs_.columns() ) {
290 return rhs_.columns();
320 template<
typename T >
322 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
332 template<
typename T >
334 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
353 template<
typename MT
362 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
365 else if( rhs.
lhs_.columns() == 0UL ) {
381 DMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
383 DMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
// Scalar default kernel for the assignment C = A * B.
//   C  target matrix, written element-wise through C(i,j)
//   A  left operand, read as A(i,k)
//   B  right operand, read as B(k,j)
// For each row i, a first sweep over j initializes C(i,j) with the k = 0 product;
// the terms k = 1 .. K-1 are then accumulated, with j as the innermost loop.
// NOTE(review): the initialization reads A(i,0UL) and B(0UL,j) unconditionally, so
// this presumably relies on the caller excluding K == 0 -- confirm in the dispatcher.
402 template<
typename MT3
406 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
408 const size_t M( A.rows() );
409 const size_t N( B.columns() );
410 const size_t K( A.columns() );
412 for(
size_t i=0UL; i<M; ++i ) {
413 for(
size_t j=0UL; j<N; ++j ) {
414 C(i,j) = A(i,0UL) * B(0UL,j);
416 for(
size_t k=1UL; k<K; ++k ) {
417 for(
size_t j=0UL; j<N; ++j ) {
418 C(i,j) += A(i,k) * B(k,j);
440 template<
typename MT3
443 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
444 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
446 typedef IntrinsicTrait<ElementType> IT;
448 const size_t M( A.rows() );
449 const size_t N( B.columns() );
450 const size_t K( A.columns() );
454 for( ; (i+2UL) <= M; i+=2UL ) {
456 for( ; (j+4UL) <= N; j+=4UL ) {
457 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
458 for(
size_t k=0UL; k<K; k+=IT::size ) {
465 xmm1 = xmm1 + a1 * b1;
466 xmm2 = xmm2 + a1 * b2;
467 xmm3 = xmm3 + a1 * b3;
468 xmm4 = xmm4 + a1 * b4;
469 xmm5 = xmm5 + a2 * b1;
470 xmm6 = xmm6 + a2 * b2;
471 xmm7 = xmm7 + a2 * b3;
472 xmm8 = xmm8 + a2 * b4;
474 (~C)(i ,j ) =
sum( xmm1 );
475 (~C)(i ,j+1UL) =
sum( xmm2 );
476 (~C)(i ,j+2UL) =
sum( xmm3 );
477 (~C)(i ,j+3UL) =
sum( xmm4 );
478 (~C)(i+1UL,j ) =
sum( xmm5 );
479 (~C)(i+1UL,j+1UL) =
sum( xmm6 );
480 (~C)(i+1UL,j+2UL) =
sum( xmm7 );
481 (~C)(i+1UL,j+3UL) =
sum( xmm8 );
483 for( ; (j+2UL) <= N; j+=2UL ) {
485 for(
size_t k=0UL; k<K; k+=IT::size ) {
490 xmm1 = xmm1 + a1 * b1;
491 xmm2 = xmm2 + a1 * b2;
492 xmm3 = xmm3 + a2 * b1;
493 xmm4 = xmm4 + a2 * b2;
495 (~C)(i ,j ) =
sum( xmm1 );
496 (~C)(i ,j+1UL) =
sum( xmm2 );
497 (~C)(i+1UL,j ) =
sum( xmm3 );
498 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
502 for(
size_t k=0UL; k<K; k+=IT::size ) {
504 xmm1 = xmm1 + A.get(i ,k) * b1;
505 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
507 (~C)(i ,j) =
sum( xmm1 );
508 (~C)(i+1UL,j) =
sum( xmm2 );
513 for( ; (j+4UL) <= N; j+=4UL ) {
515 for(
size_t k=0UL; k<K; k+=IT::size ) {
517 xmm1 = xmm1 + a1 * B.get(k,j );
518 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
519 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
520 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
522 (~C)(i,j ) =
sum( xmm1 );
523 (~C)(i,j+1UL) =
sum( xmm2 );
524 (~C)(i,j+2UL) =
sum( xmm3 );
525 (~C)(i,j+3UL) =
sum( xmm4 );
527 for( ; (j+2UL) <= N; j+=2UL ) {
529 for(
size_t k=0UL; k<K; k+=IT::size ) {
531 xmm1 = xmm1 + a1 * B.get(k,j );
532 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
534 (~C)(i,j ) =
sum( xmm1 );
535 (~C)(i,j+1UL) =
sum( xmm2 );
539 for(
size_t k=0UL; k<K; k+=IT::size ) {
540 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
542 (~C)(i,j) =
sum( xmm1 );
563 template<
typename MT3
566 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
567 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
569 typedef IntrinsicTrait<ElementType> IT;
571 const size_t M( A.rows() );
572 const size_t N( B.columns() );
573 const size_t K( A.columns() );
577 for( ; (i+4UL) <= M; i+=4UL ) {
579 for( ; (j+2UL) <= N; j+=2UL ) {
580 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
581 for(
size_t k=0UL; k<K; k+=IT::size ) {
588 xmm1 = xmm1 + a1 * b1;
589 xmm2 = xmm2 + a1 * b2;
590 xmm3 = xmm3 + a2 * b1;
591 xmm4 = xmm4 + a2 * b2;
592 xmm5 = xmm5 + a3 * b1;
593 xmm6 = xmm6 + a3 * b2;
594 xmm7 = xmm7 + a4 * b1;
595 xmm8 = xmm8 + a4 * b2;
597 (~C)(i ,j ) =
sum( xmm1 );
598 (~C)(i ,j+1UL) =
sum( xmm2 );
599 (~C)(i+1UL,j ) =
sum( xmm3 );
600 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
601 (~C)(i+2UL,j ) =
sum( xmm5 );
602 (~C)(i+2UL,j+1UL) =
sum( xmm6 );
603 (~C)(i+3UL,j ) =
sum( xmm7 );
604 (~C)(i+3UL,j+1UL) =
sum( xmm8 );
608 for(
size_t k=0UL; k<K; k+=IT::size ) {
610 xmm1 = xmm1 + A.get(i ,k) * b1;
611 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
612 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
613 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
615 (~C)(i ,j) =
sum( xmm1 );
616 (~C)(i+1UL,j) =
sum( xmm2 );
617 (~C)(i+2UL,j) =
sum( xmm3 );
618 (~C)(i+3UL,j) =
sum( xmm4 );
621 for( ; (i+2UL) <= M; i+=2UL ) {
623 for( ; (j+2UL) <= N; j+=2UL ) {
625 for(
size_t k=0UL; k<K; k+=IT::size ) {
630 xmm1 = xmm1 + a1 * b1;
631 xmm2 = xmm2 + a1 * b2;
632 xmm3 = xmm3 + a2 * b1;
633 xmm4 = xmm4 + a2 * b2;
635 (~C)(i ,j ) =
sum( xmm1 );
636 (~C)(i ,j+1UL) =
sum( xmm2 );
637 (~C)(i+1UL,j ) =
sum( xmm3 );
638 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
642 for(
size_t k=0UL; k<K; k+=IT::size ) {
644 xmm1 = xmm1 + A.get(i ,k) * b1;
645 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
647 (~C)(i ,j) =
sum( xmm1 );
648 (~C)(i+1UL,j) =
sum( xmm2 );
653 for( ; (j+2UL) <= N; j+=2UL ) {
655 for(
size_t k=0UL; k<K; k+=IT::size ) {
657 xmm1 = xmm1 + a1 * B.get(k,j );
658 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
660 (~C)(i,j ) =
sum( xmm1 );
661 (~C)(i,j+1UL) =
sum( xmm2 );
665 for(
size_t k=0UL; k<K; k+=IT::size ) {
666 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
668 (~C)(i,j) =
sum( xmm1 );
// Fallback overload of the BLAS assignment dispatcher: enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds, it forwards C, A and B unchanged to
// selectDefaultAssignKernel.
689 template<
typename MT3
692 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
693 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
695 selectDefaultAssignKernel( C, A, B );
// BLAS assignment kernel, enabled when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
// Issues cblas_sgemm with alpha = 1.0F and beta = 0.0F
// (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
715 template<
typename MT3
718 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
719 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
721 using boost::numeric_cast;
727 const int M ( numeric_cast<int>( A.rows() ) );
728 const int N ( numeric_cast<int>( B.columns() ) );
729 const int K ( numeric_cast<int>( A.columns() ) );
730 const int lda( numeric_cast<int>( A.spacing() ) );
731 const int ldb( numeric_cast<int>( B.spacing() ) );
732 const int ldc( numeric_cast<int>( C.spacing() ) );
// The layout argument follows the storage order of C (IsRowMajorMatrix<MT3>). The
// transpose flags for (A, B) are (NoTrans, Trans) for a row-major C and
// (Trans, NoTrans) otherwise.
// NOTE(review): presumably because A is row-major and B column-major -- confirm.
734 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
735 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
736 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
737 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel, enabled when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
// Issues cblas_dgemm with alpha = 1.0 and beta = 0.0
// (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
758 template<
typename MT3
761 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
762 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
764 using boost::numeric_cast;
770 const int M ( numeric_cast<int>( A.rows() ) );
771 const int N ( numeric_cast<int>( B.columns() ) );
772 const int K ( numeric_cast<int>( A.columns() ) );
773 const int lda( numeric_cast<int>( A.spacing() ) );
774 const int ldb( numeric_cast<int>( B.spacing() ) );
775 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
777 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
778 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
779 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
780 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel, enabled when UseSinglePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Issues cblas_cgemm with alpha = (1,0) and beta = (0,0), both passed by
// address (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
801 template<
typename MT3
804 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
805 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
807 using boost::numeric_cast;
816 const int M ( numeric_cast<int>( A.rows() ) );
817 const int N ( numeric_cast<int>( B.columns() ) );
818 const int K ( numeric_cast<int>( A.columns() ) );
819 const int lda( numeric_cast<int>( A.spacing() ) );
820 const int ldb( numeric_cast<int>( B.spacing() ) );
821 const int ldc( numeric_cast<int>( C.spacing() ) );
822 const complex<float> alpha( 1.0F, 0.0F );
823 const complex<float> beta ( 0.0F, 0.0F );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
825 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
826 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
827 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
828 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel, enabled when UseDoublePrecisionComplexKernel<MT3,MT4,MT5>
// holds. Issues cblas_zgemm with alpha = (1,0) and beta = (0,0), both passed by
// address (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
849 template<
typename MT3
852 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
853 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
855 using boost::numeric_cast;
864 const int M ( numeric_cast<int>( A.rows() ) );
865 const int N ( numeric_cast<int>( B.columns() ) );
866 const int K ( numeric_cast<int>( A.columns() ) );
867 const int lda( numeric_cast<int>( A.spacing() ) );
868 const int ldb( numeric_cast<int>( B.spacing() ) );
869 const int ldc( numeric_cast<int>( C.spacing() ) );
870 const complex<double> alpha( 1.0, 0.0 );
871 const complex<double> beta ( 0.0, 0.0 );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
873 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
874 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
875 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
876 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
894 template<
typename MT
900 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
912 const TmpType tmp( rhs );
931 template<
typename MT
940 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
955 DMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
957 DMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
// Scalar default kernel for the addition assignment C += A * B; this overload is
// disabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//   C  target matrix, updated in place through C(i,j)
//   A  left operand, read as A(i,k)
//   B  right operand, read as B(k,j)
// `end` is N with its lowest bit cleared (N & size_t(-2)), i.e. the largest even
// value not exceeding N, so the innermost loop handles two columns per iteration.
976 template<
typename MT3
979 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
980 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
982 const size_t M( A.rows() );
983 const size_t N( B.columns() );
984 const size_t K( A.columns() );
987 const size_t end( N &
size_t(-2) );
989 for(
size_t i=0UL; i<M; ++i ) {
990 for(
size_t k=0UL; k<K; ++k ) {
991 for(
size_t j=0UL; j<end; j+=2UL ) {
992 C(i,j ) += A(i,k) * B(k,j );
993 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update of the leftover column at index `end`.
// NOTE(review): the condition guarding this statement is not visible in this excerpt
// -- presumably `end < N`; confirm.
996 C(i,end) += A(i,k) * B(k,end);
1018 template<
typename MT3
1021 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1022 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1024 typedef IntrinsicTrait<ElementType> IT;
1026 const size_t M( A.rows() );
1027 const size_t N( B.columns() );
1028 const size_t K( A.columns() );
1032 for( ; (i+2UL) <= M; i+=2UL ) {
1034 for( ; (j+4UL) <= N; j+=4UL ) {
1035 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1036 for(
size_t k=0UL; k<K; k+=IT::size ) {
1043 xmm1 = xmm1 + a1 * b1;
1044 xmm2 = xmm2 + a1 * b2;
1045 xmm3 = xmm3 + a1 * b3;
1046 xmm4 = xmm4 + a1 * b4;
1047 xmm5 = xmm5 + a2 * b1;
1048 xmm6 = xmm6 + a2 * b2;
1049 xmm7 = xmm7 + a2 * b3;
1050 xmm8 = xmm8 + a2 * b4;
1052 (~C)(i ,j ) +=
sum( xmm1 );
1053 (~C)(i ,j+1UL) +=
sum( xmm2 );
1054 (~C)(i ,j+2UL) +=
sum( xmm3 );
1055 (~C)(i ,j+3UL) +=
sum( xmm4 );
1056 (~C)(i+1UL,j ) +=
sum( xmm5 );
1057 (~C)(i+1UL,j+1UL) +=
sum( xmm6 );
1058 (~C)(i+1UL,j+2UL) +=
sum( xmm7 );
1059 (~C)(i+1UL,j+3UL) +=
sum( xmm8 );
1061 for( ; (j+2UL) <= N; j+=2UL ) {
1063 for(
size_t k=0UL; k<K; k+=IT::size ) {
1068 xmm1 = xmm1 + a1 * b1;
1069 xmm2 = xmm2 + a1 * b2;
1070 xmm3 = xmm3 + a2 * b1;
1071 xmm4 = xmm4 + a2 * b2;
1073 (~C)(i ,j ) +=
sum( xmm1 );
1074 (~C)(i ,j+1UL) +=
sum( xmm2 );
1075 (~C)(i+1UL,j ) +=
sum( xmm3 );
1076 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1080 for(
size_t k=0UL; k<K; k+=IT::size ) {
1082 xmm1 = xmm1 + A.get(i ,k) * b1;
1083 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1085 (~C)(i ,j) +=
sum( xmm1 );
1086 (~C)(i+1UL,j) +=
sum( xmm2 );
1091 for( ; (j+4UL) <= N; j+=4UL ) {
1093 for(
size_t k=0UL; k<K; k+=IT::size ) {
1095 xmm1 = xmm1 + a1 * B.get(k,j );
1096 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1097 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
1098 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
1100 (~C)(i,j ) +=
sum( xmm1 );
1101 (~C)(i,j+1UL) +=
sum( xmm2 );
1102 (~C)(i,j+2UL) +=
sum( xmm3 );
1103 (~C)(i,j+3UL) +=
sum( xmm4 );
1105 for( ; (j+2UL) <= N; j+=2UL ) {
1107 for(
size_t k=0UL; k<K; k+=IT::size ) {
1109 xmm1 = xmm1 + a1 * B.get(k,j );
1110 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1112 (~C)(i,j ) +=
sum( xmm1 );
1113 (~C)(i,j+1UL) +=
sum( xmm2 );
1117 for(
size_t k=0UL; k<K; k+=IT::size ) {
1118 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1120 (~C)(i,j) +=
sum( xmm1 );
1141 template<
typename MT3
1144 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1145 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1147 typedef IntrinsicTrait<ElementType> IT;
1149 const size_t M( A.rows() );
1150 const size_t N( B.columns() );
1151 const size_t K( A.columns() );
1155 for( ; (i+4UL) <= M; i+=4UL ) {
1157 for( ; (j+2UL) <= N; j+=2UL ) {
1158 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1159 for(
size_t k=0UL; k<K; k+=IT::size ) {
1166 xmm1 = xmm1 + a1 * b1;
1167 xmm2 = xmm2 + a1 * b2;
1168 xmm3 = xmm3 + a2 * b1;
1169 xmm4 = xmm4 + a2 * b2;
1170 xmm5 = xmm5 + a3 * b1;
1171 xmm6 = xmm6 + a3 * b2;
1172 xmm7 = xmm7 + a4 * b1;
1173 xmm8 = xmm8 + a4 * b2;
1175 (~C)(i ,j ) +=
sum( xmm1 );
1176 (~C)(i ,j+1UL) +=
sum( xmm2 );
1177 (~C)(i+1UL,j ) +=
sum( xmm3 );
1178 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1179 (~C)(i+2UL,j ) +=
sum( xmm5 );
1180 (~C)(i+2UL,j+1UL) +=
sum( xmm6 );
1181 (~C)(i+3UL,j ) +=
sum( xmm7 );
1182 (~C)(i+3UL,j+1UL) +=
sum( xmm8 );
1186 for(
size_t k=0UL; k<K; k+=IT::size ) {
1188 xmm1 = xmm1 + A.get(i ,k) * b1;
1189 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1190 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
1191 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
1193 (~C)(i ,j) +=
sum( xmm1 );
1194 (~C)(i+1UL,j) +=
sum( xmm2 );
1195 (~C)(i+2UL,j) +=
sum( xmm3 );
1196 (~C)(i+3UL,j) +=
sum( xmm4 );
1199 for( ; (i+2UL) <= M; i+=2UL ) {
1201 for( ; (j+2UL) <= N; j+=2UL ) {
1203 for(
size_t k=0UL; k<K; k+=IT::size ) {
1208 xmm1 = xmm1 + a1 * b1;
1209 xmm2 = xmm2 + a1 * b2;
1210 xmm3 = xmm3 + a2 * b1;
1211 xmm4 = xmm4 + a2 * b2;
1213 (~C)(i ,j ) +=
sum( xmm1 );
1214 (~C)(i ,j+1UL) +=
sum( xmm2 );
1215 (~C)(i+1UL,j ) +=
sum( xmm3 );
1216 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1220 for(
size_t k=0UL; k<K; k+=IT::size ) {
1222 xmm1 = xmm1 + A.get(i ,k) * b1;
1223 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1225 (~C)(i ,j) +=
sum( xmm1 );
1226 (~C)(i+1UL,j) +=
sum( xmm2 );
1231 for( ; (j+2UL) <= N; j+=2UL ) {
1233 for(
size_t k=0UL; k<K; k+=IT::size ) {
1235 xmm1 = xmm1 + a1 * B.get(k,j );
1236 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1238 (~C)(i,j ) +=
sum( xmm1 );
1239 (~C)(i,j+1UL) +=
sum( xmm2 );
1243 for(
size_t k=0UL; k<K; k+=IT::size ) {
1244 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1246 (~C)(i,j) +=
sum( xmm1 );
// Fallback overload of the BLAS addition-assignment dispatcher: enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds, it forwards C, A and B unchanged to
// selectDefaultAddAssignKernel.
1267 template<
typename MT3
1270 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1271 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1273 selectDefaultAddAssignKernel( C, A, B );
// BLAS addition-assignment kernel, enabled when UseSinglePrecisionKernel<MT3,MT4,MT5>
// holds. Issues cblas_sgemm with alpha = 1.0F and beta = 1.0F, so the product is
// added onto the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1293 template<
typename MT3
1296 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1297 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1299 using boost::numeric_cast;
1305 const int M ( numeric_cast<int>( A.rows() ) );
1306 const int N ( numeric_cast<int>( B.columns() ) );
1307 const int K ( numeric_cast<int>( A.columns() ) );
1308 const int lda( numeric_cast<int>( A.spacing() ) );
1309 const int ldb( numeric_cast<int>( B.spacing() ) );
1310 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1312 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1313 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1314 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1315 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition-assignment kernel, enabled when UseDoublePrecisionKernel<MT3,MT4,MT5>
// holds. Issues cblas_dgemm with alpha = 1.0 and beta = 1.0, so the product is
// added onto the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1336 template<
typename MT3
1339 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1340 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1342 using boost::numeric_cast;
1348 const int M ( numeric_cast<int>( A.rows() ) );
1349 const int N ( numeric_cast<int>( B.columns() ) );
1350 const int K ( numeric_cast<int>( A.columns() ) );
1351 const int lda( numeric_cast<int>( A.spacing() ) );
1352 const int ldb( numeric_cast<int>( B.spacing() ) );
1353 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1355 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1356 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1357 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1358 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition-assignment kernel, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Issues cblas_cgemm with
// alpha = (1,0) and beta = (1,0), both passed by address, so the product is added
// onto the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1379 template<
typename MT3
1382 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1383 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1385 using boost::numeric_cast;
1394 const int M ( numeric_cast<int>( A.rows() ) );
1395 const int N ( numeric_cast<int>( B.columns() ) );
1396 const int K ( numeric_cast<int>( A.columns() ) );
1397 const int lda( numeric_cast<int>( A.spacing() ) );
1398 const int ldb( numeric_cast<int>( B.spacing() ) );
1399 const int ldc( numeric_cast<int>( C.spacing() ) );
1400 const complex<float> alpha( 1.0F, 0.0F );
1401 const complex<float> beta ( 1.0F, 0.0F );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1403 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1404 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1405 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1406 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition-assignment kernel, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Issues cblas_zgemm with
// alpha = (1,0) and beta = (1,0), both passed by address, so the product is added
// onto the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1427 template<
typename MT3
1430 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1431 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1433 using boost::numeric_cast;
1442 const int M ( numeric_cast<int>( A.rows() ) );
1443 const int N ( numeric_cast<int>( B.columns() ) );
1444 const int K ( numeric_cast<int>( A.columns() ) );
1445 const int lda( numeric_cast<int>( A.spacing() ) );
1446 const int ldb( numeric_cast<int>( B.spacing() ) );
1447 const int ldc( numeric_cast<int>( C.spacing() ) );
1448 const complex<double> alpha( 1.0, 0.0 );
1449 const complex<double> beta ( 1.0, 0.0 );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1451 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1452 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1453 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1454 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1477 template<
typename MT
1486 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1501 DMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1503 DMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
// Scalar default kernel for the subtraction assignment C -= A * B; this overload is
// disabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
//   C  target matrix, updated in place through C(i,j)
//   A  left operand, read as A(i,k)
//   B  right operand, read as B(k,j)
// `end` is N with its lowest bit cleared (N & size_t(-2)), i.e. the largest even
// value not exceeding N, so the innermost loop handles two columns per iteration.
1522 template<
typename MT3
1525 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1526 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1528 const size_t M( A.rows() );
1529 const size_t N( B.columns() );
1530 const size_t K( A.columns() );
1533 const size_t end( N &
size_t(-2) );
1535 for(
size_t i=0UL; i<M; ++i ) {
1536 for(
size_t k=0UL; k<K; ++k ) {
1537 for(
size_t j=0UL; j<end; j+=2UL ) {
1538 C(i,j ) -= A(i,k) * B(k,j );
1539 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update of the leftover column at index `end`.
// NOTE(review): the condition guarding this statement is not visible in this excerpt
// -- presumably `end < N`; confirm.
1542 C(i,end) -= A(i,k) * B(k,end);
1564 template<
typename MT3
1567 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1568 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1570 typedef IntrinsicTrait<ElementType> IT;
1572 const size_t M( A.rows() );
1573 const size_t N( B.columns() );
1574 const size_t K( A.columns() );
1578 for( ; (i+2UL) <= M; i+=2UL ) {
1580 for( ; (j+4UL) <= N; j+=4UL ) {
1581 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1582 for(
size_t k=0UL; k<K; k+=IT::size ) {
1589 xmm1 = xmm1 + a1 * b1;
1590 xmm2 = xmm2 + a1 * b2;
1591 xmm3 = xmm3 + a1 * b3;
1592 xmm4 = xmm4 + a1 * b4;
1593 xmm5 = xmm5 + a2 * b1;
1594 xmm6 = xmm6 + a2 * b2;
1595 xmm7 = xmm7 + a2 * b3;
1596 xmm8 = xmm8 + a2 * b4;
1598 (~C)(i ,j ) -=
sum( xmm1 );
1599 (~C)(i ,j+1UL) -=
sum( xmm2 );
1600 (~C)(i ,j+2UL) -=
sum( xmm3 );
1601 (~C)(i ,j+3UL) -=
sum( xmm4 );
1602 (~C)(i+1UL,j ) -=
sum( xmm5 );
1603 (~C)(i+1UL,j+1UL) -=
sum( xmm6 );
1604 (~C)(i+1UL,j+2UL) -=
sum( xmm7 );
1605 (~C)(i+1UL,j+3UL) -=
sum( xmm8 );
1607 for( ; (j+2UL) <= N; j+=2UL ) {
1609 for(
size_t k=0UL; k<K; k+=IT::size ) {
1614 xmm1 = xmm1 + a1 * b1;
1615 xmm2 = xmm2 + a1 * b2;
1616 xmm3 = xmm3 + a2 * b1;
1617 xmm4 = xmm4 + a2 * b2;
1619 (~C)(i ,j ) -=
sum( xmm1 );
1620 (~C)(i ,j+1UL) -=
sum( xmm2 );
1621 (~C)(i+1UL,j ) -=
sum( xmm3 );
1622 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1626 for(
size_t k=0UL; k<K; k+=IT::size ) {
1628 xmm1 = xmm1 + A.get(i ,k) * b1;
1629 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1631 (~C)(i ,j) -=
sum( xmm1 );
1632 (~C)(i+1UL,j) -=
sum( xmm2 );
1637 for( ; (j+4UL) <= N; j+=4UL ) {
1639 for(
size_t k=0UL; k<K; k+=IT::size ) {
1641 xmm1 = xmm1 + a1 * B.get(k,j );
1642 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1643 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
1644 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
1646 (~C)(i,j ) -=
sum( xmm1 );
1647 (~C)(i,j+1UL) -=
sum( xmm2 );
1648 (~C)(i,j+2UL) -=
sum( xmm3 );
1649 (~C)(i,j+3UL) -=
sum( xmm4 );
1651 for( ; (j+2UL) <= N; j+=2UL ) {
1653 for(
size_t k=0UL; k<K; k+=IT::size ) {
1655 xmm1 = xmm1 + a1 * B.get(k,j );
1656 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1658 (~C)(i,j ) -=
sum( xmm1 );
1659 (~C)(i,j+1UL) -=
sum( xmm2 );
1663 for(
size_t k=0UL; k<K; k+=IT::size ) {
1664 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1666 (~C)(i,j) -=
sum( xmm1 );
1687 template<
typename MT3
1690 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1691 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1693 typedef IntrinsicTrait<ElementType> IT;
1695 const size_t M( A.rows() );
1696 const size_t N( B.columns() );
1697 const size_t K( A.columns() );
1701 for( ; (i+4UL) <= M; i+=4UL ) {
1703 for( ; (j+2UL) <= N; j+=2UL ) {
1704 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1705 for(
size_t k=0UL; k<K; k+=IT::size ) {
1712 xmm1 = xmm1 + a1 * b1;
1713 xmm2 = xmm2 + a1 * b2;
1714 xmm3 = xmm3 + a2 * b1;
1715 xmm4 = xmm4 + a2 * b2;
1716 xmm5 = xmm5 + a3 * b1;
1717 xmm6 = xmm6 + a3 * b2;
1718 xmm7 = xmm7 + a4 * b1;
1719 xmm8 = xmm8 + a4 * b2;
1721 (~C)(i ,j ) -=
sum( xmm1 );
1722 (~C)(i ,j+1UL) -=
sum( xmm2 );
1723 (~C)(i+1UL,j ) -=
sum( xmm3 );
1724 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1725 (~C)(i+2UL,j ) -=
sum( xmm5 );
1726 (~C)(i+2UL,j+1UL) -=
sum( xmm6 );
1727 (~C)(i+3UL,j ) -=
sum( xmm7 );
1728 (~C)(i+3UL,j+1UL) -=
sum( xmm8 );
1732 for(
size_t k=0UL; k<K; k+=IT::size ) {
1734 xmm1 = xmm1 + A.get(i ,k) * b1;
1735 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1736 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
1737 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
1739 (~C)(i ,j) -=
sum( xmm1 );
1740 (~C)(i+1UL,j) -=
sum( xmm2 );
1741 (~C)(i+2UL,j) -=
sum( xmm3 );
1742 (~C)(i+3UL,j) -=
sum( xmm4 );
1745 for( ; (i+2UL) <= M; i+=2UL ) {
1747 for( ; (j+2UL) <= N; j+=2UL ) {
1749 for(
size_t k=0UL; k<K; k+=IT::size ) {
1754 xmm1 = xmm1 + a1 * b1;
1755 xmm2 = xmm2 + a1 * b2;
1756 xmm3 = xmm3 + a2 * b1;
1757 xmm4 = xmm4 + a2 * b2;
1759 (~C)(i ,j ) -=
sum( xmm1 );
1760 (~C)(i ,j+1UL) -=
sum( xmm2 );
1761 (~C)(i+1UL,j ) -=
sum( xmm3 );
1762 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1766 for(
size_t k=0UL; k<K; k+=IT::size ) {
1768 xmm1 = xmm1 + A.get(i ,k) * b1;
1769 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
1771 (~C)(i ,j) -=
sum( xmm1 );
1772 (~C)(i+1UL,j) -=
sum( xmm2 );
1777 for( ; (j+2UL) <= N; j+=2UL ) {
1779 for(
size_t k=0UL; k<K; k+=IT::size ) {
1781 xmm1 = xmm1 + a1 * B.get(k,j );
1782 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
1784 (~C)(i,j ) -=
sum( xmm1 );
1785 (~C)(i,j+1UL) -=
sum( xmm2 );
1789 for(
size_t k=0UL; k<K; k+=IT::size ) {
1790 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
1792 (~C)(i,j) -=
sum( xmm1 );
// Fallback overload of the BLAS subtraction-assignment dispatcher: enabled when
// UseDefaultKernel<MT3,MT4,MT5> holds, it forwards C, A and B unchanged to
// selectDefaultSubAssignKernel.
1813 template<
typename MT3
1816 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1817 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1819 selectDefaultSubAssignKernel( C, A, B );
// BLAS subtraction-assignment kernel, enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Issues cblas_sgemm with
// alpha = -1.0F and beta = 1.0F, so the product is subtracted from the existing C
// (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1839 template<
typename MT3
1842 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1843 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1845 using boost::numeric_cast;
1851 const int M ( numeric_cast<int>( A.rows() ) );
1852 const int N ( numeric_cast<int>( B.columns() ) );
1853 const int K ( numeric_cast<int>( A.columns() ) );
1854 const int lda( numeric_cast<int>( A.spacing() ) );
1855 const int ldb( numeric_cast<int>( B.spacing() ) );
1856 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1858 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1859 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1860 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1861 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction-assignment kernel, enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Issues cblas_dgemm with
// alpha = -1.0 and beta = 1.0, so the product is subtracted from the existing C
// (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1882 template<
typename MT3
1885 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1886 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1888 using boost::numeric_cast;
1894 const int M ( numeric_cast<int>( A.rows() ) );
1895 const int N ( numeric_cast<int>( B.columns() ) );
1896 const int K ( numeric_cast<int>( A.columns() ) );
1897 const int lda( numeric_cast<int>( A.spacing() ) );
1898 const int ldb( numeric_cast<int>( B.spacing() ) );
1899 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1901 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1902 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1903 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1904 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction-assignment kernel, enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Issues cblas_cgemm with
// alpha = (-1,0) and beta = (1,0), both passed by address, so the product is
// subtracted from the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1925 template<
typename MT3
1928 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1929 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1931 using boost::numeric_cast;
1940 const int M ( numeric_cast<int>( A.rows() ) );
1941 const int N ( numeric_cast<int>( B.columns() ) );
1942 const int K ( numeric_cast<int>( A.columns() ) );
1943 const int lda( numeric_cast<int>( A.spacing() ) );
1944 const int ldb( numeric_cast<int>( B.spacing() ) );
1945 const int ldc( numeric_cast<int>( C.spacing() ) );
1946 const complex<float> alpha( -1.0F, 0.0F );
1947 const complex<float> beta ( 1.0F, 0.0F );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1949 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1950 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1951 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1952 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction-assignment kernel, enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Issues cblas_zgemm with
// alpha = (-1,0) and beta = (1,0), both passed by address, so the product is
// subtracted from the existing C (GEMM contract: C := alpha*op(A)*op(B) + beta*C).
// All sizes and leading dimensions (spacing()) are converted to int through
// boost::numeric_cast before being handed to BLAS.
1973 template<
typename MT3
1976 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1977 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1979 using boost::numeric_cast;
1988 const int M ( numeric_cast<int>( A.rows() ) );
1989 const int N ( numeric_cast<int>( B.columns() ) );
1990 const int K ( numeric_cast<int>( A.columns() ) );
1991 const int lda( numeric_cast<int>( A.spacing() ) );
1992 const int ldb( numeric_cast<int>( B.spacing() ) );
1993 const int ldc( numeric_cast<int>( C.spacing() ) );
1994 const complex<double> alpha( -1.0, 0.0 );
1995 const complex<double> beta ( 1.0, 0.0 );
// Layout follows the storage order of C; transpose flags for (A, B) are
// (NoTrans, Trans) for a row-major C and (Trans, NoTrans) otherwise.
1997 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1998 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1999 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2000 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2046 template<
typename MT1
2050 :
public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2051 ,
private Expression
2052 ,
private Computation
2056 typedef DMatTDMatMultExpr<MT1,MT2> MMM;
2057 typedef typename MMM::ResultType RES;
2058 typedef typename MT1::ResultType
RT1;
2059 typedef typename MT2::ResultType
RT2;
2060 typedef typename MT1::CompositeType
CT1;
2061 typedef typename MT2::CompositeType
CT2;
// Compile-time trait for the scaled product: `value` is nonzero only when IsFloat
// holds for the element types of T1, T2 and T3 and the fourth type T4 is not complex
// (IsComplex<T4> is false).
2069 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2070 struct UseSinglePrecisionKernel {
2071 enum { value = IsFloat<typename T1::ElementType>::value &&
2072 IsFloat<typename T2::ElementType>::value &&
2073 IsFloat<typename T3::ElementType>::value &&
2074 !IsComplex<T4>::value };
// Compile-time trait for the scaled product: `value` is nonzero only when IsDouble
// holds for the element types of T1, T2 and T3 and the fourth type T4 is not complex
// (IsComplex<T4> is false).
2083 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2084 struct UseDoublePrecisionKernel {
2085 enum { value = IsDouble<typename T1::ElementType>::value &&
2086 IsDouble<typename T2::ElementType>::value &&
2087 IsDouble<typename T3::ElementType>::value &&
2088 !IsComplex<T4>::value };
// Compile-time trait: `value` is nonzero only when the element types of T1, T2 and
// T3 are all exactly complex<float>. This trait takes three type parameters; no
// fourth type takes part in the check.
2097 template<
typename T1,
typename T2,
typename T3 >
2098 struct UseSinglePrecisionComplexKernel {
2099 typedef complex<float> Type;
2100 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2101 IsSame<typename T2::ElementType,Type>::value &&
2102 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: `value` is nonzero only when the element types of T1, T2 and
// T3 are all exactly complex<double>. This trait takes three type parameters; no
// fourth type takes part in the check.
2111 template<
typename T1,
typename T2,
typename T3 >
2112 struct UseDoublePrecisionComplexKernel {
2113 typedef complex<double> Type;
2114 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2115 IsSame<typename T2::ElementType,Type>::value &&
2116 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait selecting the non-BLAS path for the scaled product: `value` is
// nonzero when BLAZE_BLAS_MODE evaluates to false, or when none of the four BLAS
// kernel traits is satisfied. The real-valued traits are queried with <T1,T2,T3,T4>,
// the complex ones with <T1,T2,T3> only.
2124 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2125 struct UseDefaultKernel {
2126 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2127 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2128 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2129 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait for the intrinsic-based default kernel of the scaled product.
// `value` is nonzero only when T1, T2 and T3 all report `vectorizable`, T2 and T3
// share T1's element type, T4 is that same element type, and IntrinsicTrait for the
// element type reports both `addition` and `multiplication`.
2137 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2138 struct UseVectorizedDefaultKernel {
2139 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2140 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2141 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2142 IsSame<typename T1::ElementType,T4>::value &&
2143 IntrinsicTrait<typename T1::ElementType>::addition &&
2144 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression object.
2150 typedef DMatScalarMultExpr<MMM,ST,false>
This;
// Result type: MultTrait applied to RES and the scalar type ST.
2151 typedef typename MultTrait<RES,ST>::Type
ResultType;
// Nested OppositeType of the result type.
2152 typedef typename ResultType::OppositeType
OppositeType;
// Element type of the result type.
2154 typedef typename ResultType::ElementType
ElementType;
// Intrinsic (SIMD) type associated with ElementType via IntrinsicTrait.
2155 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// Left operand of the scaled expression: the matrix/matrix product itself.
2160 typedef const DMatTDMatMultExpr<MT1,MT2>
LeftOperand;
// Type used to hold the left matrix operand inside the kernels.
// NOTE(review): presumably SelectType<C,A,B> yields A when C is true, i.e. a computation
// is evaluated into 'const RT1' while anything else is held as CT1 — confirm.
2166 typedef typename SelectType< IsComputation<MT1>::value,
const RT1,
CT1 >::Type
LT;
// Same selection for the right matrix operand (const RT2 versus CT2).
2169 typedef typename SelectType< IsComputation<MT2>::value,
const RT2,
CT2 >::Type
RT;
// This expression never advertises itself as vectorizable.
2174 enum { vectorizable = 0 };
// Constructor taking the matrix product expression and the scalar factor.
2183 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// Element access result: element (i,j) of the wrapped expression times the scalar.
// NOTE(review): presumably the return statement of the element access operator — confirm.
2199 return matrix_(i,j) * scalar_;
// Returns the number of rows, forwarded from the wrapped matrix expression.
2208 inline size_t rows()
const {
2209 return matrix_.rows();
// Returns the number of columns, forwarded from the wrapped matrix expression.
2218 inline size_t columns()
const {
2219 return matrix_.columns();
// Returns whether the expression can alias with the given address; the query is
// delegated unchanged to the wrapped matrix expression.
2249 template<
typename T >
2250 inline bool canAlias(
const T* alias )
const {
2251 return matrix_.canAlias( alias );
// Returns whether the expression is aliased with the given address; the query is
// delegated unchanged to the wrapped matrix expression.
2261 template<
typename T >
2262 inline bool isAliased(
const T* alias )
const {
2263 return matrix_.isAliased( alias );
// Assignment of a scaled matrix/matrix product to a dense matrix.
// Fetches both operands of the wrapped product, handles the degenerate sizes, and
// then forwards to one of the two kernel selectors together with the scalar.
2282 template<
typename MT3
2284 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2291 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2292 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Target without rows or columns.
2294 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Inner dimension of the product is zero. This case is handled before the kernels
// run; the default kernel reads column 0 of A unconditionally.
2297 else if( left.columns() == 0UL ) {
// Non-BLAS path and BLAS path.
// NOTE(review): the condition choosing between the two calls is not part of these lines.
2313 DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2315 DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default assignment kernel; enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// For every row i of A, C(i,k) is first set from the j == 0 term and the terms for
// j >= 1 are then accumulated on top.
2333 template<
typename MT3
2337 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2338 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2340 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialize row i of C with the first term; reads A(i,0) and B(0,k), so A must have
// at least one column.
2341 for(
size_t k=0UL; k<B.columns(); ++k ) {
2342 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the remaining terms of the inner product.
2344 for(
size_t j=1UL; j<A.columns(); ++j ) {
2345 for(
size_t k=0UL; k<B.columns(); ++k ) {
2346 C(i,k) += A(i,j) * B(j,k);
// NOTE(review): presumably this final pass over row i applies 'scalar' to C(i,k) — confirm.
2349 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default assignment kernel for a target of type DenseMatrix<MT3,false>
// (NOTE(review): 'false' is presumably the row-major storage flag — confirm).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: pairs of rows against 4, 2, and then 1 column(s), followed
// by a single leftover row against 4, 2, and then 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value is multiplied by 'scalar' before it is
// stored into C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
2370 template<
typename MT3
2374 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2375 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2377 typedef IntrinsicTrait<ElementType> IT;
2379 const size_t M( A.rows() );
2380 const size_t N( B.columns() );
2381 const size_t K( A.columns() );
// Two rows of C at a time.
2385 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile.
2387 for( ; (j+4UL) <= N; j+=4UL ) {
2388 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2389 for(
size_t k=0UL; k<K; k+=IT::size ) {
2396 xmm1 = xmm1 + a1 * b1;
2397 xmm2 = xmm2 + a1 * b2;
2398 xmm3 = xmm3 + a1 * b3;
2399 xmm4 = xmm4 + a1 * b4;
2400 xmm5 = xmm5 + a2 * b1;
2401 xmm6 = xmm6 + a2 * b2;
2402 xmm7 = xmm7 + a2 * b3;
2403 xmm8 = xmm8 + a2 * b4;
2405 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2406 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2407 (~C)(i ,j+2UL) =
sum( xmm3 ) * scalar;
2408 (~C)(i ,j+3UL) =
sum( xmm4 ) * scalar;
2409 (~C)(i+1UL,j ) =
sum( xmm5 ) * scalar;
2410 (~C)(i+1UL,j+1UL) =
sum( xmm6 ) * scalar;
2411 (~C)(i+1UL,j+2UL) =
sum( xmm7 ) * scalar;
2412 (~C)(i+1UL,j+3UL) =
sum( xmm8 ) * scalar;
// 2x2 tile.
2414 for( ; (j+2UL) <= N; j+=2UL ) {
2416 for(
size_t k=0UL; k<K; k+=IT::size ) {
2421 xmm1 = xmm1 + a1 * b1;
2422 xmm2 = xmm2 + a1 * b2;
2423 xmm3 = xmm3 + a2 * b1;
2424 xmm4 = xmm4 + a2 * b2;
2426 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2427 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2428 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2429 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
2433 for(
size_t k=0UL; k<K; k+=IT::size ) {
2435 xmm1 = xmm1 + A.get(i ,k) * b1;
2436 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2438 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2439 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Leftover single row: 1x4 tile.
2444 for( ; (j+4UL) <= N; j+=4UL ) {
2446 for(
size_t k=0UL; k<K; k+=IT::size ) {
2448 xmm1 = xmm1 + a1 * B.get(k,j );
2449 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2450 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
2451 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
2453 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2454 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
2455 (~C)(i,j+2UL) =
sum( xmm3 ) * scalar;
2456 (~C)(i,j+3UL) =
sum( xmm4 ) * scalar;
// Leftover single row: 1x2 tile.
2458 for( ; (j+2UL) <= N; j+=2UL ) {
2460 for(
size_t k=0UL; k<K; k+=IT::size ) {
2462 xmm1 = xmm1 + a1 * B.get(k,j );
2463 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2465 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2466 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Leftover single element.
2470 for(
size_t k=0UL; k<K; k+=IT::size ) {
2471 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
2473 (~C)(i,j) =
sum( xmm1 ) * scalar;
// Vectorized default assignment kernel for a target of type DenseMatrix<MT3,true>
// (NOTE(review): 'true' is presumably the column-major storage flag — confirm).
// Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: blocks of 4 rows against 2 and then 1 column(s), blocks of
// 2 rows against 2 and then 1 column(s), and a single leftover row against 2 and then
// 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value is multiplied by 'scalar' before it is
// stored into C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
2493 template<
typename MT3
2497 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2498 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2500 typedef IntrinsicTrait<ElementType> IT;
2502 const size_t M( A.rows() );
2503 const size_t N( B.columns() );
2504 const size_t K( A.columns() );
// Four rows of C at a time.
2508 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile.
2510 for( ; (j+2UL) <= N; j+=2UL ) {
2511 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2512 for(
size_t k=0UL; k<K; k+=IT::size ) {
2519 xmm1 = xmm1 + a1 * b1;
2520 xmm2 = xmm2 + a1 * b2;
2521 xmm3 = xmm3 + a2 * b1;
2522 xmm4 = xmm4 + a2 * b2;
2523 xmm5 = xmm5 + a3 * b1;
2524 xmm6 = xmm6 + a3 * b2;
2525 xmm7 = xmm7 + a4 * b1;
2526 xmm8 = xmm8 + a4 * b2;
2528 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2529 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2530 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2531 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
2532 (~C)(i+2UL,j ) =
sum( xmm5 ) * scalar;
2533 (~C)(i+2UL,j+1UL) =
sum( xmm6 ) * scalar;
2534 (~C)(i+3UL,j ) =
sum( xmm7 ) * scalar;
2535 (~C)(i+3UL,j+1UL) =
sum( xmm8 ) * scalar;
// Last single column for the current block of four rows.
2539 for(
size_t k=0UL; k<K; k+=IT::size ) {
2541 xmm1 = xmm1 + A.get(i ,k) * b1;
2542 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2543 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
2544 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
2546 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2547 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
2548 (~C)(i+2UL,j) =
sum( xmm3 ) * scalar;
2549 (~C)(i+3UL,j) =
sum( xmm4 ) * scalar;
// Two rows of C at a time.
2552 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
2554 for( ; (j+2UL) <= N; j+=2UL ) {
2556 for(
size_t k=0UL; k<K; k+=IT::size ) {
2561 xmm1 = xmm1 + a1 * b1;
2562 xmm2 = xmm2 + a1 * b2;
2563 xmm3 = xmm3 + a2 * b1;
2564 xmm4 = xmm4 + a2 * b2;
2566 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2567 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2568 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2569 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
2573 for(
size_t k=0UL; k<K; k+=IT::size ) {
2575 xmm1 = xmm1 + A.get(i ,k) * b1;
2576 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
2578 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2579 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Leftover single row: 1x2 tile.
2584 for( ; (j+2UL) <= N; j+=2UL ) {
2586 for(
size_t k=0UL; k<K; k+=IT::size ) {
2588 xmm1 = xmm1 + a1 * B.get(k,j );
2589 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
2591 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2592 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Leftover single element.
2596 for(
size_t k=0UL; k<K; k+=IT::size ) {
2597 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
2599 (~C)(i,j) =
sum( xmm1 ) * scalar;
// BLAS selector overload for the case where no BLAS kernel applies
// (UseDefaultKernel<MT3,MT4,MT5,ST2> holds): simply forwards all arguments to
// selectDefaultAssignKernel.
2619 template<
typename MT3
2623 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2624 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2626 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assignment kernel for single precision operands (cblas_sgemm); enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = scalar and beta = 0.0F, so the previous contents of C do not
// contribute to the result.
2645 template<
typename MT3
2649 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2650 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2652 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
2658 const int M ( numeric_cast<int>( A.rows() ) );
2659 const int N ( numeric_cast<int>( B.columns() ) );
2660 const int K ( numeric_cast<int>( A.columns() ) );
2661 const int lda( numeric_cast<int>( A.spacing() ) );
2662 const int ldb( numeric_cast<int>( B.spacing() ) );
2663 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
2665 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2666 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2667 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2668 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands (cblas_dgemm); enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = scalar and beta = 0.0, so the previous contents of C do not
// contribute to the result.
2688 template<
typename MT3
2692 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2693 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2695 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
2701 const int M ( numeric_cast<int>( A.rows() ) );
2702 const int N ( numeric_cast<int>( B.columns() ) );
2703 const int K ( numeric_cast<int>( A.columns() ) );
2704 const int lda( numeric_cast<int>( A.spacing() ) );
2705 const int ldb( numeric_cast<int>( B.spacing() ) );
2706 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
2708 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2709 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2710 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2711 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for complex<float> operands (cblas_cgemm); enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from 'scalar' and beta is (0,0), so the previous contents of C do not
// contribute to the result. Both factors are passed to gemm by address.
2731 template<
typename MT3
2735 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2736 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2738 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
2748 const int M ( numeric_cast<int>( A.rows() ) );
2749 const int N ( numeric_cast<int>( B.columns() ) );
2750 const int K ( numeric_cast<int>( A.columns() ) );
2751 const int lda( numeric_cast<int>( A.spacing() ) );
2752 const int ldb( numeric_cast<int>( B.spacing() ) );
2753 const int ldc( numeric_cast<int>( C.spacing() ) );
2754 const complex<float> alpha( scalar );
2755 const complex<float> beta ( 0.0F, 0.0F );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
2757 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2758 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2759 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2760 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for complex<double> operands (cblas_zgemm); enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from 'scalar' and beta is (0,0), so the previous contents of C do not
// contribute to the result. Both factors are passed to gemm by address.
2780 template<
typename MT3
2784 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2785 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2787 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
2797 const int M ( numeric_cast<int>( A.rows() ) );
2798 const int N ( numeric_cast<int>( B.columns() ) );
2799 const int K ( numeric_cast<int>( A.columns() ) );
2800 const int lda( numeric_cast<int>( A.spacing() ) );
2801 const int ldb( numeric_cast<int>( B.spacing() ) );
2802 const int ldc( numeric_cast<int>( C.spacing() ) );
2803 const complex<double> alpha( scalar );
2804 const complex<double> beta ( 0.0, 0.0 );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
2806 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2807 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2808 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2809 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of a scaled matrix/matrix product to a sparse matrix.
// The expression is first evaluated into a dense temporary whose type depends on the
// storage order flag SO of the target.
2826 template<
typename MT
2828 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// NOTE(review): presumably SelectType<C,A,B> yields A when C is true, i.e. OppositeType
// for SO == true and ResultType otherwise — confirm.
2832 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2844 const TmpType tmp( rhs );
// Addition assignment of a scaled matrix/matrix product to a dense matrix.
// Fetches both operands of the wrapped product, handles the degenerate sizes, and
// then forwards to one of the two kernel selectors together with the scalar.
2861 template<
typename MT3
2863 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
2870 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2871 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: all three cases share a single branch here.
2873 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Non-BLAS path and BLAS path.
// NOTE(review): the condition choosing between the two calls is not part of these lines.
2888 DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2890 DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default addition assignment kernel; enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// Takes the target C, the two operands A and B, and the scalar factor.
2908 template<
typename MT3
2912 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2913 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default addition assignment kernel for a target of type
// DenseMatrix<MT3,false> (NOTE(review): 'false' is presumably the row-major storage
// flag — confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: pairs of rows against 4, 2, and then 1 column(s), followed
// by a single leftover row against 4, 2, and then 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value times 'scalar' is added to C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
2934 template<
typename MT3
2938 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2939 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2941 typedef IntrinsicTrait<ElementType> IT;
2943 const size_t M( A.rows() );
2944 const size_t N( B.columns() );
2945 const size_t K( A.columns() );
// Two rows of C at a time.
2949 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile.
2951 for( ; (j+4UL) <= N; j+=4UL ) {
2952 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2953 for(
size_t k=0UL; k<K; k+=IT::size ) {
2960 xmm1 = xmm1 + a1 * b1;
2961 xmm2 = xmm2 + a1 * b2;
2962 xmm3 = xmm3 + a1 * b3;
2963 xmm4 = xmm4 + a1 * b4;
2964 xmm5 = xmm5 + a2 * b1;
2965 xmm6 = xmm6 + a2 * b2;
2966 xmm7 = xmm7 + a2 * b3;
2967 xmm8 = xmm8 + a2 * b4;
2969 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
2970 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
2971 (~C)(i ,j+2UL) +=
sum( xmm3 ) * scalar;
2972 (~C)(i ,j+3UL) +=
sum( xmm4 ) * scalar;
2973 (~C)(i+1UL,j ) +=
sum( xmm5 ) * scalar;
2974 (~C)(i+1UL,j+1UL) +=
sum( xmm6 ) * scalar;
2975 (~C)(i+1UL,j+2UL) +=
sum( xmm7 ) * scalar;
2976 (~C)(i+1UL,j+3UL) +=
sum( xmm8 ) * scalar;
// 2x2 tile.
2978 for( ; (j+2UL) <= N; j+=2UL ) {
2980 for(
size_t k=0UL; k<K; k+=IT::size ) {
2985 xmm1 = xmm1 + a1 * b1;
2986 xmm2 = xmm2 + a1 * b2;
2987 xmm3 = xmm3 + a2 * b1;
2988 xmm4 = xmm4 + a2 * b2;
2990 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
2991 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
2992 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
2993 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
2997 for(
size_t k=0UL; k<K; k+=IT::size ) {
2999 xmm1 = xmm1 + A.get(i ,k) * b1;
3000 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3002 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3003 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Leftover single row: 1x4 tile.
3008 for( ; (j+4UL) <= N; j+=4UL ) {
3010 for(
size_t k=0UL; k<K; k+=IT::size ) {
3012 xmm1 = xmm1 + a1 * B.get(k,j );
3013 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3014 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
3015 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
3017 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3018 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3019 (~C)(i,j+2UL) +=
sum( xmm3 ) * scalar;
3020 (~C)(i,j+3UL) +=
sum( xmm4 ) * scalar;
// Leftover single row: 1x2 tile.
3022 for( ; (j+2UL) <= N; j+=2UL ) {
3024 for(
size_t k=0UL; k<K; k+=IT::size ) {
3026 xmm1 = xmm1 + a1 * B.get(k,j );
3027 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3029 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3030 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Leftover single element.
3034 for(
size_t k=0UL; k<K; k+=IT::size ) {
3035 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3037 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// Vectorized default addition assignment kernel for a target of type
// DenseMatrix<MT3,true> (NOTE(review): 'true' is presumably the column-major storage
// flag — confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: blocks of 4 rows against 2 and then 1 column(s), blocks of
// 2 rows against 2 and then 1 column(s), and a single leftover row against 2 and then
// 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value times 'scalar' is added to C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
3057 template<
typename MT3
3061 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3062 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3064 typedef IntrinsicTrait<ElementType> IT;
3066 const size_t M( A.rows() );
3067 const size_t N( B.columns() );
3068 const size_t K( A.columns() );
// Four rows of C at a time.
3072 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile.
3074 for( ; (j+2UL) <= N; j+=2UL ) {
3075 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3076 for(
size_t k=0UL; k<K; k+=IT::size ) {
3083 xmm1 = xmm1 + a1 * b1;
3084 xmm2 = xmm2 + a1 * b2;
3085 xmm3 = xmm3 + a2 * b1;
3086 xmm4 = xmm4 + a2 * b2;
3087 xmm5 = xmm5 + a3 * b1;
3088 xmm6 = xmm6 + a3 * b2;
3089 xmm7 = xmm7 + a4 * b1;
3090 xmm8 = xmm8 + a4 * b2;
3092 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3093 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3094 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3095 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
3096 (~C)(i+2UL,j ) +=
sum( xmm5 ) * scalar;
3097 (~C)(i+2UL,j+1UL) +=
sum( xmm6 ) * scalar;
3098 (~C)(i+3UL,j ) +=
sum( xmm7 ) * scalar;
3099 (~C)(i+3UL,j+1UL) +=
sum( xmm8 ) * scalar;
// Last single column for the current block of four rows.
3103 for(
size_t k=0UL; k<K; k+=IT::size ) {
3105 xmm1 = xmm1 + A.get(i ,k) * b1;
3106 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3107 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
3108 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
3110 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3111 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
3112 (~C)(i+2UL,j) +=
sum( xmm3 ) * scalar;
3113 (~C)(i+3UL,j) +=
sum( xmm4 ) * scalar;
// Two rows of C at a time.
3116 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
3118 for( ; (j+2UL) <= N; j+=2UL ) {
3120 for(
size_t k=0UL; k<K; k+=IT::size ) {
3125 xmm1 = xmm1 + a1 * b1;
3126 xmm2 = xmm2 + a1 * b2;
3127 xmm3 = xmm3 + a2 * b1;
3128 xmm4 = xmm4 + a2 * b2;
3130 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3131 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3132 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3133 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
3137 for(
size_t k=0UL; k<K; k+=IT::size ) {
3139 xmm1 = xmm1 + A.get(i ,k) * b1;
3140 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3142 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3143 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Leftover single row: 1x2 tile.
3148 for( ; (j+2UL) <= N; j+=2UL ) {
3150 for(
size_t k=0UL; k<K; k+=IT::size ) {
3152 xmm1 = xmm1 + a1 * B.get(k,j );
3153 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3155 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3156 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Leftover single element.
3160 for(
size_t k=0UL; k<K; k+=IT::size ) {
3161 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3163 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// BLAS selector overload for the case where no BLAS kernel applies
// (UseDefaultKernel<MT3,MT4,MT5,ST2> holds): simply forwards all arguments to
// selectDefaultAddAssignKernel.
3183 template<
typename MT3
3187 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3188 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3190 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS addition assignment kernel for single precision operands (cblas_sgemm); enabled
// when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = scalar and beta = 1.0F, so the scaled product is added to the
// existing contents of C.
3209 template<
typename MT3
3213 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3214 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3216 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3222 const int M ( numeric_cast<int>( A.rows() ) );
3223 const int N ( numeric_cast<int>( B.columns() ) );
3224 const int K ( numeric_cast<int>( A.columns() ) );
3225 const int lda( numeric_cast<int>( A.spacing() ) );
3226 const int ldb( numeric_cast<int>( B.spacing() ) );
3227 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3229 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3230 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3231 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3232 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands (cblas_dgemm); enabled
// when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = scalar and beta = 1.0, so the scaled product is added to the
// existing contents of C.
3252 template<
typename MT3
3256 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3257 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3259 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3265 const int M ( numeric_cast<int>( A.rows() ) );
3266 const int N ( numeric_cast<int>( B.columns() ) );
3267 const int K ( numeric_cast<int>( A.columns() ) );
3268 const int lda( numeric_cast<int>( A.spacing() ) );
3269 const int ldb( numeric_cast<int>( B.spacing() ) );
3270 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3272 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3273 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3274 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3275 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for complex<float> operands (cblas_cgemm); enabled
// when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from 'scalar' and beta is (1,0), so the scaled product is added to the
// existing contents of C. Both factors are passed to gemm by address.
3295 template<
typename MT3
3299 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3300 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3302 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3312 const int M ( numeric_cast<int>( A.rows() ) );
3313 const int N ( numeric_cast<int>( B.columns() ) );
3314 const int K ( numeric_cast<int>( A.columns() ) );
3315 const int lda( numeric_cast<int>( A.spacing() ) );
3316 const int ldb( numeric_cast<int>( B.spacing() ) );
3317 const int ldc( numeric_cast<int>( C.spacing() ) );
3318 const complex<float> alpha( scalar );
3319 const complex<float> beta ( 1.0F, 0.0F );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3321 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3322 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3323 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3324 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for complex<double> operands (cblas_zgemm); enabled
// when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from 'scalar' and beta is (1,0), so the scaled product is added to the
// existing contents of C. Both factors are passed to gemm by address.
3344 template<
typename MT3
3348 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3349 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3351 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3361 const int M ( numeric_cast<int>( A.rows() ) );
3362 const int N ( numeric_cast<int>( B.columns() ) );
3363 const int K ( numeric_cast<int>( A.columns() ) );
3364 const int lda( numeric_cast<int>( A.spacing() ) );
3365 const int ldb( numeric_cast<int>( B.spacing() ) );
3366 const int ldc( numeric_cast<int>( C.spacing() ) );
3367 const complex<double> alpha( scalar );
3368 const complex<double> beta ( 1.0, 0.0 );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3370 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3371 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3372 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3373 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of a scaled matrix/matrix product to a dense matrix.
// Fetches both operands of the wrapped product, handles the degenerate sizes, and
// then forwards to one of the two kernel selectors together with the scalar.
3394 template<
typename MT3
3396 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
3403 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3404 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension: all three cases share a single branch here.
3406 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Non-BLAS path and BLAS path.
// NOTE(review): the condition choosing between the two calls is not part of these lines.
3421 DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3423 DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default subtraction assignment kernel; enabled only when
// UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> does not hold (DisableIf).
// Takes the target C, the two operands A and B, and the scalar factor.
3441 template<
typename MT3
3445 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3446 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default subtraction assignment kernel for a target of type
// DenseMatrix<MT3,false> (NOTE(review): 'false' is presumably the row-major storage
// flag — confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: pairs of rows against 4, 2, and then 1 column(s), followed
// by a single leftover row against 4, 2, and then 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value times 'scalar' is subtracted from C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
3467 template<
typename MT3
3471 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3472 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3474 typedef IntrinsicTrait<ElementType> IT;
3476 const size_t M( A.rows() );
3477 const size_t N( B.columns() );
3478 const size_t K( A.columns() );
// Two rows of C at a time.
3482 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile.
3484 for( ; (j+4UL) <= N; j+=4UL ) {
3485 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3486 for(
size_t k=0UL; k<K; k+=IT::size ) {
3493 xmm1 = xmm1 + a1 * b1;
3494 xmm2 = xmm2 + a1 * b2;
3495 xmm3 = xmm3 + a1 * b3;
3496 xmm4 = xmm4 + a1 * b4;
3497 xmm5 = xmm5 + a2 * b1;
3498 xmm6 = xmm6 + a2 * b2;
3499 xmm7 = xmm7 + a2 * b3;
3500 xmm8 = xmm8 + a2 * b4;
3502 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3503 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3504 (~C)(i ,j+2UL) -=
sum( xmm3 ) * scalar;
3505 (~C)(i ,j+3UL) -=
sum( xmm4 ) * scalar;
3506 (~C)(i+1UL,j ) -=
sum( xmm5 ) * scalar;
3507 (~C)(i+1UL,j+1UL) -=
sum( xmm6 ) * scalar;
3508 (~C)(i+1UL,j+2UL) -=
sum( xmm7 ) * scalar;
3509 (~C)(i+1UL,j+3UL) -=
sum( xmm8 ) * scalar;
// 2x2 tile.
3511 for( ; (j+2UL) <= N; j+=2UL ) {
3513 for(
size_t k=0UL; k<K; k+=IT::size ) {
3518 xmm1 = xmm1 + a1 * b1;
3519 xmm2 = xmm2 + a1 * b2;
3520 xmm3 = xmm3 + a2 * b1;
3521 xmm4 = xmm4 + a2 * b2;
3523 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3524 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3525 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3526 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
3530 for(
size_t k=0UL; k<K; k+=IT::size ) {
3532 xmm1 = xmm1 + A.get(i ,k) * b1;
3533 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3535 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3536 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Leftover single row: 1x4 tile.
3541 for( ; (j+4UL) <= N; j+=4UL ) {
3543 for(
size_t k=0UL; k<K; k+=IT::size ) {
3545 xmm1 = xmm1 + a1 * B.get(k,j );
3546 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3547 xmm3 = xmm3 + a1 * B.get(k,j+2UL);
3548 xmm4 = xmm4 + a1 * B.get(k,j+3UL);
3550 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3551 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
3552 (~C)(i,j+2UL) -=
sum( xmm3 ) * scalar;
3553 (~C)(i,j+3UL) -=
sum( xmm4 ) * scalar;
// Leftover single row: 1x2 tile.
3555 for( ; (j+2UL) <= N; j+=2UL ) {
3557 for(
size_t k=0UL; k<K; k+=IT::size ) {
3559 xmm1 = xmm1 + a1 * B.get(k,j );
3560 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3562 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3563 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Leftover single element.
3567 for(
size_t k=0UL; k<K; k+=IT::size ) {
3568 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3570 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// Vectorized default subtraction assignment kernel for a target of type
// DenseMatrix<MT3,true> (NOTE(review): 'true' is presumably the column-major storage
// flag — confirm). Enabled only when UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> holds.
// C is covered tile by tile: blocks of 4 rows against 2 and then 1 column(s), blocks of
// 2 rows against 2 and then 1 column(s), and a single leftover row against 2 and then
// 1 column(s).
// Every k loop advances by IT::size and adds products to the xmm accumulators; sum()
// reduces each accumulator and the reduced value times 'scalar' is subtracted from C.
// NOTE(review): the accumulators have no explicit initializer; presumably IntrinsicType
// default-constructs to zero — confirm.
// NOTE(review): 'k<K' with stride IT::size presumably relies on padded operand storage
// when K is not a multiple of IT::size — confirm.
3590 template<
typename MT3
3594 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3595 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3597 typedef IntrinsicTrait<ElementType> IT;
3599 const size_t M( A.rows() );
3600 const size_t N( B.columns() );
3601 const size_t K( A.columns() );
// Four rows of C at a time.
3605 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile.
3607 for( ; (j+2UL) <= N; j+=2UL ) {
3608 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3609 for(
size_t k=0UL; k<K; k+=IT::size ) {
3616 xmm1 = xmm1 + a1 * b1;
3617 xmm2 = xmm2 + a1 * b2;
3618 xmm3 = xmm3 + a2 * b1;
3619 xmm4 = xmm4 + a2 * b2;
3620 xmm5 = xmm5 + a3 * b1;
3621 xmm6 = xmm6 + a3 * b2;
3622 xmm7 = xmm7 + a4 * b1;
3623 xmm8 = xmm8 + a4 * b2;
3625 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3626 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3627 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3628 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
3629 (~C)(i+2UL,j ) -=
sum( xmm5 ) * scalar;
3630 (~C)(i+2UL,j+1UL) -=
sum( xmm6 ) * scalar;
3631 (~C)(i+3UL,j ) -=
sum( xmm7 ) * scalar;
3632 (~C)(i+3UL,j+1UL) -=
sum( xmm8 ) * scalar;
// Last single column for the current block of four rows.
3636 for(
size_t k=0UL; k<K; k+=IT::size ) {
3638 xmm1 = xmm1 + A.get(i ,k) * b1;
3639 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3640 xmm3 = xmm3 + A.get(i+2UL,k) * b1;
3641 xmm4 = xmm4 + A.get(i+3UL,k) * b1;
3643 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3644 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
3645 (~C)(i+2UL,j) -=
sum( xmm3 ) * scalar;
3646 (~C)(i+3UL,j) -=
sum( xmm4 ) * scalar;
// Two rows of C at a time.
3649 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x2 tile.
3651 for( ; (j+2UL) <= N; j+=2UL ) {
3653 for(
size_t k=0UL; k<K; k+=IT::size ) {
3658 xmm1 = xmm1 + a1 * b1;
3659 xmm2 = xmm2 + a1 * b2;
3660 xmm3 = xmm3 + a2 * b1;
3661 xmm4 = xmm4 + a2 * b2;
3663 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3664 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3665 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3666 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// Last single column for the current pair of rows.
3670 for(
size_t k=0UL; k<K; k+=IT::size ) {
3672 xmm1 = xmm1 + A.get(i ,k) * b1;
3673 xmm2 = xmm2 + A.get(i+1UL,k) * b1;
3675 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3676 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Leftover single row: 1x2 tile.
3681 for( ; (j+2UL) <= N; j+=2UL ) {
3683 for(
size_t k=0UL; k<K; k+=IT::size ) {
3685 xmm1 = xmm1 + a1 * B.get(k,j );
3686 xmm2 = xmm2 + a1 * B.get(k,j+1UL);
3688 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3689 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Leftover single element.
3693 for(
size_t k=0UL; k<K; k+=IT::size ) {
3694 xmm1 = xmm1 + A.get(i,k) * B.get(k,j);
3696 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// BLAS selector overload for the case where no BLAS kernel applies
// (UseDefaultKernel<MT3,MT4,MT5,ST2> holds): simply forwards all arguments to
// selectDefaultSubAssignKernel.
3716 template<
typename MT3
3720 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3721 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3723 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS subtraction assignment kernel for single precision operands (cblas_sgemm);
// enabled when UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = -scalar and beta = 1.0F, so the scaled product is subtracted
// from the existing contents of C.
3742 template<
typename MT3
3746 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3747 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3749 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3755 const int M ( numeric_cast<int>( A.rows() ) );
3756 const int N ( numeric_cast<int>( B.columns() ) );
3757 const int K ( numeric_cast<int>( A.columns() ) );
3758 const int lda( numeric_cast<int>( A.spacing() ) );
3759 const int ldb( numeric_cast<int>( B.spacing() ) );
3760 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3762 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3763 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3764 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3765 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands (cblas_dgemm);
// enabled when UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> holds.
// Calls gemm with alpha = -scalar and beta = 1.0, so the scaled product is subtracted
// from the existing contents of C.
3785 template<
typename MT3
3789 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3790 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3792 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3798 const int M ( numeric_cast<int>( A.rows() ) );
3799 const int N ( numeric_cast<int>( B.columns() ) );
3800 const int K ( numeric_cast<int>( A.columns() ) );
3801 const int lda( numeric_cast<int>( A.spacing() ) );
3802 const int ldb( numeric_cast<int>( B.spacing() ) );
3803 const int ldc( numeric_cast<int>( C.spacing() ) );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3805 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3806 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3807 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3808 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<float> operands (cblas_cgemm); enabled
// when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from -scalar and beta is (1,0), so the scaled product is subtracted
// from the existing contents of C. Both factors are passed to gemm by address.
3828 template<
typename MT3
3832 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3833 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3835 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3845 const int M ( numeric_cast<int>( A.rows() ) );
3846 const int N ( numeric_cast<int>( B.columns() ) );
3847 const int K ( numeric_cast<int>( A.columns() ) );
3848 const int lda( numeric_cast<int>( A.spacing() ) );
3849 const int ldb( numeric_cast<int>( B.spacing() ) );
3850 const int ldc( numeric_cast<int>( C.spacing() ) );
3851 const complex<float> alpha( -scalar );
3852 const complex<float> beta ( 1.0F, 0.0F );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3854 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3855 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3856 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3857 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<double> operands (cblas_zgemm); enabled
// when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
// alpha is built from -scalar and beta is (1,0), so the scaled product is subtracted
// from the existing contents of C. Both factors are passed to gemm by address.
3877 template<
typename MT3
3881 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3882 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3884 using boost::numeric_cast;
// Sizes and leading dimensions are converted to int, the index type of the CBLAS call;
// numeric_cast is a range-checked conversion.
3894 const int M ( numeric_cast<int>( A.rows() ) );
3895 const int N ( numeric_cast<int>( B.columns() ) );
3896 const int K ( numeric_cast<int>( A.columns() ) );
3897 const int lda( numeric_cast<int>( A.spacing() ) );
3898 const int ldb( numeric_cast<int>( B.spacing() ) );
3899 const int ldc( numeric_cast<int>( C.spacing() ) );
3900 const complex<double> alpha( -scalar );
3901 const complex<double> beta ( 1.0, 0.0 );
// Storage order follows the target type MT3. For a row-major target, A is passed
// untransposed and B transposed; otherwise the two flags are swapped.
// NOTE(review): presumably this reflects A being row-major and B column-major — confirm.
3903 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3904 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3905 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3906 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Function template returning a DMatTDMatMultExpr<T1,T2> expression object.
// NOTE(review): presumably the multiplication operator for a row-major and a
// column-major dense matrix — confirm against the full signature.
3974 template<
typename T1
3976 inline const DMatTDMatMultExpr<T1,T2>
// Raised as std::invalid_argument with the message below.
// NOTE(review): presumably triggered when the operand dimensions are incompatible — confirm.
3982 throw std::invalid_argument(
"Matrix sizes do not match" );
// Row view of a matrix/matrix product: instead of evaluating the whole product, the
// requested row of the left operand is multiplied with the complete right operand.
4010 template<
typename MT1
4012 inline typename RowExprTrait< DMatTDMatMultExpr<MT1,MT2> >::Type
4013 row(
const DMatTDMatMultExpr<MT1,MT2>& dm,
size_t index )
4017 return row( dm.leftOperand(), index ) * dm.rightOperand();
// Column view of a matrix/matrix product: instead of evaluating the whole product, the
// complete left operand is multiplied with the requested column of the right operand.
4036 template<
typename MT1
4038 inline typename ColumnExprTrait< DMatTDMatMultExpr<MT1,MT2> >::Type
4039 column(
const DMatTDMatMultExpr<MT1,MT2>& dm,
size_t index )
4043 return dm.leftOperand() *
column( dm.rightOperand(), index );
// Expression trait: result type for a product involving the matrix types MT1, MT2
// and a non-transpose dense vector type VT.
//
// Type is selected via SelectType:
//   - if MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT
//     is a dense vector that is not a transpose vector, Type is the nested result
//     DMatDVecMultExprTrait< MT1, TDMatDVecMultExprTrait<MT2,VT>::Type >::Type,
//     i.e. the MT2/VT product type is formed first and then combined with MT1;
//   - otherwise Type is INVALID_TYPE.
//
// NOTE(review): the struct header naming the specialized trait is elided in this
// listing; presumably it specializes a matrix/vector multiplication trait for
// DMatTDMatMultExpr<MT1,MT2> - confirm against the full file.
4059 template<
typename MT1,
typename MT2,
typename VT >
4064 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4065 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4066 IsDenseVector<VT>::value && !IsTransposeVector<VT>::value
4067 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4068 , INVALID_TYPE >::Type Type;
// Expression trait: result type for a product involving the matrix types MT1, MT2
// and a non-transpose sparse vector type VT.
//
// Type is selected via SelectType:
//   - if MT1 is a row-major dense matrix, MT2 is a column-major dense matrix and VT
//     is a sparse vector that is not a transpose vector, Type is the nested result
//     DMatDVecMultExprTrait< MT1, TDMatSVecMultExprTrait<MT2,VT>::Type >::Type,
//     i.e. the MT2/VT product type is formed first and then combined with MT1;
//   - otherwise Type is INVALID_TYPE.
//
// NOTE(review): the struct header naming the specialized trait is elided in this
// listing; presumably it specializes a matrix/sparse-vector multiplication trait
// for DMatTDMatMultExpr<MT1,MT2> - confirm against the full file.
4077 template<
typename MT1,
typename MT2,
typename VT >
4082 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4083 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4084 IsSparseVector<VT>::value && !IsTransposeVector<VT>::value
4085 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4086 , INVALID_TYPE >::Type Type;
// Expression trait: result type for a product involving a transpose dense vector
// type VT and the matrix types MT1, MT2.
//
// Type is selected via SelectType:
//   - if VT is a dense transpose vector, MT1 is a row-major dense matrix and MT2 is
//     a column-major dense matrix, Type is the nested result
//     TDVecTDMatMultExprTrait< TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type,
//     i.e. the VT/MT1 product type is formed first and then combined with MT2;
//   - otherwise Type is INVALID_TYPE.
//
// NOTE(review): the struct header naming the specialized trait is elided in this
// listing; presumably it specializes a vector/matrix multiplication trait for
// DMatTDMatMultExpr<MT1,MT2> - confirm against the full file.
4095 template<
typename VT,
typename MT1,
typename MT2 >
4100 typedef typename SelectType< IsDenseVector<VT>::value && IsTransposeVector<VT>::value &&
4101 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4102 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4103 ,
typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4104 , INVALID_TYPE >::Type Type;
// Expression trait: result type for a product involving a transpose sparse vector
// type VT and the matrix types MT1, MT2.
//
// Type is selected via SelectType:
//   - if VT is a sparse transpose vector, MT1 is a row-major dense matrix and MT2 is
//     a column-major dense matrix, Type is the nested result
//     TDVecTDMatMultExprTrait< TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type,
//     i.e. the VT/MT1 product type is formed first and then combined with MT2;
//   - otherwise Type is INVALID_TYPE.
//
// NOTE(review): the struct header naming the specialized trait is elided in this
// listing; presumably it specializes a sparse-vector/matrix multiplication trait
// for DMatTDMatMultExpr<MT1,MT2> - confirm against the full file.
4113 template<
typename VT,
typename MT1,
typename MT2 >
4118 typedef typename SelectType< IsSparseVector<VT>::value && IsTransposeVector<VT>::value &&
4119 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4120 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4121 ,
typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4122 , INVALID_TYPE >::Type Type;
// Expression trait: Type is the multiplication result type of
// (row-expression type of const MT1) and MT2, obtained through MultExprTrait.
// This matches the shape of the row() overload for DMatTDMatMultExpr, which
// multiplies the row of the left operand with the right operand.
//
// NOTE(review): the struct header is elided in this listing; presumably this is
// the RowExprTrait specialization for DMatTDMatMultExpr<MT1,MT2> - confirm
// against the full file.
4131 template<
typename MT1,
typename MT2 >
4136 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Expression trait: Type is the multiplication result type of MT1 and
// (column-expression type of const MT2), obtained through MultExprTrait.
// This matches the shape of the column() overload for DMatTDMatMultExpr, which
// multiplies the left operand with the column of the right operand.
//
// NOTE(review): the struct header is elided in this listing; presumably this is
// the ColumnExprTrait specialization for DMatTDMatMultExpr<MT1,MT2> - confirm
// against the full file.
4145 template<
typename MT1,
typename MT2 >
4150 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;