35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATTDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
112 template<
typename MT1
// Compile-time trait over three types. Its ::value is negated inside
// UseDefaultKernel and it is the EnableIf condition of the cblas_sgemm-based
// selectBlas*Kernel overloads further down.
131 template<
typename T1,
typename T2,
typename T3 >
132 struct UseSinglePrecisionKernel {
// Compile-time trait over three types. Its ::value is negated inside
// UseDefaultKernel and it is the EnableIf condition of the cblas_dgemm-based
// selectBlas*Kernel overloads further down.
145 template<
typename T1,
typename T2,
typename T3 >
146 struct UseDoublePrecisionKernel {
// Compile-time trait: value is nonzero only if the ElementType of T1, T2 and T3
// is, in all three cases, exactly complex<float> (checked with IsSame).
// Used as the EnableIf condition of the cblas_cgemm-based kernels.
160 template<
typename T1,
typename T2,
typename T3 >
161 struct UseSinglePrecisionComplexKernel {
162 typedef complex<float> Type;
163 enum { value = IsSame<typename T1::ElementType,Type>::value &&
164 IsSame<typename T2::ElementType,Type>::value &&
165 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: value is nonzero only if the ElementType of T1, T2 and T3
// is, in all three cases, exactly complex<double> (checked with IsSame).
// Used as the EnableIf condition of the cblas_zgemm-based kernels.
176 template<
typename T1,
typename T2,
typename T3 >
177 struct UseDoublePrecisionComplexKernel {
178 typedef complex<double> Type;
179 enum { value = IsSame<typename T1::ElementType,Type>::value &&
180 IsSame<typename T2::ElementType,Type>::value &&
181 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait deciding whether the non-BLAS ("default") kernel is used.
// value is nonzero when BLAZE_BLAS_MODE evaluates to false, or when none of the
// four precision-specific traits (float, double, complex<float>, complex<double>)
// holds for the type triple <T1,T2,T3>.
191 template<
typename T1,
typename T2,
typename T3 >
192 struct UseDefaultKernel {
193 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
194 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
195 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
196 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait selecting the vectorized default kernels. All of the
// following must hold:
//  - T1, T2 and T3 each report a nonzero 'vectorizable' flag,
//  - the three element types are identical,
//  - IntrinsicTrait of that element type reports both 'addition' and
//    'multiplication'.
206 template<
typename T1,
typename T2,
typename T3 >
207 struct UseVectorizedDefaultKernel {
208 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
209 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
210 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
211 IntrinsicTrait<typename T1::ElementType>::addition &&
212 IntrinsicTrait<typename T1::ElementType>::multiplication };
// The expression type itself advertises vectorizable == 0.
243 enum { vectorizable = 0 };
// Element computation: accumulates products lhs_(i,k) * rhs_(k,j) into tmp.
// Only entered when the inner dimension (lhs_.columns()) is non-zero.
273 if(
lhs_.columns() != 0UL ) {
// end = (columns-1 with its lowest bit cleared) + 1, i.e. always odd, so the
// loop below starting at k=1 consumes k and k+1 in pairs.
274 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
276 for(
size_t k=1UL; k<end; k+=2UL ) {
278 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
// One index remains after the pairwise loop when end is still below columns().
280 if( end <
lhs_.columns() ) {
// The reported column count is that of the right-hand operand.
308 return rhs_.columns();
// Alias query: true if either operand reports being aliased with 'alias'.
338 template<
typename T >
340 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Alias query: true if either operand reports being aliased with 'alias'.
350 template<
typename T >
352 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
// Assignment of the product expression to a dense target: handles the
// degenerate shapes first, then hands over to one of the kernel selectors.
371 template<
typename MT
// Special case: the target has zero rows or zero columns.
380 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension of the product is zero.
383 else if( rhs.
lhs_.columns() == 0UL ) {
// Either the default kernel or the BLAS kernel selector is invoked on the
// operands A and B.
399 DMatTDMatMultExpr::selectDefaultAssignKernel( ~lhs, A, B );
401 DMatTDMatMultExpr::selectBlasAssignKernel( ~lhs, A, B );
// Scalar (non-vectorized) default kernel computing C = A * B.
//   C  target matrix, written element-wise
//   A  left operand  (M x K)
//   B  right operand (K x N)
// For each row i the k=0 products initialize C(i,:), and the remaining k
// values are accumulated on top. A(i,0) and B(0,j) are read unconditionally,
// so the kernel needs K >= 1.
420 template<
typename MT3
424 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
426 const size_t M( A.rows() );
427 const size_t N( B.columns() );
428 const size_t K( A.columns() );
430 for(
size_t i=0UL; i<M; ++i ) {
// First pass over row i: plain assignment, so C needs no prior reset.
431 for(
size_t j=0UL; j<N; ++j ) {
432 C(i,j) = A(i,0UL) * B(0UL,j);
// Remaining inner indices are added to the values written above.
434 for(
size_t k=1UL; k<K; ++k ) {
435 for(
size_t j=0UL; j<N; ++j ) {
436 C(i,j) += A(i,k) * B(k,j);
// Vectorized default kernel computing C = A * B for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 2x4, 2x2 and 2x1 elements, followed by
// 1x4, 1x2 and 1x1 tiles for a leftover row. Each tile keeps one accumulator
// per target element and stores sum(accumulator) into C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
458 template<
typename MT3
461 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
462 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
464 typedef IntrinsicTrait<ElementType> IT;
466 const size_t M( A.rows() );
467 const size_t N( B.columns() );
468 const size_t K( A.columns() );
// Two rows of C at a time.
472 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, two A operands times four B operands.
474 for( ; (j+4UL) <= N; j+=4UL ) {
475 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
476 for(
size_t k=0UL; k<K; k+=IT::size ) {
483 xmm1 = xmm1 + a1 * b1;
484 xmm2 = xmm2 + a1 * b2;
485 xmm3 = xmm3 + a1 * b3;
486 xmm4 = xmm4 + a1 * b4;
487 xmm5 = xmm5 + a2 * b1;
488 xmm6 = xmm6 + a2 * b2;
489 xmm7 = xmm7 + a2 * b3;
490 xmm8 = xmm8 + a2 * b4;
// Each accumulator is reduced with sum() and assigned to its element of C.
492 (~C)(i ,j ) =
sum( xmm1 );
493 (~C)(i ,j+1UL) =
sum( xmm2 );
494 (~C)(i ,j+2UL) =
sum( xmm3 );
495 (~C)(i ,j+3UL) =
sum( xmm4 );
496 (~C)(i+1UL,j ) =
sum( xmm5 );
497 (~C)(i+1UL,j+1UL) =
sum( xmm6 );
498 (~C)(i+1UL,j+2UL) =
sum( xmm7 );
499 (~C)(i+1UL,j+3UL) =
sum( xmm8 );
// 2x2 tile for the columns the 4-wide loop left over.
501 for( ; (j+2UL) <= N; j+=2UL ) {
503 for(
size_t k=0UL; k<K; k+=IT::size ) {
508 xmm1 = xmm1 + a1 * b1;
509 xmm2 = xmm2 + a1 * b2;
510 xmm3 = xmm3 + a2 * b1;
511 xmm4 = xmm4 + a2 * b2;
513 (~C)(i ,j ) =
sum( xmm1 );
514 (~C)(i ,j+1UL) =
sum( xmm2 );
515 (~C)(i+1UL,j ) =
sum( xmm3 );
516 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
// 2x1 tile: a single column of B against rows i and i+1 of A.
520 for(
size_t k=0UL; k<K; k+=IT::size ) {
522 xmm1 = xmm1 + A.load(i ,k) * b1;
523 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
525 (~C)(i ,j) =
sum( xmm1 );
526 (~C)(i+1UL,j) =
sum( xmm2 );
// Single-row tiles: 1x4 first ...
531 for( ; (j+4UL) <= N; j+=4UL ) {
533 for(
size_t k=0UL; k<K; k+=IT::size ) {
535 xmm1 = xmm1 + a1 * B.load(k,j );
536 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
537 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
538 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
540 (~C)(i,j ) =
sum( xmm1 );
541 (~C)(i,j+1UL) =
sum( xmm2 );
542 (~C)(i,j+2UL) =
sum( xmm3 );
543 (~C)(i,j+3UL) =
sum( xmm4 );
// ... then 1x2 ...
545 for( ; (j+2UL) <= N; j+=2UL ) {
547 for(
size_t k=0UL; k<K; k+=IT::size ) {
549 xmm1 = xmm1 + a1 * B.load(k,j );
550 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
552 (~C)(i,j ) =
sum( xmm1 );
553 (~C)(i,j+1UL) =
sum( xmm2 );
// ... and finally a single element.
557 for(
size_t k=0UL; k<K; k+=IT::size ) {
558 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
560 (~C)(i,j) =
sum( xmm1 );
// Vectorized default kernel computing C = A * B for a column-major target
// (DenseMatrix<MT3,true>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 4x2 and 4x1 elements, then 2x2 and 2x1,
// then 1x2 and 1x1. Each tile keeps one accumulator per target element and
// stores sum(accumulator) into C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
581 template<
typename MT3
584 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
585 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
587 typedef IntrinsicTrait<ElementType> IT;
589 const size_t M( A.rows() );
590 const size_t N( B.columns() );
591 const size_t K( A.columns() );
// Four rows of C at a time.
595 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, four A operands times two B operands.
597 for( ; (j+2UL) <= N; j+=2UL ) {
598 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
599 for(
size_t k=0UL; k<K; k+=IT::size ) {
606 xmm1 = xmm1 + a1 * b1;
607 xmm2 = xmm2 + a1 * b2;
608 xmm3 = xmm3 + a2 * b1;
609 xmm4 = xmm4 + a2 * b2;
610 xmm5 = xmm5 + a3 * b1;
611 xmm6 = xmm6 + a3 * b2;
612 xmm7 = xmm7 + a4 * b1;
613 xmm8 = xmm8 + a4 * b2;
// Each accumulator is reduced with sum() and assigned to its element of C.
615 (~C)(i ,j ) =
sum( xmm1 );
616 (~C)(i ,j+1UL) =
sum( xmm2 );
617 (~C)(i+1UL,j ) =
sum( xmm3 );
618 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
619 (~C)(i+2UL,j ) =
sum( xmm5 );
620 (~C)(i+2UL,j+1UL) =
sum( xmm6 );
621 (~C)(i+3UL,j ) =
sum( xmm7 );
622 (~C)(i+3UL,j+1UL) =
sum( xmm8 );
// 4x1 tile: a single column of B against rows i..i+3 of A.
626 for(
size_t k=0UL; k<K; k+=IT::size ) {
628 xmm1 = xmm1 + A.load(i ,k) * b1;
629 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
630 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
631 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
633 (~C)(i ,j) =
sum( xmm1 );
634 (~C)(i+1UL,j) =
sum( xmm2 );
635 (~C)(i+2UL,j) =
sum( xmm3 );
636 (~C)(i+3UL,j) =
sum( xmm4 );
// Two rows at a time for what the 4-row loop left over: 2x2 tile ...
639 for( ; (i+2UL) <= M; i+=2UL ) {
641 for( ; (j+2UL) <= N; j+=2UL ) {
643 for(
size_t k=0UL; k<K; k+=IT::size ) {
648 xmm1 = xmm1 + a1 * b1;
649 xmm2 = xmm2 + a1 * b2;
650 xmm3 = xmm3 + a2 * b1;
651 xmm4 = xmm4 + a2 * b2;
653 (~C)(i ,j ) =
sum( xmm1 );
654 (~C)(i ,j+1UL) =
sum( xmm2 );
655 (~C)(i+1UL,j ) =
sum( xmm3 );
656 (~C)(i+1UL,j+1UL) =
sum( xmm4 );
// ... and 2x1 tile.
660 for(
size_t k=0UL; k<K; k+=IT::size ) {
662 xmm1 = xmm1 + A.load(i ,k) * b1;
663 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
665 (~C)(i ,j) =
sum( xmm1 );
666 (~C)(i+1UL,j) =
sum( xmm2 );
// Single-row tiles: 1x2 ...
671 for( ; (j+2UL) <= N; j+=2UL ) {
673 for(
size_t k=0UL; k<K; k+=IT::size ) {
675 xmm1 = xmm1 + a1 * B.load(k,j );
676 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
678 (~C)(i,j ) =
sum( xmm1 );
679 (~C)(i,j+1UL) =
sum( xmm2 );
// ... and a single element.
683 for(
size_t k=0UL; k<K; k+=IT::size ) {
684 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
686 (~C)(i,j) =
sum( xmm1 );
// Fallback overload of the BLAS kernel selector: when UseDefaultKernel holds
// for <MT3,MT4,MT5> it simply forwards to selectDefaultAssignKernel.
707 template<
typename MT3
710 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
711 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
713 selectDefaultAssignKernel( C, A, B );
// Single-precision BLAS kernel: computes C = A * B through cblas_sgemm
// (alpha = 1, beta = 0). Enabled when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
733 template<
typename MT3
736 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
737 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
739 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
745 const int M ( numeric_cast<int>( A.rows() ) );
746 const int N ( numeric_cast<int>( B.columns() ) );
747 const int K ( numeric_cast<int>( A.columns() ) );
748 const int lda( numeric_cast<int>( A.spacing() ) );
749 const int ldb( numeric_cast<int>( B.spacing() ) );
750 const int ldc( numeric_cast<int>( C.spacing() ) );
// The BLAS layout follows C's storage order. For a row-major C, A is passed
// untransposed and B transposed; for a column-major C it is the other way round.
752 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
753 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
754 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
755 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// Double-precision BLAS kernel: computes C = A * B through cblas_dgemm
// (alpha = 1, beta = 0). Enabled when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
776 template<
typename MT3
779 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
780 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
782 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
788 const int M ( numeric_cast<int>( A.rows() ) );
789 const int N ( numeric_cast<int>( B.columns() ) );
790 const int K ( numeric_cast<int>( A.columns() ) );
791 const int lda( numeric_cast<int>( A.spacing() ) );
792 const int ldb( numeric_cast<int>( B.spacing() ) );
793 const int ldc( numeric_cast<int>( C.spacing() ) );
// The BLAS layout follows C's storage order. For a row-major C, A is passed
// untransposed and B transposed; for a column-major C it is the other way round.
795 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
796 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
797 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
798 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// complex<float> BLAS kernel: computes C = A * B through cblas_cgemm.
// Enabled when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
819 template<
typename MT3
822 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
823 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
825 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
834 const int M ( numeric_cast<int>( A.rows() ) );
835 const int N ( numeric_cast<int>( B.columns() ) );
836 const int K ( numeric_cast<int>( A.columns() ) );
837 const int lda( numeric_cast<int>( A.spacing() ) );
838 const int ldb( numeric_cast<int>( B.spacing() ) );
839 const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scaling factors are handed to BLAS by address: alpha = 1, beta = 0.
840 const complex<float> alpha( 1.0F, 0.0F );
841 const complex<float> beta ( 0.0F, 0.0F );
// The BLAS layout follows C's storage order. For a row-major C, A is passed
// untransposed and B transposed; for a column-major C it is the other way round.
843 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
844 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
845 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
846 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// complex<double> BLAS kernel: computes C = A * B through cblas_zgemm.
// Enabled when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
867 template<
typename MT3
870 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
871 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
873 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
882 const int M ( numeric_cast<int>( A.rows() ) );
883 const int N ( numeric_cast<int>( B.columns() ) );
884 const int K ( numeric_cast<int>( A.columns() ) );
885 const int lda( numeric_cast<int>( A.spacing() ) );
886 const int ldb( numeric_cast<int>( B.spacing() ) );
887 const int ldc( numeric_cast<int>( C.spacing() ) );
// Complex scaling factors are handed to BLAS by address: alpha = 1, beta = 0.
888 const complex<double> alpha( 1.0, 0.0 );
889 const complex<double> beta ( 0.0, 0.0 );
// The BLAS layout follows C's storage order. For a row-major C, A is passed
// untransposed and B transposed; for a column-major C it is the other way round.
891 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
892 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
893 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
894 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment path that goes through a temporary built from the whole expression.
912 template<
typename MT
// The temporary's type is picked by SO between OppositeType and ResultType
// (presumably OppositeType when SO is true -- confirm against SelectType).
918 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
930 const TmpType tmp( rhs );
// Addition assignment of the product expression to a dense target.
949 template<
typename MT
// Degenerate shapes (empty target or zero inner dimension) are tested together.
958 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Either the default kernel or the BLAS kernel selector is invoked on the
// operands A and B.
973 DMatTDMatMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B );
975 DMatTDMatMultExpr::selectBlasAddAssignKernel( ~lhs, A, B );
// Scalar (non-vectorized) default kernel computing C += A * B; selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
//   C  target matrix, updated in place
//   A  left operand  (M x K)
//   B  right operand (K x N)
994 template<
typename MT3
997 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
998 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1000 const size_t M( A.rows() );
1001 const size_t N( B.columns() );
1002 const size_t K( A.columns() );
// end is N with its lowest bit cleared, i.e. the largest even value <= N.
1005 const size_t end( N &
size_t(-2) );
1007 for(
size_t i=0UL; i<M; ++i ) {
1008 for(
size_t k=0UL; k<K; ++k ) {
// Column loop unrolled by two.
1009 for(
size_t j=0UL; j<end; j+=2UL ) {
1010 C(i,j ) += A(i,k) * B(k,j );
1011 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update for column index 'end' (presumably guarded for odd N -- the guard is
// on a line not shown here).
1014 C(i,end) += A(i,k) * B(k,end);
// Vectorized default kernel computing C += A * B for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 2x4, 2x2 and 2x1 elements, followed by
// 1x4, 1x2 and 1x1 tiles for a leftover row. Each tile keeps one accumulator
// per target element and adds sum(accumulator) to C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
1036 template<
typename MT3
1039 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1040 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1042 typedef IntrinsicTrait<ElementType> IT;
1044 const size_t M( A.rows() );
1045 const size_t N( B.columns() );
1046 const size_t K( A.columns() );
// Two rows of C at a time.
1050 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, two A operands times four B operands.
1052 for( ; (j+4UL) <= N; j+=4UL ) {
1053 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1054 for(
size_t k=0UL; k<K; k+=IT::size ) {
1061 xmm1 = xmm1 + a1 * b1;
1062 xmm2 = xmm2 + a1 * b2;
1063 xmm3 = xmm3 + a1 * b3;
1064 xmm4 = xmm4 + a1 * b4;
1065 xmm5 = xmm5 + a2 * b1;
1066 xmm6 = xmm6 + a2 * b2;
1067 xmm7 = xmm7 + a2 * b3;
1068 xmm8 = xmm8 + a2 * b4;
// Each accumulator is reduced with sum() and added to its element of C.
1070 (~C)(i ,j ) +=
sum( xmm1 );
1071 (~C)(i ,j+1UL) +=
sum( xmm2 );
1072 (~C)(i ,j+2UL) +=
sum( xmm3 );
1073 (~C)(i ,j+3UL) +=
sum( xmm4 );
1074 (~C)(i+1UL,j ) +=
sum( xmm5 );
1075 (~C)(i+1UL,j+1UL) +=
sum( xmm6 );
1076 (~C)(i+1UL,j+2UL) +=
sum( xmm7 );
1077 (~C)(i+1UL,j+3UL) +=
sum( xmm8 );
// 2x2 tile for the columns the 4-wide loop left over.
1079 for( ; (j+2UL) <= N; j+=2UL ) {
1081 for(
size_t k=0UL; k<K; k+=IT::size ) {
1086 xmm1 = xmm1 + a1 * b1;
1087 xmm2 = xmm2 + a1 * b2;
1088 xmm3 = xmm3 + a2 * b1;
1089 xmm4 = xmm4 + a2 * b2;
1091 (~C)(i ,j ) +=
sum( xmm1 );
1092 (~C)(i ,j+1UL) +=
sum( xmm2 );
1093 (~C)(i+1UL,j ) +=
sum( xmm3 );
1094 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
// 2x1 tile: a single column of B against rows i and i+1 of A.
1098 for(
size_t k=0UL; k<K; k+=IT::size ) {
1100 xmm1 = xmm1 + A.load(i ,k) * b1;
1101 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1103 (~C)(i ,j) +=
sum( xmm1 );
1104 (~C)(i+1UL,j) +=
sum( xmm2 );
// Single-row tiles: 1x4 first ...
1109 for( ; (j+4UL) <= N; j+=4UL ) {
1111 for(
size_t k=0UL; k<K; k+=IT::size ) {
1113 xmm1 = xmm1 + a1 * B.load(k,j );
1114 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1115 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1116 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1118 (~C)(i,j ) +=
sum( xmm1 );
1119 (~C)(i,j+1UL) +=
sum( xmm2 );
1120 (~C)(i,j+2UL) +=
sum( xmm3 );
1121 (~C)(i,j+3UL) +=
sum( xmm4 );
// ... then 1x2 ...
1123 for( ; (j+2UL) <= N; j+=2UL ) {
1125 for(
size_t k=0UL; k<K; k+=IT::size ) {
1127 xmm1 = xmm1 + a1 * B.load(k,j );
1128 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1130 (~C)(i,j ) +=
sum( xmm1 );
1131 (~C)(i,j+1UL) +=
sum( xmm2 );
// ... and finally a single element.
1135 for(
size_t k=0UL; k<K; k+=IT::size ) {
1136 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1138 (~C)(i,j) +=
sum( xmm1 );
// Vectorized default kernel computing C += A * B for a column-major target
// (DenseMatrix<MT3,true>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 4x2 and 4x1 elements, then 2x2 and 2x1,
// then 1x2 and 1x1. Each tile keeps one accumulator per target element and
// adds sum(accumulator) to C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
1159 template<
typename MT3
1162 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1163 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1165 typedef IntrinsicTrait<ElementType> IT;
1167 const size_t M( A.rows() );
1168 const size_t N( B.columns() );
1169 const size_t K( A.columns() );
// Four rows of C at a time.
1173 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, four A operands times two B operands.
1175 for( ; (j+2UL) <= N; j+=2UL ) {
1176 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1177 for(
size_t k=0UL; k<K; k+=IT::size ) {
1184 xmm1 = xmm1 + a1 * b1;
1185 xmm2 = xmm2 + a1 * b2;
1186 xmm3 = xmm3 + a2 * b1;
1187 xmm4 = xmm4 + a2 * b2;
1188 xmm5 = xmm5 + a3 * b1;
1189 xmm6 = xmm6 + a3 * b2;
1190 xmm7 = xmm7 + a4 * b1;
1191 xmm8 = xmm8 + a4 * b2;
// Each accumulator is reduced with sum() and added to its element of C.
1193 (~C)(i ,j ) +=
sum( xmm1 );
1194 (~C)(i ,j+1UL) +=
sum( xmm2 );
1195 (~C)(i+1UL,j ) +=
sum( xmm3 );
1196 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
1197 (~C)(i+2UL,j ) +=
sum( xmm5 );
1198 (~C)(i+2UL,j+1UL) +=
sum( xmm6 );
1199 (~C)(i+3UL,j ) +=
sum( xmm7 );
1200 (~C)(i+3UL,j+1UL) +=
sum( xmm8 );
// 4x1 tile: a single column of B against rows i..i+3 of A.
1204 for(
size_t k=0UL; k<K; k+=IT::size ) {
1206 xmm1 = xmm1 + A.load(i ,k) * b1;
1207 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1208 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1209 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1211 (~C)(i ,j) +=
sum( xmm1 );
1212 (~C)(i+1UL,j) +=
sum( xmm2 );
1213 (~C)(i+2UL,j) +=
sum( xmm3 );
1214 (~C)(i+3UL,j) +=
sum( xmm4 );
// Two rows at a time for what the 4-row loop left over: 2x2 tile ...
1217 for( ; (i+2UL) <= M; i+=2UL ) {
1219 for( ; (j+2UL) <= N; j+=2UL ) {
1221 for(
size_t k=0UL; k<K; k+=IT::size ) {
1226 xmm1 = xmm1 + a1 * b1;
1227 xmm2 = xmm2 + a1 * b2;
1228 xmm3 = xmm3 + a2 * b1;
1229 xmm4 = xmm4 + a2 * b2;
1231 (~C)(i ,j ) +=
sum( xmm1 );
1232 (~C)(i ,j+1UL) +=
sum( xmm2 );
1233 (~C)(i+1UL,j ) +=
sum( xmm3 );
1234 (~C)(i+1UL,j+1UL) +=
sum( xmm4 );
// ... and 2x1 tile.
1238 for(
size_t k=0UL; k<K; k+=IT::size ) {
1240 xmm1 = xmm1 + A.load(i ,k) * b1;
1241 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1243 (~C)(i ,j) +=
sum( xmm1 );
1244 (~C)(i+1UL,j) +=
sum( xmm2 );
// Single-row tiles: 1x2 ...
1249 for( ; (j+2UL) <= N; j+=2UL ) {
1251 for(
size_t k=0UL; k<K; k+=IT::size ) {
1253 xmm1 = xmm1 + a1 * B.load(k,j );
1254 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1256 (~C)(i,j ) +=
sum( xmm1 );
1257 (~C)(i,j+1UL) +=
sum( xmm2 );
// ... and a single element.
1261 for(
size_t k=0UL; k<K; k+=IT::size ) {
1262 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1264 (~C)(i,j) +=
sum( xmm1 );
// Fallback overload of the BLAS add-assign selector: when UseDefaultKernel holds
// for <MT3,MT4,MT5> it simply forwards to selectDefaultAddAssignKernel.
1285 template<
typename MT3
1288 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1289 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1291 selectDefaultAddAssignKernel( C, A, B );
// Single-precision BLAS kernel: computes C += A * B through cblas_sgemm
// (alpha = 1, beta = 1). Enabled when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
1311 template<
typename MT3
1314 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1315 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1317 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1323 const int M ( numeric_cast<int>( A.rows() ) );
1324 const int N ( numeric_cast<int>( B.columns() ) );
1325 const int K ( numeric_cast<int>( A.columns() ) );
1326 const int lda( numeric_cast<int>( A.spacing() ) );
1327 const int ldb( numeric_cast<int>( B.spacing() ) );
1328 const int ldc( numeric_cast<int>( C.spacing() ) );
// beta = 1 keeps the existing contents of C, so the product is added to it.
1330 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1331 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1332 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1333 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// Double-precision BLAS kernel: computes C += A * B through cblas_dgemm
// (alpha = 1, beta = 1). Enabled when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
1354 template<
typename MT3
1357 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1358 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1360 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1366 const int M ( numeric_cast<int>( A.rows() ) );
1367 const int N ( numeric_cast<int>( B.columns() ) );
1368 const int K ( numeric_cast<int>( A.columns() ) );
1369 const int lda( numeric_cast<int>( A.spacing() ) );
1370 const int ldb( numeric_cast<int>( B.spacing() ) );
1371 const int ldc( numeric_cast<int>( C.spacing() ) );
// beta = 1 keeps the existing contents of C, so the product is added to it.
1373 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1374 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1375 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1376 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// complex<float> BLAS kernel: computes C += A * B through cblas_cgemm.
// Enabled when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
1397 template<
typename MT3
1400 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1401 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1403 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1412 const int M ( numeric_cast<int>( A.rows() ) );
1413 const int N ( numeric_cast<int>( B.columns() ) );
1414 const int K ( numeric_cast<int>( A.columns() ) );
1415 const int lda( numeric_cast<int>( A.spacing() ) );
1416 const int ldb( numeric_cast<int>( B.spacing() ) );
1417 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = 1 and beta = 1: the product is added to the existing contents of C.
1418 const complex<float> alpha( 1.0F, 0.0F );
1419 const complex<float> beta ( 1.0F, 0.0F );
1421 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1422 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1423 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1424 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// complex<double> BLAS kernel: computes C += A * B through cblas_zgemm.
// Enabled when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
1445 template<
typename MT3
1448 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1449 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1451 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1460 const int M ( numeric_cast<int>( A.rows() ) );
1461 const int N ( numeric_cast<int>( B.columns() ) );
1462 const int K ( numeric_cast<int>( A.columns() ) );
1463 const int lda( numeric_cast<int>( A.spacing() ) );
1464 const int ldb( numeric_cast<int>( B.spacing() ) );
1465 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = 1 and beta = 1: the product is added to the existing contents of C.
1466 const complex<double> alpha( 1.0, 0.0 );
1467 const complex<double> beta ( 1.0, 0.0 );
1469 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1470 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1471 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1472 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of the product expression to a dense target.
1495 template<
typename MT
// Degenerate shapes (empty target or zero inner dimension) are tested together.
1504 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
// Either the default kernel or the BLAS kernel selector is invoked on the
// operands A and B.
1519 DMatTDMatMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B );
1521 DMatTDMatMultExpr::selectBlasSubAssignKernel( ~lhs, A, B );
// Scalar (non-vectorized) default kernel computing C -= A * B; selected when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> does not hold.
//   C  target matrix, updated in place
//   A  left operand  (M x K)
//   B  right operand (K x N)
1540 template<
typename MT3
1543 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1544 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1546 const size_t M( A.rows() );
1547 const size_t N( B.columns() );
1548 const size_t K( A.columns() );
// end is N with its lowest bit cleared, i.e. the largest even value <= N.
1551 const size_t end( N &
size_t(-2) );
1553 for(
size_t i=0UL; i<M; ++i ) {
1554 for(
size_t k=0UL; k<K; ++k ) {
// Column loop unrolled by two.
1555 for(
size_t j=0UL; j<end; j+=2UL ) {
1556 C(i,j ) -= A(i,k) * B(k,j );
1557 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update for column index 'end' (presumably guarded for odd N -- the guard is
// on a line not shown here).
1560 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default kernel computing C -= A * B for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 2x4, 2x2 and 2x1 elements, followed by
// 1x4, 1x2 and 1x1 tiles for a leftover row. Each tile keeps one accumulator
// per target element and subtracts sum(accumulator) from C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
1582 template<
typename MT3
1585 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1586 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
1588 typedef IntrinsicTrait<ElementType> IT;
1590 const size_t M( A.rows() );
1591 const size_t N( B.columns() );
1592 const size_t K( A.columns() );
// Two rows of C at a time.
1596 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 tile: eight accumulators, two A operands times four B operands.
1598 for( ; (j+4UL) <= N; j+=4UL ) {
1599 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1600 for(
size_t k=0UL; k<K; k+=IT::size ) {
1607 xmm1 = xmm1 + a1 * b1;
1608 xmm2 = xmm2 + a1 * b2;
1609 xmm3 = xmm3 + a1 * b3;
1610 xmm4 = xmm4 + a1 * b4;
1611 xmm5 = xmm5 + a2 * b1;
1612 xmm6 = xmm6 + a2 * b2;
1613 xmm7 = xmm7 + a2 * b3;
1614 xmm8 = xmm8 + a2 * b4;
// Each accumulator is reduced with sum() and subtracted from its element of C.
1616 (~C)(i ,j ) -=
sum( xmm1 );
1617 (~C)(i ,j+1UL) -=
sum( xmm2 );
1618 (~C)(i ,j+2UL) -=
sum( xmm3 );
1619 (~C)(i ,j+3UL) -=
sum( xmm4 );
1620 (~C)(i+1UL,j ) -=
sum( xmm5 );
1621 (~C)(i+1UL,j+1UL) -=
sum( xmm6 );
1622 (~C)(i+1UL,j+2UL) -=
sum( xmm7 );
1623 (~C)(i+1UL,j+3UL) -=
sum( xmm8 );
// 2x2 tile for the columns the 4-wide loop left over.
1625 for( ; (j+2UL) <= N; j+=2UL ) {
1627 for(
size_t k=0UL; k<K; k+=IT::size ) {
1632 xmm1 = xmm1 + a1 * b1;
1633 xmm2 = xmm2 + a1 * b2;
1634 xmm3 = xmm3 + a2 * b1;
1635 xmm4 = xmm4 + a2 * b2;
1637 (~C)(i ,j ) -=
sum( xmm1 );
1638 (~C)(i ,j+1UL) -=
sum( xmm2 );
1639 (~C)(i+1UL,j ) -=
sum( xmm3 );
1640 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
// 2x1 tile: a single column of B against rows i and i+1 of A.
1644 for(
size_t k=0UL; k<K; k+=IT::size ) {
1646 xmm1 = xmm1 + A.load(i ,k) * b1;
1647 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1649 (~C)(i ,j) -=
sum( xmm1 );
1650 (~C)(i+1UL,j) -=
sum( xmm2 );
// Single-row tiles: 1x4 first ...
1655 for( ; (j+4UL) <= N; j+=4UL ) {
1657 for(
size_t k=0UL; k<K; k+=IT::size ) {
1659 xmm1 = xmm1 + a1 * B.load(k,j );
1660 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1661 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
1662 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
1664 (~C)(i,j ) -=
sum( xmm1 );
1665 (~C)(i,j+1UL) -=
sum( xmm2 );
1666 (~C)(i,j+2UL) -=
sum( xmm3 );
1667 (~C)(i,j+3UL) -=
sum( xmm4 );
// ... then 1x2 ...
1669 for( ; (j+2UL) <= N; j+=2UL ) {
1671 for(
size_t k=0UL; k<K; k+=IT::size ) {
1673 xmm1 = xmm1 + a1 * B.load(k,j );
1674 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1676 (~C)(i,j ) -=
sum( xmm1 );
1677 (~C)(i,j+1UL) -=
sum( xmm2 );
// ... and finally a single element.
1681 for(
size_t k=0UL; k<K; k+=IT::size ) {
1682 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1684 (~C)(i,j) -=
sum( xmm1 );
// Vectorized default kernel computing C -= A * B for a column-major target
// (DenseMatrix<MT3,true>); enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5>
// holds. C is processed in tiles of 4x2 and 4x1 elements, then 2x2 and 2x1,
// then 1x2 and 1x1. Each tile keeps one accumulator per target element and
// subtracts sum(accumulator) from C.
// NOTE(review): the k loops advance by IT::size and no scalar tail is visible
// here -- presumably the operands are padded to a multiple of IT::size; confirm.
1705 template<
typename MT3
1708 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1709 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1711 typedef IntrinsicTrait<ElementType> IT;
1713 const size_t M( A.rows() );
1714 const size_t N( B.columns() );
1715 const size_t K( A.columns() );
// Four rows of C at a time.
1719 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 tile: eight accumulators, four A operands times two B operands.
1721 for( ; (j+2UL) <= N; j+=2UL ) {
1722 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
1723 for(
size_t k=0UL; k<K; k+=IT::size ) {
1730 xmm1 = xmm1 + a1 * b1;
1731 xmm2 = xmm2 + a1 * b2;
1732 xmm3 = xmm3 + a2 * b1;
1733 xmm4 = xmm4 + a2 * b2;
1734 xmm5 = xmm5 + a3 * b1;
1735 xmm6 = xmm6 + a3 * b2;
1736 xmm7 = xmm7 + a4 * b1;
1737 xmm8 = xmm8 + a4 * b2;
// Each accumulator is reduced with sum() and subtracted from its element of C.
1739 (~C)(i ,j ) -=
sum( xmm1 );
1740 (~C)(i ,j+1UL) -=
sum( xmm2 );
1741 (~C)(i+1UL,j ) -=
sum( xmm3 );
1742 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
1743 (~C)(i+2UL,j ) -=
sum( xmm5 );
1744 (~C)(i+2UL,j+1UL) -=
sum( xmm6 );
1745 (~C)(i+3UL,j ) -=
sum( xmm7 );
1746 (~C)(i+3UL,j+1UL) -=
sum( xmm8 );
// 4x1 tile: a single column of B against rows i..i+3 of A.
1750 for(
size_t k=0UL; k<K; k+=IT::size ) {
1752 xmm1 = xmm1 + A.load(i ,k) * b1;
1753 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1754 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
1755 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
1757 (~C)(i ,j) -=
sum( xmm1 );
1758 (~C)(i+1UL,j) -=
sum( xmm2 );
1759 (~C)(i+2UL,j) -=
sum( xmm3 );
1760 (~C)(i+3UL,j) -=
sum( xmm4 );
// Two rows at a time for what the 4-row loop left over: 2x2 tile ...
1763 for( ; (i+2UL) <= M; i+=2UL ) {
1765 for( ; (j+2UL) <= N; j+=2UL ) {
1767 for(
size_t k=0UL; k<K; k+=IT::size ) {
1772 xmm1 = xmm1 + a1 * b1;
1773 xmm2 = xmm2 + a1 * b2;
1774 xmm3 = xmm3 + a2 * b1;
1775 xmm4 = xmm4 + a2 * b2;
1777 (~C)(i ,j ) -=
sum( xmm1 );
1778 (~C)(i ,j+1UL) -=
sum( xmm2 );
1779 (~C)(i+1UL,j ) -=
sum( xmm3 );
1780 (~C)(i+1UL,j+1UL) -=
sum( xmm4 );
// ... and 2x1 tile.
1784 for(
size_t k=0UL; k<K; k+=IT::size ) {
1786 xmm1 = xmm1 + A.load(i ,k) * b1;
1787 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
1789 (~C)(i ,j) -=
sum( xmm1 );
1790 (~C)(i+1UL,j) -=
sum( xmm2 );
// Single-row tiles: 1x2 ...
1795 for( ; (j+2UL) <= N; j+=2UL ) {
1797 for(
size_t k=0UL; k<K; k+=IT::size ) {
1799 xmm1 = xmm1 + a1 * B.load(k,j );
1800 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
1802 (~C)(i,j ) -=
sum( xmm1 );
1803 (~C)(i,j+1UL) -=
sum( xmm2 );
// ... and a single element.
1807 for(
size_t k=0UL; k<K; k+=IT::size ) {
1808 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
1810 (~C)(i,j) -=
sum( xmm1 );
// Fallback overload of the BLAS sub-assign selector: when UseDefaultKernel holds
// for <MT3,MT4,MT5> it simply forwards to selectDefaultSubAssignKernel.
1831 template<
typename MT3
1834 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1835 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1837 selectDefaultSubAssignKernel( C, A, B );
// Single-precision BLAS kernel: computes C -= A * B through cblas_sgemm
// (alpha = -1, beta = 1). Enabled when UseSinglePrecisionKernel<MT3,MT4,MT5> holds.
1857 template<
typename MT3
1860 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1861 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1863 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1869 const int M ( numeric_cast<int>( A.rows() ) );
1870 const int N ( numeric_cast<int>( B.columns() ) );
1871 const int K ( numeric_cast<int>( A.columns() ) );
1872 const int lda( numeric_cast<int>( A.spacing() ) );
1873 const int ldb( numeric_cast<int>( B.spacing() ) );
1874 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -1 negates the product and beta = 1 keeps C, giving a subtraction.
1876 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1877 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1878 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1879 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// Double-precision BLAS kernel: computes C -= A * B through cblas_dgemm
// (alpha = -1, beta = 1). Enabled when UseDoublePrecisionKernel<MT3,MT4,MT5> holds.
1900 template<
typename MT3
1903 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1904 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1906 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1912 const int M ( numeric_cast<int>( A.rows() ) );
1913 const int N ( numeric_cast<int>( B.columns() ) );
1914 const int K ( numeric_cast<int>( A.columns() ) );
1915 const int lda( numeric_cast<int>( A.spacing() ) );
1916 const int ldb( numeric_cast<int>( B.spacing() ) );
1917 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -1 negates the product and beta = 1 keeps C, giving a subtraction.
1919 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1920 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1921 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1922 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// complex<float> BLAS kernel: computes C -= A * B through cblas_cgemm.
// Enabled when UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds.
1943 template<
typename MT3
1946 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1947 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1949 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
1958 const int M ( numeric_cast<int>( A.rows() ) );
1959 const int N ( numeric_cast<int>( B.columns() ) );
1960 const int K ( numeric_cast<int>( A.columns() ) );
1961 const int lda( numeric_cast<int>( A.spacing() ) );
1962 const int ldb( numeric_cast<int>( B.spacing() ) );
1963 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -1 negates the product and beta = 1 keeps C, giving a subtraction.
1964 const complex<float> alpha( -1.0F, 0.0F );
1965 const complex<float> beta ( 1.0F, 0.0F );
1967 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1968 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1969 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
1970 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// complex<double> BLAS kernel: computes C -= A * B through cblas_zgemm.
// Enabled when UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds.
1991 template<
typename MT3
1994 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1995 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1997 using boost::numeric_cast;
// Sizes and spacings are converted to int via the range-checked
// boost::numeric_cast; the spacings are passed as BLAS leading dimensions.
2006 const int M ( numeric_cast<int>( A.rows() ) );
2007 const int N ( numeric_cast<int>( B.columns() ) );
2008 const int K ( numeric_cast<int>( A.columns() ) );
2009 const int lda( numeric_cast<int>( A.spacing() ) );
2010 const int ldb( numeric_cast<int>( B.spacing() ) );
2011 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -1 negates the product and beta = 1 keeps C, giving a subtraction.
2012 const complex<double> alpha( -1.0, 0.0 );
2013 const complex<double> beta ( 1.0, 0.0 );
2015 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2016 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2017 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2018 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Head of the DMatScalarMultExpr variant whose matrix operand is a
// DMatTDMatMultExpr<MT1,MT2> (a product scaled by a scalar of type ST).
2064 template<
typename MT1
// Publicly a row-major DenseMatrix; MatScalarMultExpr and Computation are
// inherited privately.
2068 :
public DenseMatrix< DMatScalarMultExpr< DMatTDMatMultExpr<MT1,MT2>, ST, false >, false >
2069 ,
private MatScalarMultExpr
2070 ,
private Computation
// Shorthand for the wrapped matrix product expression.
2074 typedef DMatTDMatMultExpr<MT1,MT2> MMM;
// Compile-time trait for the scaled product: value is nonzero when the element
// types of T1, T2 and T3 all satisfy IsFloat and the scalar type T4 is not
// complex (IsComplex<T4> is false).
2087 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2088 struct UseSinglePrecisionKernel {
2089 enum { value = IsFloat<typename T1::ElementType>::value &&
2090 IsFloat<typename T2::ElementType>::value &&
2091 IsFloat<typename T3::ElementType>::value &&
2092 !IsComplex<T4>::value };
// Compile-time trait for the scaled product: value is nonzero when the element
// types of T1, T2 and T3 all satisfy IsDouble and the scalar type T4 is not
// complex (IsComplex<T4> is false).
2101 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2102 struct UseDoublePrecisionKernel {
2103 enum { value = IsDouble<typename T1::ElementType>::value &&
2104 IsDouble<typename T2::ElementType>::value &&
2105 IsDouble<typename T3::ElementType>::value &&
2106 !IsComplex<T4>::value };
// Compile-time trait: value is nonzero only if the ElementType of T1, T2 and T3
// is, in all three cases, exactly complex<float>. The scalar type is not part
// of this trait.
2115 template<
typename T1,
typename T2,
typename T3 >
2116 struct UseSinglePrecisionComplexKernel {
2117 typedef complex<float> Type;
2118 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2119 IsSame<typename T2::ElementType,Type>::value &&
2120 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait: value is nonzero only if the ElementType of T1, T2 and T3
// is, in all three cases, exactly complex<double>. The scalar type is not part
// of this trait.
2129 template<
typename T1,
typename T2,
typename T3 >
2130 struct UseDoublePrecisionComplexKernel {
2131 typedef complex<double> Type;
2132 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2133 IsSame<typename T2::ElementType,Type>::value &&
2134 IsSame<typename T3::ElementType,Type>::value };
// Compile-time trait deciding whether the non-BLAS ("default") kernel is used
// for the scaled product. value is nonzero when BLAZE_BLAS_MODE evaluates to
// false, or when none of the four precision-specific traits holds. Only the
// float/double traits take the scalar type T4 into account.
2142 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2143 struct UseDefaultKernel {
2144 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2145 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2146 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2147 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time trait selecting the vectorized default kernels for the scaled
// product. All of the following must hold:
//  - T1, T2 and T3 each report a nonzero 'vectorizable' flag,
//  - the three element types are identical,
//  - the scalar type T4 is that same element type,
//  - IntrinsicTrait of the element type reports both 'addition' and
//    'multiplication'.
2155 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2156 struct UseVectorizedDefaultKernel {
2157 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2158 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2159 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2160 IsSame<typename T1::ElementType,T4>::value &&
2161 IntrinsicTrait<typename T1::ElementType>::addition &&
2162 IntrinsicTrait<typename T1::ElementType>::multiplication };
// Type of this expression object.
2168 typedef DMatScalarMultExpr<MMM,ST,false>
This;
// Result type: MultTrait applied to RES and the scalar type ST.
2169 typedef typename MultTrait<RES,ST>::Type
ResultType;
// SIMD type associated with ElementType through IntrinsicTrait.
2173 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
// The left operand of the scaling is the (const) matrix product expression.
2178 typedef const DMatTDMatMultExpr<MT1,MT2>
LeftOperand;
// LT is either const RT1 or CT1, selected by IsComputation<MT1>::value.
2184 typedef typename SelectType< IsComputation<MT1>::value,
const RT1,
CT1 >::Type
LT;
// RT is either const RT2 or CT2, selected by IsComputation<MT2>::value.
2187 typedef typename SelectType< IsComputation<MT2>::value,
const RT2,
CT2 >::Type
RT;
// The expression type itself advertises vectorizable == 0.
2192 enum { vectorizable = 0 };
// Constructor taking the matrix product expression and the scalar factor.
2201 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// Element access: the element (i,j) of the wrapped product multiplied by the stored scalar.
2217 return matrix_(i,j) * scalar_;
// Row count accessor (body not visible in this listing).
2226 inline size_t rows()
const {
// Column count accessor (body not visible in this listing).
2236 inline size_t columns()
const {
// Forwards the alias query for the given address to the wrapped matrix expression;
// the scalar operand is not consulted.
2267 template<
typename T >
2268 inline bool canAlias(
const T* alias )
const {
2269 return matrix_.canAlias( alias );
// Forwards the aliasing check for the given address to the wrapped matrix expression;
// the scalar operand is not consulted.
2279 template<
typename T >
2280 inline bool isAliased(
const T* alias )
const {
2281 return matrix_.isAliased( alias );
// Assignment of a scaled matrix product to a dense matrix (lhs = s * A * B).
// The operands of the wrapped product are extracted, degenerate sizes are handled
// separately, and the work is then handed to either the default or the BLAS kernel.
// NOTE(review): the condition choosing between the two kernels is not visible here.
2300 template<
typename MT3
2302 friend inline void assign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Operands of the underlying matrix product.
2309 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2310 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: empty target matrix.
2312 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the inner dimension of the product is zero.
2315 else if( left.columns() == 0UL ) {
2331 DMatScalarMultExpr::selectDefaultAssignKernel( ~lhs, A, B, rhs.scalar_ );
2333 DMatScalarMultExpr::selectBlasAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Scalar (non-vectorized) default kernel for C = scalar * A * B. It is enabled whenever
// UseVectorizedDefaultKernel does not apply to the given types.
2351 template<
typename MT3
2355 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2356 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Row by row over A.
2358 for(
size_t i=0UL; i<A.rows(); ++i ) {
// Initialize row i of C with the contribution of the first inner index (0).
2359 for(
size_t k=0UL; k<B.columns(); ++k ) {
2360 C(i,k) = A(i,0UL) * B(0UL,k);
// Accumulate the contributions of the remaining inner indices j = 1 .. A.columns()-1.
2362 for(
size_t j=1UL; j<A.columns(); ++j ) {
2363 for(
size_t k=0UL; k<B.columns(); ++k ) {
2364 C(i,k) += A(i,j) * B(j,k);
// Final pass over row i (body not visible; presumably applies the scalar factor).
2367 for(
size_t k=0UL; k<B.columns(); ++k ) {
// Vectorized default kernel for C = scalar * A * B, overload for DenseMatrix<MT3,false> targets.
// C is processed in register blocks; for every block the inner dimension is traversed in steps
// of IT::size, the partial products are accumulated in intrinsic registers, and each register
// is reduced with sum() and multiplied by 'scalar' before being stored.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
2388 template<
typename MT3
2392 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2393 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2395 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
2397 const size_t M( A.rows() );
2398 const size_t N( B.columns() );
2399 const size_t K( A.columns() );
// Two rows of C at a time.
2403 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 blocks of C: eight accumulators.
2405 for( ; (j+4UL) <= N; j+=4UL ) {
2406 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2407 for(
size_t k=0UL; k<K; k+=IT::size ) {
2414 xmm1 = xmm1 + a1 * b1;
2415 xmm2 = xmm2 + a1 * b2;
2416 xmm3 = xmm3 + a1 * b3;
2417 xmm4 = xmm4 + a1 * b4;
2418 xmm5 = xmm5 + a2 * b1;
2419 xmm6 = xmm6 + a2 * b2;
2420 xmm7 = xmm7 + a2 * b3;
2421 xmm8 = xmm8 + a2 * b4;
2423 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2424 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2425 (~C)(i ,j+2UL) =
sum( xmm3 ) * scalar;
2426 (~C)(i ,j+3UL) =
sum( xmm4 ) * scalar;
2427 (~C)(i+1UL,j ) =
sum( xmm5 ) * scalar;
2428 (~C)(i+1UL,j+1UL) =
sum( xmm6 ) * scalar;
2429 (~C)(i+1UL,j+2UL) =
sum( xmm7 ) * scalar;
2430 (~C)(i+1UL,j+3UL) =
sum( xmm8 ) * scalar;
// 2x2 blocks of C for the remaining column pairs.
2432 for( ; (j+2UL) <= N; j+=2UL ) {
2434 for(
size_t k=0UL; k<K; k+=IT::size ) {
2439 xmm1 = xmm1 + a1 * b1;
2440 xmm2 = xmm2 + a1 * b2;
2441 xmm3 = xmm3 + a2 * b1;
2442 xmm4 = xmm4 + a2 * b2;
2444 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2445 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2446 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2447 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// 2x1 block (presumably the single leftover column; the guarding condition is not visible).
2451 for(
size_t k=0UL; k<K; k+=IT::size ) {
2453 xmm1 = xmm1 + A.load(i ,k) * b1;
2454 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2456 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2457 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x4 blocks.
2462 for( ; (j+4UL) <= N; j+=4UL ) {
2464 for(
size_t k=0UL; k<K; k+=IT::size ) {
2466 xmm1 = xmm1 + a1 * B.load(k,j );
2467 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2468 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
2469 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
2471 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2472 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
2473 (~C)(i,j+2UL) =
sum( xmm3 ) * scalar;
2474 (~C)(i,j+3UL) =
sum( xmm4 ) * scalar;
// Single row of C: 1x2 blocks.
2476 for( ; (j+2UL) <= N; j+=2UL ) {
2478 for(
size_t k=0UL; k<K; k+=IT::size ) {
2480 xmm1 = xmm1 + a1 * B.load(k,j );
2481 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2483 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2484 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Single element of C.
2488 for(
size_t k=0UL; k<K; k+=IT::size ) {
2489 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2491 (~C)(i,j) =
sum( xmm1 ) * scalar;
// Vectorized default kernel for C = scalar * A * B, overload for DenseMatrix<MT3,true> targets.
// Same scheme as the DenseMatrix<MT3,false> overload, but blocked as 4 rows x 2 columns of C:
// the inner dimension is traversed in steps of IT::size, partial products are accumulated in
// intrinsic registers, and each register is reduced with sum() and scaled before being stored.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
2511 template<
typename MT3
2515 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2516 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2518 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
2520 const size_t M( A.rows() );
2521 const size_t N( B.columns() );
2522 const size_t K( A.columns() );
// Four rows of C at a time.
2526 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 blocks of C: eight accumulators.
2528 for( ; (j+2UL) <= N; j+=2UL ) {
2529 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2530 for(
size_t k=0UL; k<K; k+=IT::size ) {
2537 xmm1 = xmm1 + a1 * b1;
2538 xmm2 = xmm2 + a1 * b2;
2539 xmm3 = xmm3 + a2 * b1;
2540 xmm4 = xmm4 + a2 * b2;
2541 xmm5 = xmm5 + a3 * b1;
2542 xmm6 = xmm6 + a3 * b2;
2543 xmm7 = xmm7 + a4 * b1;
2544 xmm8 = xmm8 + a4 * b2;
2546 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2547 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2548 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2549 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
2550 (~C)(i+2UL,j ) =
sum( xmm5 ) * scalar;
2551 (~C)(i+2UL,j+1UL) =
sum( xmm6 ) * scalar;
2552 (~C)(i+3UL,j ) =
sum( xmm7 ) * scalar;
2553 (~C)(i+3UL,j+1UL) =
sum( xmm8 ) * scalar;
// 4x1 block (presumably the single leftover column; the guarding condition is not visible).
2557 for(
size_t k=0UL; k<K; k+=IT::size ) {
2559 xmm1 = xmm1 + A.load(i ,k) * b1;
2560 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2561 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
2562 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
2564 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2565 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
2566 (~C)(i+2UL,j) =
sum( xmm3 ) * scalar;
2567 (~C)(i+3UL,j) =
sum( xmm4 ) * scalar;
// Remaining row pairs: 2x2 blocks of C.
2570 for( ; (i+2UL) <= M; i+=2UL ) {
2572 for( ; (j+2UL) <= N; j+=2UL ) {
2574 for(
size_t k=0UL; k<K; k+=IT::size ) {
2579 xmm1 = xmm1 + a1 * b1;
2580 xmm2 = xmm2 + a1 * b2;
2581 xmm3 = xmm3 + a2 * b1;
2582 xmm4 = xmm4 + a2 * b2;
2584 (~C)(i ,j ) =
sum( xmm1 ) * scalar;
2585 (~C)(i ,j+1UL) =
sum( xmm2 ) * scalar;
2586 (~C)(i+1UL,j ) =
sum( xmm3 ) * scalar;
2587 (~C)(i+1UL,j+1UL) =
sum( xmm4 ) * scalar;
// 2x1 block.
2591 for(
size_t k=0UL; k<K; k+=IT::size ) {
2593 xmm1 = xmm1 + A.load(i ,k) * b1;
2594 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
2596 (~C)(i ,j) =
sum( xmm1 ) * scalar;
2597 (~C)(i+1UL,j) =
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x2 blocks.
2602 for( ; (j+2UL) <= N; j+=2UL ) {
2604 for(
size_t k=0UL; k<K; k+=IT::size ) {
2606 xmm1 = xmm1 + a1 * B.load(k,j );
2607 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
2609 (~C)(i,j ) =
sum( xmm1 ) * scalar;
2610 (~C)(i,j+1UL) =
sum( xmm2 ) * scalar;
// Single element of C.
2614 for(
size_t k=0UL; k<K; k+=IT::size ) {
2615 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
2617 (~C)(i,j) =
sum( xmm1 ) * scalar;
// Fallback "BLAS" assignment kernel: selected when UseDefaultKernel applies (BLAS mode off or
// no matching BLAS routine) and simply forwards to the default assignment kernel.
2637 template<
typename MT3
2641 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2642 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2644 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assignment kernel for single precision operands: C = scalar * A * B via cblas_sgemm.
// 'scalar' is passed as alpha and beta is 0.0F, so the previous contents of C do not
// contribute to the result.
2663 template<
typename MT3
2667 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2668 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2670 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
2676 const int M ( numeric_cast<int>( A.rows() ) );
2677 const int N ( numeric_cast<int>( B.columns() ) );
2678 const int K ( numeric_cast<int>( A.columns() ) );
2679 const int lda( numeric_cast<int>( A.spacing() ) );
2680 const int ldb( numeric_cast<int>( B.spacing() ) );
2681 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
2683 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2684 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2685 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2686 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands: C = scalar * A * B via cblas_dgemm.
// 'scalar' is passed as alpha and beta is 0.0, so the previous contents of C do not
// contribute to the result.
2706 template<
typename MT3
2710 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2711 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2713 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
2719 const int M ( numeric_cast<int>( A.rows() ) );
2720 const int N ( numeric_cast<int>( B.columns() ) );
2721 const int K ( numeric_cast<int>( A.columns() ) );
2722 const int lda( numeric_cast<int>( A.spacing() ) );
2723 const int ldb( numeric_cast<int>( B.spacing() ) );
2724 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
2726 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2727 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2728 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2729 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for complex<float> operands: C = scalar * A * B via cblas_cgemm.
// alpha is built from 'scalar' and beta is (0,0), so the previous contents of C do not
// contribute to the result.
2749 template<
typename MT3
2753 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2754 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2756 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
2765 const int M ( numeric_cast<int>( A.rows() ) );
2766 const int N ( numeric_cast<int>( B.columns() ) );
2767 const int K ( numeric_cast<int>( A.columns() ) );
2768 const int lda( numeric_cast<int>( A.spacing() ) );
2769 const int ldb( numeric_cast<int>( B.spacing() ) );
2770 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
2771 const complex<float> alpha( scalar );
2772 const complex<float> beta ( 0.0F, 0.0F );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
2774 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2775 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2776 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2777 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for complex<double> operands: C = scalar * A * B via cblas_zgemm.
// alpha is built from 'scalar' and beta is (0,0), so the previous contents of C do not
// contribute to the result.
2797 template<
typename MT3
2801 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2802 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2804 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
2813 const int M ( numeric_cast<int>( A.rows() ) );
2814 const int N ( numeric_cast<int>( B.columns() ) );
2815 const int K ( numeric_cast<int>( A.columns() ) );
2816 const int lda( numeric_cast<int>( A.spacing() ) );
2817 const int ldb( numeric_cast<int>( B.spacing() ) );
2818 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
2819 const complex<double> alpha( scalar );
2820 const complex<double> beta ( 0.0, 0.0 );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
2822 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2823 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2824 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
2825 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of a scaled matrix product to a sparse matrix. The expression is first
// evaluated into a temporary (presumably dense) matrix whose storage order follows SO.
// NOTE(review): the transfer of the temporary into lhs is not visible in this listing.
2842 template<
typename MT
2844 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// OppositeType if SO is set, ResultType otherwise.
2848 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2860 const TmpType tmp( rhs );
// Addition assignment of a scaled matrix product to a dense matrix (lhs += s * A * B).
// The operands of the wrapped product are extracted, degenerate sizes are checked up front,
// and the work is then handed to either the default or the BLAS kernel.
// NOTE(review): the condition choosing between the two kernels is not visible here.
2877 template<
typename MT3
2879 friend inline void addAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Operands of the underlying matrix product.
2886 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2887 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension (presumably nothing to add in that case).
2889 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
2904 DMatScalarMultExpr::selectDefaultAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
2906 DMatScalarMultExpr::selectBlasAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default kernel for the addition assignment; enabled whenever
// UseVectorizedDefaultKernel does not apply. (Body not visible in this listing.)
2924 template<
typename MT3
2928 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2929 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default kernel for C += scalar * A * B, overload for DenseMatrix<MT3,false> targets.
// C is processed in register blocks; for every block the inner dimension is traversed in steps
// of IT::size, the partial products are accumulated in intrinsic registers, and each register
// is reduced with sum(), multiplied by 'scalar' and added to the corresponding element of C.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
2950 template<
typename MT3
2954 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2955 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2957 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
2959 const size_t M( A.rows() );
2960 const size_t N( B.columns() );
2961 const size_t K( A.columns() );
// Two rows of C at a time.
2965 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 blocks of C: eight accumulators.
2967 for( ; (j+4UL) <= N; j+=4UL ) {
2968 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2969 for(
size_t k=0UL; k<K; k+=IT::size ) {
2976 xmm1 = xmm1 + a1 * b1;
2977 xmm2 = xmm2 + a1 * b2;
2978 xmm3 = xmm3 + a1 * b3;
2979 xmm4 = xmm4 + a1 * b4;
2980 xmm5 = xmm5 + a2 * b1;
2981 xmm6 = xmm6 + a2 * b2;
2982 xmm7 = xmm7 + a2 * b3;
2983 xmm8 = xmm8 + a2 * b4;
2985 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
2986 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
2987 (~C)(i ,j+2UL) +=
sum( xmm3 ) * scalar;
2988 (~C)(i ,j+3UL) +=
sum( xmm4 ) * scalar;
2989 (~C)(i+1UL,j ) +=
sum( xmm5 ) * scalar;
2990 (~C)(i+1UL,j+1UL) +=
sum( xmm6 ) * scalar;
2991 (~C)(i+1UL,j+2UL) +=
sum( xmm7 ) * scalar;
2992 (~C)(i+1UL,j+3UL) +=
sum( xmm8 ) * scalar;
// 2x2 blocks of C for the remaining column pairs.
2994 for( ; (j+2UL) <= N; j+=2UL ) {
2996 for(
size_t k=0UL; k<K; k+=IT::size ) {
3001 xmm1 = xmm1 + a1 * b1;
3002 xmm2 = xmm2 + a1 * b2;
3003 xmm3 = xmm3 + a2 * b1;
3004 xmm4 = xmm4 + a2 * b2;
3006 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3007 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3008 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3009 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// 2x1 block (presumably the single leftover column; the guarding condition is not visible).
3013 for(
size_t k=0UL; k<K; k+=IT::size ) {
3015 xmm1 = xmm1 + A.load(i ,k) * b1;
3016 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3018 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3019 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x4 blocks.
3024 for( ; (j+4UL) <= N; j+=4UL ) {
3026 for(
size_t k=0UL; k<K; k+=IT::size ) {
3028 xmm1 = xmm1 + a1 * B.load(k,j );
3029 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3030 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3031 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3033 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3034 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
3035 (~C)(i,j+2UL) +=
sum( xmm3 ) * scalar;
3036 (~C)(i,j+3UL) +=
sum( xmm4 ) * scalar;
// Single row of C: 1x2 blocks.
3038 for( ; (j+2UL) <= N; j+=2UL ) {
3040 for(
size_t k=0UL; k<K; k+=IT::size ) {
3042 xmm1 = xmm1 + a1 * B.load(k,j );
3043 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3045 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3046 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Single element of C.
3050 for(
size_t k=0UL; k<K; k+=IT::size ) {
3051 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3053 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// Vectorized default kernel for C += scalar * A * B, overload for DenseMatrix<MT3,true> targets.
// Same scheme as the DenseMatrix<MT3,false> overload, but blocked as 4 rows x 2 columns of C:
// partial products are accumulated in intrinsic registers over the inner dimension (step
// IT::size), then reduced with sum(), scaled and added to the corresponding element of C.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
3073 template<
typename MT3
3077 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3078 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3080 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
3082 const size_t M( A.rows() );
3083 const size_t N( B.columns() );
3084 const size_t K( A.columns() );
// Four rows of C at a time.
3088 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 blocks of C: eight accumulators.
3090 for( ; (j+2UL) <= N; j+=2UL ) {
3091 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3092 for(
size_t k=0UL; k<K; k+=IT::size ) {
3099 xmm1 = xmm1 + a1 * b1;
3100 xmm2 = xmm2 + a1 * b2;
3101 xmm3 = xmm3 + a2 * b1;
3102 xmm4 = xmm4 + a2 * b2;
3103 xmm5 = xmm5 + a3 * b1;
3104 xmm6 = xmm6 + a3 * b2;
3105 xmm7 = xmm7 + a4 * b1;
3106 xmm8 = xmm8 + a4 * b2;
3108 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3109 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3110 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3111 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
3112 (~C)(i+2UL,j ) +=
sum( xmm5 ) * scalar;
3113 (~C)(i+2UL,j+1UL) +=
sum( xmm6 ) * scalar;
3114 (~C)(i+3UL,j ) +=
sum( xmm7 ) * scalar;
3115 (~C)(i+3UL,j+1UL) +=
sum( xmm8 ) * scalar;
// 4x1 block (presumably the single leftover column; the guarding condition is not visible).
3119 for(
size_t k=0UL; k<K; k+=IT::size ) {
3121 xmm1 = xmm1 + A.load(i ,k) * b1;
3122 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3123 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3124 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3126 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3127 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
3128 (~C)(i+2UL,j) +=
sum( xmm3 ) * scalar;
3129 (~C)(i+3UL,j) +=
sum( xmm4 ) * scalar;
// Remaining row pairs: 2x2 blocks of C.
3132 for( ; (i+2UL) <= M; i+=2UL ) {
3134 for( ; (j+2UL) <= N; j+=2UL ) {
3136 for(
size_t k=0UL; k<K; k+=IT::size ) {
3141 xmm1 = xmm1 + a1 * b1;
3142 xmm2 = xmm2 + a1 * b2;
3143 xmm3 = xmm3 + a2 * b1;
3144 xmm4 = xmm4 + a2 * b2;
3146 (~C)(i ,j ) +=
sum( xmm1 ) * scalar;
3147 (~C)(i ,j+1UL) +=
sum( xmm2 ) * scalar;
3148 (~C)(i+1UL,j ) +=
sum( xmm3 ) * scalar;
3149 (~C)(i+1UL,j+1UL) +=
sum( xmm4 ) * scalar;
// 2x1 block.
3153 for(
size_t k=0UL; k<K; k+=IT::size ) {
3155 xmm1 = xmm1 + A.load(i ,k) * b1;
3156 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3158 (~C)(i ,j) +=
sum( xmm1 ) * scalar;
3159 (~C)(i+1UL,j) +=
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x2 blocks.
3164 for( ; (j+2UL) <= N; j+=2UL ) {
3166 for(
size_t k=0UL; k<K; k+=IT::size ) {
3168 xmm1 = xmm1 + a1 * B.load(k,j );
3169 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3171 (~C)(i,j ) +=
sum( xmm1 ) * scalar;
3172 (~C)(i,j+1UL) +=
sum( xmm2 ) * scalar;
// Single element of C.
3176 for(
size_t k=0UL; k<K; k+=IT::size ) {
3177 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3179 (~C)(i,j) +=
sum( xmm1 ) * scalar;
// Fallback "BLAS" addition assignment kernel: selected when UseDefaultKernel applies (BLAS mode
// off or no matching BLAS routine) and simply forwards to the default addition assignment kernel.
3199 template<
typename MT3
3203 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3204 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3206 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS addition assignment kernel for single precision operands: C += scalar * A * B via
// cblas_sgemm. 'scalar' is passed as alpha; beta is 1.0F so that the existing contents of C
// are kept and the product is added on top.
3225 template<
typename MT3
3229 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3230 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3232 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3238 const int M ( numeric_cast<int>( A.rows() ) );
3239 const int N ( numeric_cast<int>( B.columns() ) );
3240 const int K ( numeric_cast<int>( A.columns() ) );
3241 const int lda( numeric_cast<int>( A.spacing() ) );
3242 const int ldb( numeric_cast<int>( B.spacing() ) );
3243 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3245 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3246 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3247 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3248 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands: C += scalar * A * B via
// cblas_dgemm. 'scalar' is passed as alpha; beta is 1.0 so that the existing contents of C
// are kept and the product is added on top.
3268 template<
typename MT3
3272 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3273 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3275 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3281 const int M ( numeric_cast<int>( A.rows() ) );
3282 const int N ( numeric_cast<int>( B.columns() ) );
3283 const int K ( numeric_cast<int>( A.columns() ) );
3284 const int lda( numeric_cast<int>( A.spacing() ) );
3285 const int ldb( numeric_cast<int>( B.spacing() ) );
3286 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3288 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3289 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3290 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3291 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for complex<float> operands: C += scalar * A * B via
// cblas_cgemm. alpha is built from 'scalar'; beta is (1,0) so that the existing contents of C
// are kept and the product is added on top.
3311 template<
typename MT3
3315 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3316 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3318 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3327 const int M ( numeric_cast<int>( A.rows() ) );
3328 const int N ( numeric_cast<int>( B.columns() ) );
3329 const int K ( numeric_cast<int>( A.columns() ) );
3330 const int lda( numeric_cast<int>( A.spacing() ) );
3331 const int ldb( numeric_cast<int>( B.spacing() ) );
3332 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
3333 const complex<float> alpha( scalar );
3334 const complex<float> beta ( 1.0F, 0.0F );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3336 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3337 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3338 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3339 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for complex<double> operands: C += scalar * A * B via
// cblas_zgemm. alpha is built from 'scalar'; beta is (1,0) so that the existing contents of C
// are kept and the product is added on top.
3359 template<
typename MT3
3363 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3364 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3366 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3375 const int M ( numeric_cast<int>( A.rows() ) );
3376 const int N ( numeric_cast<int>( B.columns() ) );
3377 const int K ( numeric_cast<int>( A.columns() ) );
3378 const int lda( numeric_cast<int>( A.spacing() ) );
3379 const int ldb( numeric_cast<int>( B.spacing() ) );
3380 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
3381 const complex<double> alpha( scalar );
3382 const complex<double> beta ( 1.0, 0.0 );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3384 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3385 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3386 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3387 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of a scaled matrix product to a dense matrix (lhs -= s * A * B).
// The operands of the wrapped product are extracted, degenerate sizes are checked up front,
// and the work is then handed to either the default or the BLAS kernel.
// NOTE(review): the condition choosing between the two kernels is not visible here.
3408 template<
typename MT3
3410 friend inline void subAssign( DenseMatrix<MT3,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Operands of the underlying matrix product.
3417 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3418 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Empty target or zero inner dimension (presumably nothing to subtract in that case).
3420 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
3435 DMatScalarMultExpr::selectDefaultSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
3437 DMatScalarMultExpr::selectBlasSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Non-vectorized default kernel for the subtraction assignment; enabled whenever
// UseVectorizedDefaultKernel does not apply. (Body not visible in this listing.)
3455 template<
typename MT3
3459 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3460 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default kernel for C -= scalar * A * B, overload for DenseMatrix<MT3,false> targets.
// C is processed in register blocks; for every block the inner dimension is traversed in steps
// of IT::size, the partial products are accumulated in intrinsic registers, and each register
// is reduced with sum(), multiplied by 'scalar' and subtracted from the corresponding element.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
3481 template<
typename MT3
3485 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3486 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3488 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
3490 const size_t M( A.rows() );
3491 const size_t N( B.columns() );
3492 const size_t K( A.columns() );
// Two rows of C at a time.
3496 for( ; (i+2UL) <= M; i+=2UL ) {
// 2x4 blocks of C: eight accumulators.
3498 for( ; (j+4UL) <= N; j+=4UL ) {
3499 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3500 for(
size_t k=0UL; k<K; k+=IT::size ) {
3507 xmm1 = xmm1 + a1 * b1;
3508 xmm2 = xmm2 + a1 * b2;
3509 xmm3 = xmm3 + a1 * b3;
3510 xmm4 = xmm4 + a1 * b4;
3511 xmm5 = xmm5 + a2 * b1;
3512 xmm6 = xmm6 + a2 * b2;
3513 xmm7 = xmm7 + a2 * b3;
3514 xmm8 = xmm8 + a2 * b4;
3516 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3517 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3518 (~C)(i ,j+2UL) -=
sum( xmm3 ) * scalar;
3519 (~C)(i ,j+3UL) -=
sum( xmm4 ) * scalar;
3520 (~C)(i+1UL,j ) -=
sum( xmm5 ) * scalar;
3521 (~C)(i+1UL,j+1UL) -=
sum( xmm6 ) * scalar;
3522 (~C)(i+1UL,j+2UL) -=
sum( xmm7 ) * scalar;
3523 (~C)(i+1UL,j+3UL) -=
sum( xmm8 ) * scalar;
// 2x2 blocks of C for the remaining column pairs.
3525 for( ; (j+2UL) <= N; j+=2UL ) {
3527 for(
size_t k=0UL; k<K; k+=IT::size ) {
3532 xmm1 = xmm1 + a1 * b1;
3533 xmm2 = xmm2 + a1 * b2;
3534 xmm3 = xmm3 + a2 * b1;
3535 xmm4 = xmm4 + a2 * b2;
3537 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3538 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3539 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3540 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// 2x1 block (presumably the single leftover column; the guarding condition is not visible).
3544 for(
size_t k=0UL; k<K; k+=IT::size ) {
3546 xmm1 = xmm1 + A.load(i ,k) * b1;
3547 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3549 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3550 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x4 blocks.
3555 for( ; (j+4UL) <= N; j+=4UL ) {
3557 for(
size_t k=0UL; k<K; k+=IT::size ) {
3559 xmm1 = xmm1 + a1 * B.load(k,j );
3560 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3561 xmm3 = xmm3 + a1 * B.load(k,j+2UL);
3562 xmm4 = xmm4 + a1 * B.load(k,j+3UL);
3564 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3565 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
3566 (~C)(i,j+2UL) -=
sum( xmm3 ) * scalar;
3567 (~C)(i,j+3UL) -=
sum( xmm4 ) * scalar;
// Single row of C: 1x2 blocks.
3569 for( ; (j+2UL) <= N; j+=2UL ) {
3571 for(
size_t k=0UL; k<K; k+=IT::size ) {
3573 xmm1 = xmm1 + a1 * B.load(k,j );
3574 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3576 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3577 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Single element of C.
3581 for(
size_t k=0UL; k<K; k+=IT::size ) {
3582 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3584 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// Vectorized default kernel for C -= scalar * A * B, overload for DenseMatrix<MT3,true> targets.
// Same scheme as the DenseMatrix<MT3,false> overload, but blocked as 4 rows x 2 columns of C:
// partial products are accumulated in intrinsic registers over the inner dimension (step
// IT::size), then reduced with sum(), scaled and subtracted from the corresponding element.
// NOTE(review): the declarations/initialization of several accumulators and the a*/b* loads
// are not visible in this listing.
3604 template<
typename MT3
3608 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3609 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3611 typedef IntrinsicTrait<ElementType> IT;
// M x K times K x N product.
3613 const size_t M( A.rows() );
3614 const size_t N( B.columns() );
3615 const size_t K( A.columns() );
// Four rows of C at a time.
3619 for( ; (i+4UL) <= M; i+=4UL ) {
// 4x2 blocks of C: eight accumulators.
3621 for( ; (j+2UL) <= N; j+=2UL ) {
3622 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3623 for(
size_t k=0UL; k<K; k+=IT::size ) {
3630 xmm1 = xmm1 + a1 * b1;
3631 xmm2 = xmm2 + a1 * b2;
3632 xmm3 = xmm3 + a2 * b1;
3633 xmm4 = xmm4 + a2 * b2;
3634 xmm5 = xmm5 + a3 * b1;
3635 xmm6 = xmm6 + a3 * b2;
3636 xmm7 = xmm7 + a4 * b1;
3637 xmm8 = xmm8 + a4 * b2;
3639 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3640 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3641 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3642 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
3643 (~C)(i+2UL,j ) -=
sum( xmm5 ) * scalar;
3644 (~C)(i+2UL,j+1UL) -=
sum( xmm6 ) * scalar;
3645 (~C)(i+3UL,j ) -=
sum( xmm7 ) * scalar;
3646 (~C)(i+3UL,j+1UL) -=
sum( xmm8 ) * scalar;
// 4x1 block (presumably the single leftover column; the guarding condition is not visible).
3650 for(
size_t k=0UL; k<K; k+=IT::size ) {
3652 xmm1 = xmm1 + A.load(i ,k) * b1;
3653 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3654 xmm3 = xmm3 + A.load(i+2UL,k) * b1;
3655 xmm4 = xmm4 + A.load(i+3UL,k) * b1;
3657 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3658 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
3659 (~C)(i+2UL,j) -=
sum( xmm3 ) * scalar;
3660 (~C)(i+3UL,j) -=
sum( xmm4 ) * scalar;
// Remaining row pairs: 2x2 blocks of C.
3663 for( ; (i+2UL) <= M; i+=2UL ) {
3665 for( ; (j+2UL) <= N; j+=2UL ) {
3667 for(
size_t k=0UL; k<K; k+=IT::size ) {
3672 xmm1 = xmm1 + a1 * b1;
3673 xmm2 = xmm2 + a1 * b2;
3674 xmm3 = xmm3 + a2 * b1;
3675 xmm4 = xmm4 + a2 * b2;
3677 (~C)(i ,j ) -=
sum( xmm1 ) * scalar;
3678 (~C)(i ,j+1UL) -=
sum( xmm2 ) * scalar;
3679 (~C)(i+1UL,j ) -=
sum( xmm3 ) * scalar;
3680 (~C)(i+1UL,j+1UL) -=
sum( xmm4 ) * scalar;
// 2x1 block.
3684 for(
size_t k=0UL; k<K; k+=IT::size ) {
3686 xmm1 = xmm1 + A.load(i ,k) * b1;
3687 xmm2 = xmm2 + A.load(i+1UL,k) * b1;
3689 (~C)(i ,j) -=
sum( xmm1 ) * scalar;
3690 (~C)(i+1UL,j) -=
sum( xmm2 ) * scalar;
// Single row of C (presumably the leftover row): 1x2 blocks.
3695 for( ; (j+2UL) <= N; j+=2UL ) {
3697 for(
size_t k=0UL; k<K; k+=IT::size ) {
3699 xmm1 = xmm1 + a1 * B.load(k,j );
3700 xmm2 = xmm2 + a1 * B.load(k,j+1UL);
3702 (~C)(i,j ) -=
sum( xmm1 ) * scalar;
3703 (~C)(i,j+1UL) -=
sum( xmm2 ) * scalar;
// Single element of C.
3707 for(
size_t k=0UL; k<K; k+=IT::size ) {
3708 xmm1 = xmm1 + A.load(i,k) * B.load(k,j);
3710 (~C)(i,j) -=
sum( xmm1 ) * scalar;
// Fallback "BLAS" subtraction assignment kernel: selected when UseDefaultKernel applies (BLAS
// mode off or no matching BLAS routine) and simply forwards to the default subtraction
// assignment kernel.
3730 template<
typename MT3
3734 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3735 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3737 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS subtraction assignment kernel for single precision operands: C -= scalar * A * B via
// cblas_sgemm. The subtraction is expressed by passing '-scalar' as alpha together with a
// beta of 1.0F, which keeps the existing contents of C.
3756 template<
typename MT3
3760 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3761 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3763 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3769 const int M ( numeric_cast<int>( A.rows() ) );
3770 const int N ( numeric_cast<int>( B.columns() ) );
3771 const int K ( numeric_cast<int>( A.columns() ) );
3772 const int lda( numeric_cast<int>( A.spacing() ) );
3773 const int ldb( numeric_cast<int>( B.spacing() ) );
3774 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3776 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3777 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3778 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3779 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands: C -= scalar * A * B via
// cblas_dgemm. The subtraction is expressed by passing '-scalar' as alpha together with a
// beta of 1.0, which keeps the existing contents of C.
3799 template<
typename MT3
3803 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3804 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3806 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3812 const int M ( numeric_cast<int>( A.rows() ) );
3813 const int N ( numeric_cast<int>( B.columns() ) );
3814 const int K ( numeric_cast<int>( A.columns() ) );
3815 const int lda( numeric_cast<int>( A.spacing() ) );
3816 const int ldb( numeric_cast<int>( B.spacing() ) );
3817 const int ldc( numeric_cast<int>( C.spacing() ) );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3819 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3820 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3821 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3822 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<float> operands: C -= scalar * A * B via
// cblas_cgemm. alpha is built from '-scalar' and beta is (1,0), which keeps the existing
// contents of C.
3842 template<
typename MT3
3846 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3847 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3849 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3858 const int M ( numeric_cast<int>( A.rows() ) );
3859 const int N ( numeric_cast<int>( B.columns() ) );
3860 const int K ( numeric_cast<int>( A.columns() ) );
3861 const int lda( numeric_cast<int>( A.spacing() ) );
3862 const int ldb( numeric_cast<int>( B.spacing() ) );
3863 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
3864 const complex<float> alpha( -scalar );
3865 const complex<float> beta ( 1.0F, 0.0F );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3867 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3868 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3869 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3870 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for complex<double> operands: C -= scalar * A * B via
// cblas_zgemm. alpha is built from '-scalar' and beta is (1,0), which keeps the existing
// contents of C.
3890 template<
typename MT3
3894 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3895 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3897 using boost::numeric_cast;
// CBLAS takes int arguments; sizes and leading dimensions are converted via numeric_cast.
3906 const int M ( numeric_cast<int>( A.rows() ) );
3907 const int N ( numeric_cast<int>( B.columns() ) );
3908 const int K ( numeric_cast<int>( A.columns() ) );
3909 const int lda( numeric_cast<int>( A.spacing() ) );
3910 const int ldb( numeric_cast<int>( B.spacing() ) );
3911 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm routine receives alpha and beta by address.
3912 const complex<double> alpha( -scalar );
3913 const complex<double> beta ( 1.0, 0.0 );
// The storage order of C selects the CBLAS layout; exactly one of A and B is passed as
// transposed, and which one depends on that layout.
3915 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3916 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3917 ( IsRowMajorMatrix<MT3>::value )?( CblasTrans ):( CblasNoTrans ),
3918 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Free function creating a DMatTDMatMultExpr<T1,T2> expression object (presumably the
// multiplication operator; its name and parameters are not visible in this listing).
// A std::invalid_argument exception is thrown when the size check fails.
3987 template<
typename T1
3989 inline const DMatTDMatMultExpr<T1,T2>
3995 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression trait for (MT1 * MT2) * VT with a dense column vector (the specialized trait's
// name is not visible in this listing). If MT1 is a row-major dense matrix, MT2 a column-major
// dense matrix and VT a dense column vector, Type is the type of MT1 * ( MT2 * VT ), i.e. the
// product is re-associated; otherwise Type is INVALID_TYPE.
4012 template<
typename MT1,
typename MT2,
typename VT >
4017 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4018 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4019 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4020 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatDVecMultExprTrait<MT2,VT>::Type >::Type
4021 , INVALID_TYPE >::Type Type;
// Expression trait for (MT1 * MT2) * VT with a sparse column vector (the specialized trait's
// name is not visible in this listing). If MT1 is a row-major dense matrix, MT2 a column-major
// dense matrix and VT a sparse column vector, Type is the type of MT1 * ( MT2 * VT ), i.e. the
// product is re-associated; otherwise Type is INVALID_TYPE.
4030 template<
typename MT1,
typename MT2,
typename VT >
4035 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4036 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value &&
4037 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4038 ,
typename DMatDVecMultExprTrait< MT1, typename TDMatSVecMultExprTrait<MT2,VT>::Type >::Type
4039 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a dense row vector (the specialized trait's name
// is not visible in this listing). If VT is a dense row vector, MT1 a row-major dense matrix
// and MT2 a column-major dense matrix, Type is the type of ( VT * MT1 ) * MT2, i.e. the
// product is re-associated; otherwise Type is INVALID_TYPE.
4048 template<
typename VT,
typename MT1,
typename MT2 >
4053 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4054 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4055 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4056 ,
typename TDVecTDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4057 , INVALID_TYPE >::Type Type;
// Expression trait for VT * (MT1 * MT2) with a sparse row vector (the specialized trait's name
// is not visible in this listing). If VT is a sparse row vector, MT1 a row-major dense matrix
// and MT2 a column-major dense matrix, Type is the type of ( VT * MT1 ) * MT2, i.e. the
// product is re-associated; otherwise Type is INVALID_TYPE.
4066 template<
typename VT,
typename MT1,
typename MT2 >
4071 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4072 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4073 IsDenseMatrix<MT2>::value && IsColumnMajorMatrix<MT2>::value
4074 ,
typename TDVecTDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4075 , INVALID_TYPE >::Type Type;
// Submatrix-related expression trait (name not visible in this listing): Type is the
// multiplication expression type of the submatrix types of const MT1 and const MT2.
4084 template<
typename MT1,
typename MT2 >
4089 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1>::Type
4090 ,
typename SubmatrixExprTrait<const MT2>::Type >::Type Type;
// Row-related expression trait (name not visible in this listing): Type is the multiplication
// expression type of the row type of const MT1 and the matrix MT2.
4099 template<
typename MT1,
typename MT2 >
4104 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Column-related expression trait (name not visible in this listing): Type is the multiplication
// expression type of the matrix MT1 and the column type of const MT2.
4113 template<
typename MT1,
typename MT2 >
4118 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatTDMatMultExpr.h:221
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:229
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4512
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B * C$).
Definition: DMatDMatMultExpr.h:3703
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatTDMatMultExpr.h:223
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatTDMatMultExpr.h:267
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:196
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
int16_t sum(const sse_int16_t &a)
Returns the sum of all elements in the 16-bit integral intrinsic vector.
Definition: Reduction.h:62
Header file for the ColumnExprTrait class template.
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
Header file for the IsColumnMajorMatrix type trait.
SelectType< IsComputation< MT2 >::value, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:238
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2375
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:248
DMatTDMatMultExpr< MT1, MT2 > This
Type of this DMatTDMatMultExpr instance.
Definition: DMatTDMatMultExpr.h:219
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:246
Header file for the TDVecSMatMultExprTrait class template.
Compile time check for double precision floating point types. This type trait tests whether or not the...
Definition: IsDouble.h:75
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:122
Constraint on the data type.
Constraint on the data type.
Header file for the MultExprTrait class template.
DMatTDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatTDMatMultExpr class.
Definition: DMatTDMatMultExpr.h:252
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatTDMatMultExpr.h:307
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatTDMatMultExpr.h:226
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
SelectType< IsComputation< MT1 >::value, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:235
Header file for the TDMatSVecMultExprTrait class template.
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatTDMatMultExpr.h:351
Header file for the DenseMatrix base class.
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:121
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:179
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:123
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2373
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
Header file for the IsDenseMatrix type trait.
Header file for the EnableIf class template.
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatTDMatMultExpr.h:224
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatTDMatMultExpr.h:297
Header file for the IsNumeric type trait.
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:648
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:220
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:209
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:239
const size_t DMATTDMATMULT_THRESHOLD
Row-major dense matrix/column-major dense matrix multiplication threshold. This setting specifies the ...
Definition: Thresholds.h:136
Substitution Failure Is Not An Error (SFINAE) class. The DisableIf class template is an auxiliary tool...
Definition: DisableIf.h:184
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatTDMatMultExpr.h:339
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatTDMatMultExpr.h:317
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:247
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:225
Header file for the TDMatDVecMultExprTrait class template.
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2370
Header file for basic type definitions.
Header file for the IsComplex type trait.
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:358
Header file for the complex data type.
Expression object for dense matrix-transpose dense matrix multiplications. The DMatTDMatMultExpr class...
Definition: DMatTDMatMultExpr.h:114
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:120
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatTDMatMultExpr.h:359
RightOperand rightOperand() const
Returns the right-hand side transpose dense matrix operand.
Definition: DMatTDMatMultExpr.h:327
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatTDMatMultExpr.h:222
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
Compile time check for single precision floating point types. This type trait tests whether or not the...
Definition: IsFloat.h:75
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
Operand matrix_
The dense matrix containing the submatrix.
Definition: DenseSubmatrix.h:2792
Header file for the TDVecTDMatMultExprTrait class template.
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatTDMatMultExpr.h:232
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.