35 #ifndef _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
36 #define _BLAZE_MATH_EXPRESSIONS_DMATDMATMULTEXPR_H_
44 #include <boost/cast.hpp>
113 template<
typename MT1
// Compile-time helper: 'value' is nonzero as soon as one of the flags evaluateLeft or
// evaluateRight is set (both are defined outside this excerpt). The template argument MT
// does not enter the expression. The trait is used further down in this file as the
// EnableIf condition of several friend function templates.
145 template<
typename MT >
146 struct UseSMPAssign {
147 enum { value = ( evaluateLeft || evaluateRight ) };
// Compile-time helper: 'value' is nonzero only if the IsFloat trait holds for the
// ElementType of all three types T1, T2, and T3. Used as the EnableIf condition of the
// kernels that call cblas_sgemm.
157 template<
typename T1,
typename T2,
typename T3 >
158 struct UseSinglePrecisionKernel {
159 enum { value = IsFloat<typename T1::ElementType>::value &&
160 IsFloat<typename T2::ElementType>::value &&
161 IsFloat<typename T3::ElementType>::value };
// Compile-time helper: 'value' is nonzero only if the IsDouble trait holds for the
// ElementType of all three types T1, T2, and T3. Used as the EnableIf condition of the
// kernels that call cblas_dgemm.
171 template<
typename T1,
typename T2,
typename T3 >
172 struct UseDoublePrecisionKernel {
173 enum { value = IsDouble<typename T1::ElementType>::value &&
174 IsDouble<typename T2::ElementType>::value &&
175 IsDouble<typename T3::ElementType>::value };
// Compile-time helper: 'value' is nonzero only if the ElementType of T1, T2, and T3 is
// exactly complex<float> (checked with IsSame). Used as the EnableIf condition of the
// kernels that call cblas_cgemm.
186 template<
typename T1,
typename T2,
typename T3 >
187 struct UseSinglePrecisionComplexKernel {
188 typedef complex<float> Type;
189 enum { value = IsSame<typename T1::ElementType,Type>::value &&
190 IsSame<typename T2::ElementType,Type>::value &&
191 IsSame<typename T3::ElementType,Type>::value };
// Compile-time helper: 'value' is nonzero only if the ElementType of T1, T2, and T3 is
// exactly complex<double> (checked with IsSame). Used as the EnableIf condition of the
// kernels that call cblas_zgemm.
202 template<
typename T1,
typename T2,
typename T3 >
203 struct UseDoublePrecisionComplexKernel {
204 typedef complex<double> Type;
205 enum { value = IsSame<typename T1::ElementType,Type>::value &&
206 IsSame<typename T2::ElementType,Type>::value &&
207 IsSame<typename T3::ElementType,Type>::value };
// Compile-time helper: 'value' is nonzero if BLAZE_BLAS_MODE evaluates to zero, or if none
// of the four BLAS kernel traits (single, double, complex single, complex double precision)
// applies to the type triple. Used as the EnableIf condition of the BLAS kernel overloads
// that merely forward to the default kernels.
217 template<
typename T1,
typename T2,
typename T3 >
218 struct UseDefaultKernel {
219 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3>::value &&
220 !UseDoublePrecisionKernel<T1,T2,T3>::value &&
221 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
222 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time helper: 'value' is nonzero only if
//  - all three types report themselves as vectorizable,
//  - all three share the same ElementType (T2 and T3 are each compared against T1), and
//  - IntrinsicTrait of that element type provides addition, subtraction and multiplication.
// Used with EnableIf/DisableIf to pick between the vectorized and the scalar default kernels.
232 template<
typename T1,
typename T2,
typename T3 >
233 struct UseVectorizedDefaultKernel {
234 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
235 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
236 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
237 IntrinsicTrait<typename T1::ElementType>::addition &&
238 IntrinsicTrait<typename T1::ElementType>::subtraction &&
239 IntrinsicTrait<typename T1::ElementType>::multiplication };
270 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
276 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
277 !evaluateRight && MT2::smpAssignable };
307 if(
lhs_.columns() != 0UL ) {
308 const size_t end( ( (
lhs_.columns()-1UL ) &
size_t(-2) ) + 1UL );
310 for(
size_t k=1UL; k<end; k+=2UL ) {
312 tmp +=
lhs_(i,k+1UL) *
rhs_(k+1UL,j);
314 if( end <
lhs_.columns() ) {
342 return rhs_.columns();
372 template<
typename T >
374 return (
lhs_.canAlias( alias ) ||
rhs_.canAlias( alias ) );
384 template<
typename T >
386 return (
lhs_.isAliased( alias ) ||
rhs_.isAliased( alias ) );
396 return lhs_.isAligned() &&
rhs_.isAligned();
431 template<
typename MT
440 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
443 else if( rhs.
lhs_.columns() == 0UL ) {
458 DMatDMatMultExpr::selectAssignKernel( ~lhs, A, B );
// Dispatcher for the assignment C = A * B: forwards the three operands unchanged to either
// selectDefaultAssignKernel or selectBlasAssignKernel. The condition that decides between
// the two calls is not part of this excerpt.
474 template<
typename MT3
477 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
480 DMatDMatMultExpr::selectDefaultAssignKernel( C, A, B );
482 DMatDMatMultExpr::selectBlasAssignKernel( C, A, B );
// Scalar (non-vectorized) default assignment kernel; participates in overload resolution
// only when UseVectorizedDefaultKernel<MT3,MT4,MT5> is false (DisableIf).
// C receives the product of A and B, computed row by row.
500 template<
typename MT3
503 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
504 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
506 const size_t M( A.rows() );
507 const size_t N( B.columns() );
508 const size_t K( A.columns() );
510 for(
size_t i=0UL; i<M; ++i ) {
// First pass over row i overwrites C(i,j) with the k == 0 product, so C needs no prior
// reset. A(i,0UL) and B(0UL,j) are read unconditionally, i.e. K >= 1 is required here
// (presumably guaranteed by the caller's empty-operand checks - confirm).
511 for(
size_t j=0UL; j<N; ++j ) {
512 C(i,j) = A(i,0UL) * B(0UL,j);
// Remaining k contributions are accumulated on top of the initial products.
514 for(
size_t k=1UL; k<K; ++k ) {
515 for(
size_t j=0UL; j<N; ++j ) {
516 C(i,j) += A(i,k) * B(k,j);
// Vectorized default assignment kernel for targets passed as DenseMatrix<MT3,false>;
// enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Several statements of the body (e.g. the declarations of j, i, a1, a2 and b1..b4 as well
// as some enclosing braces/conditions) are not part of this excerpt.
538 template<
typename MT3
541 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
542 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// IT::size is the column step between two consecutive load/store positions below.
544 typedef IntrinsicTrait<ElementType> IT;
546 const size_t M( A.rows() );
547 const size_t N( B.columns() );
548 const size_t K( A.columns() );
// Column panels of 8*IT::size: one row at a time, eight accumulators per row.
// NOTE(review): the loop test only requires the start of the last vector to lie below N,
// so the stores can reach beyond column N-1 - presumably rows are padded; confirm.
552 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
553 for(
size_t i=0UL; i<M; ++i ) {
554 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
555 for(
size_t k=0UL; k<K; ++k ) {
557 xmm1 = xmm1 + a1 * B.load(k,j );
558 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
559 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
560 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
561 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
562 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
563 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
564 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
// Write the eight accumulated vectors back to row i of the target.
566 (~C).
store( i, j , xmm1 );
567 (~C).
store( i, j+IT::size , xmm2 );
568 (~C).
store( i, j+IT::size*2UL, xmm3 );
569 (~C).
store( i, j+IT::size*3UL, xmm4 );
570 (~C).
store( i, j+IT::size*4UL, xmm5 );
571 (~C).
store( i, j+IT::size*5UL, xmm6 );
572 (~C).
store( i, j+IT::size*6UL, xmm7 );
573 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Column panels of 4*IT::size: rows are processed in pairs
// (xmm1..xmm4 end up in row i, xmm5..xmm8 in row i+1).
576 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
578 for( ; (i+2UL) <= M; i+=2UL ) {
579 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
580 for(
size_t k=0UL; k<K; ++k ) {
587 xmm1 = xmm1 + a1 * b1;
588 xmm2 = xmm2 + a1 * b2;
589 xmm3 = xmm3 + a1 * b3;
590 xmm4 = xmm4 + a1 * b4;
591 xmm5 = xmm5 + a2 * b1;
592 xmm6 = xmm6 + a2 * b2;
593 xmm7 = xmm7 + a2 * b3;
594 xmm8 = xmm8 + a2 * b4;
596 (~C).
store( i , j , xmm1 );
597 (~C).
store( i , j+IT::size , xmm2 );
598 (~C).
store( i , j+IT::size*2UL, xmm3 );
599 (~C).
store( i , j+IT::size*3UL, xmm4 );
600 (~C).
store( i+1UL, j , xmm5 );
601 (~C).
store( i+1UL, j+IT::size , xmm6 );
602 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
603 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant of the 4*IT::size panel (four accumulators); the statement that
// guards it is not visible here - presumably it handles a leftover odd row.
607 for(
size_t k=0UL; k<K; ++k ) {
609 xmm1 = xmm1 + a1 * B.load(k,j );
610 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
611 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
612 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
614 (~C).
store( i, j , xmm1 );
615 (~C).
store( i, j+IT::size , xmm2 );
616 (~C).
store( i, j+IT::size*2UL, xmm3 );
617 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Column panels of 2*IT::size, again two rows per iteration.
620 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
622 for( ; (i+2UL) <= M; i+=2UL ) {
624 for(
size_t k=0UL; k<K; ++k ) {
629 xmm1 = xmm1 + a1 * b1;
630 xmm2 = xmm2 + a1 * b2;
631 xmm3 = xmm3 + a2 * b1;
632 xmm4 = xmm4 + a2 * b2;
634 (~C).
store( i , j , xmm1 );
635 (~C).
store( i , j+IT::size, xmm2 );
636 (~C).
store( i+1UL, j , xmm3 );
637 (~C).
store( i+1UL, j+IT::size, xmm4 );
// Single-row variant of the 2*IT::size panel.
641 for(
size_t k=0UL; k<K; ++k ) {
643 xmm1 = xmm1 + a1 * B.load(k,j );
644 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
646 (~C).
store( i, j , xmm1 );
647 (~C).
store( i, j+IT::size, xmm2 );
// One-vector-wide column strip: two rows per iteration, both multiplied with the same b1.
652 for( ; (i+2UL) <= M; i+=2UL ) {
654 for(
size_t k=0UL; k<K; ++k ) {
656 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
657 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
659 (~C).
store( i , j, xmm1 );
660 (~C).
store( i+1UL, j, xmm2 );
// Single-row variant of the one-vector-wide strip.
664 for(
size_t k=0UL; k<K; ++k ) {
665 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
667 (~C).
store( i, j, xmm1 );
// Vectorized default assignment kernel for targets passed as DenseMatrix<MT3,true>
// (presumably the column-major case - confirm); enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible conditions select one of several strategies in this order:
//  1. A is not resizable while B is,
//  2. A is resizable while B is not,
//  3. otherwise A holds at most as many elements as B (rows*columns comparison).
// The branch bodies and any final else branch are not part of this excerpt.
688 template<
typename MT3
691 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
692 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
697 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
701 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
705 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Fallback overload of the BLAS assignment kernel: enabled when UseDefaultKernel holds for
// the type triple and simply forwards to selectDefaultAssignKernel.
730 template<
typename MT3
733 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
734 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
736 selectDefaultAssignKernel( C, A, B );
// BLAS-backed assignment kernel for single precision operands; enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with alpha = 1.0F and
// beta = 0.0F (gemm computes C = alpha*op(A)*op(B) + beta*C, so C is overwritten by A*B).
756 template<
typename MT3
759 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
760 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
762 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
768 const int M ( numeric_cast<int>( A.rows() ) );
769 const int N ( numeric_cast<int>( B.columns() ) );
770 const int K ( numeric_cast<int>( A.columns() ) );
771 const int lda( numeric_cast<int>( A.spacing() ) );
772 const int ldb( numeric_cast<int>( B.spacing() ) );
773 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
775 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
776 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
777 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
778 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS-backed assignment kernel for double precision operands; enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with alpha = 1.0 and
// beta = 0.0 (gemm computes C = alpha*op(A)*op(B) + beta*C, so C is overwritten by A*B).
799 template<
typename MT3
802 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
803 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
805 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
811 const int M ( numeric_cast<int>( A.rows() ) );
812 const int N ( numeric_cast<int>( B.columns() ) );
813 const int K ( numeric_cast<int>( A.columns() ) );
814 const int lda( numeric_cast<int>( A.spacing() ) );
815 const int ldb( numeric_cast<int>( B.spacing() ) );
816 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
818 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
819 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
820 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
821 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS-backed assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with alpha = (1,0)
// and beta = (0,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, so C is overwritten).
842 template<
typename MT3
845 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
846 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
848 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
857 const int M ( numeric_cast<int>( A.rows() ) );
858 const int N ( numeric_cast<int>( B.columns() ) );
859 const int K ( numeric_cast<int>( A.columns() ) );
860 const int lda( numeric_cast<int>( A.spacing() ) );
861 const int ldb( numeric_cast<int>( B.spacing() ) );
862 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_cgemm by address.
863 const complex<float> alpha( 1.0F, 0.0F );
864 const complex<float> beta ( 0.0F, 0.0F );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
866 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
867 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
868 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
869 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS-backed assignment kernel for complex<double> operands; enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with alpha = (1,0)
// and beta = (0,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, so C is overwritten).
890 template<
typename MT3
893 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
894 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
896 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
905 const int M ( numeric_cast<int>( A.rows() ) );
906 const int N ( numeric_cast<int>( B.columns() ) );
907 const int K ( numeric_cast<int>( A.columns() ) );
908 const int lda( numeric_cast<int>( A.spacing() ) );
909 const int ldb( numeric_cast<int>( B.spacing() ) );
910 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_zgemm by address.
911 const complex<double> alpha( 1.0, 0.0 );
912 const complex<double> beta ( 0.0, 0.0 );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
914 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
915 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
916 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
917 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
935 template<
typename MT
941 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
953 const TmpType tmp(
serial( rhs ) );
972 template<
typename MT
981 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
995 DMatDMatMultExpr::selectAddAssignKernel( ~lhs, A, B );
// Dispatcher for the addition assignment C += A * B: forwards the three operands unchanged
// to either selectDefaultAddAssignKernel or selectBlasAddAssignKernel. The condition that
// decides between the two calls is not part of this excerpt.
1011 template<
typename MT3
1014 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1017 DMatDMatMultExpr::selectDefaultAddAssignKernel( C, A, B );
1019 DMatDMatMultExpr::selectBlasAddAssignKernel( C, A, B );
// Scalar (non-vectorized) default addition assignment kernel; participates in overload
// resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5> is false (DisableIf).
// Adds the products A(i,k)*B(k,j) onto the existing contents of C.
1038 template<
typename MT3
1041 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1042 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1044 const size_t M( A.rows() );
1045 const size_t N( B.columns() );
1046 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even number not exceeding N.
1049 const size_t end( N &
size_t(-2) );
1051 for(
size_t i=0UL; i<M; ++i ) {
1052 for(
size_t k=0UL; k<K; ++k ) {
// Inner loop is unrolled by two columns.
1053 for(
size_t j=0UL; j<end; j+=2UL ) {
1054 C(i,j ) += A(i,k) * B(k,j );
1055 C(i,j+1UL) += A(i,k) * B(k,j+1UL);
// Update of column 'end'; the statement guarding it is not part of this excerpt -
// presumably it runs only when N is odd (end < N); confirm.
1058 C(i,end) += A(i,k) * B(k,end);
// Vectorized default addition assignment kernel for targets passed as DenseMatrix<MT3,false>;
// enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Several statements of the body are not part of this excerpt, among them the declarations
// of j, i, a1, a2, b1..b4 and the declaration/initialization of the xmm accumulators.
1080 template<
typename MT3
1083 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1084 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// IT::size is the column step between two consecutive load/store positions below.
1086 typedef IntrinsicTrait<ElementType> IT;
1088 const size_t M( A.rows() );
1089 const size_t N( B.columns() );
1090 const size_t K( A.columns() );
// Column panels of 8*IT::size: one row at a time, eight accumulators per row.
// NOTE(review): the loop test only requires the start of the last vector to lie below N,
// so the stores can reach beyond column N-1 - presumably rows are padded; confirm.
1094 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1095 for(
size_t i=0UL; i<M; ++i ) {
1104 for(
size_t k=0UL; k<K; ++k ) {
1106 xmm1 = xmm1 + a1 * B.load(k,j );
1107 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1108 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1109 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1110 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
1111 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
1112 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
1113 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
// Write the eight accumulated vectors back to row i of the target.
1115 (~C).
store( i, j , xmm1 );
1116 (~C).
store( i, j+IT::size , xmm2 );
1117 (~C).
store( i, j+IT::size*2UL, xmm3 );
1118 (~C).
store( i, j+IT::size*3UL, xmm4 );
1119 (~C).
store( i, j+IT::size*4UL, xmm5 );
1120 (~C).
store( i, j+IT::size*5UL, xmm6 );
1121 (~C).
store( i, j+IT::size*6UL, xmm7 );
1122 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Column panels of 4*IT::size: rows are processed in pairs
// (xmm1..xmm4 end up in row i, xmm5..xmm8 in row i+1).
1125 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1127 for( ; (i+2UL) <= M; i+=2UL ) {
1136 for(
size_t k=0UL; k<K; ++k ) {
1143 xmm1 = xmm1 + a1 * b1;
1144 xmm2 = xmm2 + a1 * b2;
1145 xmm3 = xmm3 + a1 * b3;
1146 xmm4 = xmm4 + a1 * b4;
1147 xmm5 = xmm5 + a2 * b1;
1148 xmm6 = xmm6 + a2 * b2;
1149 xmm7 = xmm7 + a2 * b3;
1150 xmm8 = xmm8 + a2 * b4;
1152 (~C).
store( i , j , xmm1 );
1153 (~C).
store( i , j+IT::size , xmm2 );
1154 (~C).
store( i , j+IT::size*2UL, xmm3 );
1155 (~C).
store( i , j+IT::size*3UL, xmm4 );
1156 (~C).
store( i+1UL, j , xmm5 );
1157 (~C).
store( i+1UL, j+IT::size , xmm6 );
1158 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
1159 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant of the 4*IT::size panel (four accumulators); the statement that
// guards it is not visible here - presumably it handles a leftover odd row.
1166 for(
size_t k=0UL; k<K; ++k ) {
1168 xmm1 = xmm1 + a1 * B.load(k,j );
1169 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
1170 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
1171 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
1173 (~C).
store( i, j , xmm1 );
1174 (~C).
store( i, j+IT::size , xmm2 );
1175 (~C).
store( i, j+IT::size*2UL, xmm3 );
1176 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Column panels of 2*IT::size, again two rows per iteration.
1179 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1181 for( ; (i+2UL) <= M; i+=2UL ) {
1186 for(
size_t k=0UL; k<K; ++k ) {
1191 xmm1 = xmm1 + a1 * b1;
1192 xmm2 = xmm2 + a1 * b2;
1193 xmm3 = xmm3 + a2 * b1;
1194 xmm4 = xmm4 + a2 * b2;
1196 (~C).
store( i , j , xmm1 );
1197 (~C).
store( i , j+IT::size, xmm2 );
1198 (~C).
store( i+1UL, j , xmm3 );
1199 (~C).
store( i+1UL, j+IT::size, xmm4 );
// Single-row variant of the 2*IT::size panel.
1204 for(
size_t k=0UL; k<K; ++k ) {
1206 xmm1 = xmm1 + a1 * B.load(k,j );
1207 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
1209 (~C).
store( i, j , xmm1 );
1210 (~C).
store( i, j+IT::size, xmm2 );
// One-vector-wide column strip: two rows per iteration, both multiplied with the same b1.
1215 for( ; (i+2UL) <= M; i+=2UL ) {
1218 for(
size_t k=0UL; k<K; ++k ) {
1220 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
1221 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
1223 (~C).
store( i , j, xmm1 );
1224 (~C).
store( i+1UL, j, xmm2 );
// Single-row variant of the one-vector-wide strip.
1228 for(
size_t k=0UL; k<K; ++k ) {
1229 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
1231 (~C).
store( i, j, xmm1 );
// Vectorized default addition assignment kernel for targets passed as DenseMatrix<MT3,true>
// (presumably the column-major case - confirm); enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible conditions select one of several strategies in this order:
//  1. A is not resizable while B is,
//  2. A is resizable while B is not,
//  3. otherwise A holds at most as many elements as B (rows*columns comparison).
// The branch bodies and any final else branch are not part of this excerpt.
1252 template<
typename MT3
1255 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1256 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1261 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1265 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1269 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Fallback overload of the BLAS addition assignment kernel: enabled when UseDefaultKernel
// holds for the type triple and simply forwards to selectDefaultAddAssignKernel.
1295 template<
typename MT3
1298 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1299 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1301 selectDefaultAddAssignKernel( C, A, B );
// BLAS-backed addition assignment kernel for single precision operands; enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with alpha = 1.0F and
// beta = 1.0F (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C += A*B).
1321 template<
typename MT3
1324 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1325 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1327 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1333 const int M ( numeric_cast<int>( A.rows() ) );
1334 const int N ( numeric_cast<int>( B.columns() ) );
1335 const int K ( numeric_cast<int>( A.columns() ) );
1336 const int lda( numeric_cast<int>( A.spacing() ) );
1337 const int ldb( numeric_cast<int>( B.spacing() ) );
1338 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1340 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1341 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1342 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1343 M, N, K, 1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS-backed addition assignment kernel for double precision operands; enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with alpha = 1.0 and
// beta = 1.0 (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C += A*B).
1364 template<
typename MT3
1367 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1368 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1370 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1376 const int M ( numeric_cast<int>( A.rows() ) );
1377 const int N ( numeric_cast<int>( B.columns() ) );
1378 const int K ( numeric_cast<int>( A.columns() ) );
1379 const int lda( numeric_cast<int>( A.spacing() ) );
1380 const int ldb( numeric_cast<int>( B.spacing() ) );
1381 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1383 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1384 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1385 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1386 M, N, K, 1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS-backed addition assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with alpha = (1,0)
// and beta = (1,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C += A*B).
1407 template<
typename MT3
1410 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1411 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1413 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1422 const int M ( numeric_cast<int>( A.rows() ) );
1423 const int N ( numeric_cast<int>( B.columns() ) );
1424 const int K ( numeric_cast<int>( A.columns() ) );
1425 const int lda( numeric_cast<int>( A.spacing() ) );
1426 const int ldb( numeric_cast<int>( B.spacing() ) );
1427 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_cgemm by address.
1428 const complex<float> alpha( 1.0F, 0.0F );
1429 const complex<float> beta ( 1.0F, 0.0F );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1431 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1432 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1433 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1434 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS-backed addition assignment kernel for complex<double> operands; enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with alpha = (1,0)
// and beta = (1,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C += A*B).
1455 template<
typename MT3
1458 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1459 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1461 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1470 const int M ( numeric_cast<int>( A.rows() ) );
1471 const int N ( numeric_cast<int>( B.columns() ) );
1472 const int K ( numeric_cast<int>( A.columns() ) );
1473 const int lda( numeric_cast<int>( A.spacing() ) );
1474 const int ldb( numeric_cast<int>( B.spacing() ) );
1475 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_zgemm by address.
1476 const complex<double> alpha( 1.0, 0.0 );
1477 const complex<double> beta ( 1.0, 0.0 );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1479 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1480 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1481 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1482 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
1505 template<
typename MT
1514 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
1528 DMatDMatMultExpr::selectSubAssignKernel( ~lhs, A, B );
// Dispatcher for the subtraction assignment C -= A * B: forwards the three operands
// unchanged to either selectDefaultSubAssignKernel or selectBlasSubAssignKernel. The
// condition that decides between the two calls is not part of this excerpt.
1544 template<
typename MT3
1547 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1550 DMatDMatMultExpr::selectDefaultSubAssignKernel( C, A, B );
1552 DMatDMatMultExpr::selectBlasSubAssignKernel( C, A, B );
// Scalar (non-vectorized) default subtraction assignment kernel; participates in overload
// resolution only when UseVectorizedDefaultKernel<MT3,MT4,MT5> is false (DisableIf).
// Subtracts the products A(i,k)*B(k,j) from the existing contents of C.
1571 template<
typename MT3
1574 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1575 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1577 const size_t M( A.rows() );
1578 const size_t N( B.columns() );
1579 const size_t K( A.columns() );
// N with its lowest bit cleared, i.e. the largest even number not exceeding N.
1582 const size_t end( N &
size_t(-2) );
1584 for(
size_t i=0UL; i<M; ++i ) {
1585 for(
size_t k=0UL; k<K; ++k ) {
// Inner loop is unrolled by two columns.
1586 for(
size_t j=0UL; j<end; j+=2UL ) {
1587 C(i,j ) -= A(i,k) * B(k,j );
1588 C(i,j+1UL) -= A(i,k) * B(k,j+1UL);
// Update of column 'end'; the statement guarding it is not part of this excerpt -
// presumably it runs only when N is odd (end < N); confirm.
1591 C(i,end) -= A(i,k) * B(k,end);
// Vectorized default subtraction assignment kernel for targets passed as
// DenseMatrix<MT3,false>; enabled when UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// Several statements of the body are not part of this excerpt, among them the declarations
// of j, i, a1, a2, b1..b4 and the declaration/initialization of the xmm accumulators.
1613 template<
typename MT3
1616 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1617 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B )
// IT::size is the column step between two consecutive load/store positions below.
1619 typedef IntrinsicTrait<ElementType> IT;
1621 const size_t M( A.rows() );
1622 const size_t N( B.columns() );
1623 const size_t K( A.columns() );
// Column panels of 8*IT::size: one row at a time, eight accumulators per row.
// NOTE(review): the loop test only requires the start of the last vector to lie below N,
// so the stores can reach beyond column N-1 - presumably rows are padded; confirm.
1627 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
1628 for(
size_t i=0UL; i<M; ++i ) {
1637 for(
size_t k=0UL; k<K; ++k ) {
1639 xmm1 = xmm1 - a1 * B.load(k,j );
1640 xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1641 xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1642 xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1643 xmm5 = xmm5 - a1 * B.load(k,j+IT::size*4UL);
1644 xmm6 = xmm6 - a1 * B.load(k,j+IT::size*5UL);
1645 xmm7 = xmm7 - a1 * B.load(k,j+IT::size*6UL);
1646 xmm8 = xmm8 - a1 * B.load(k,j+IT::size*7UL);
// Write the eight updated vectors back to row i of the target.
1648 (~C).
store( i, j , xmm1 );
1649 (~C).
store( i, j+IT::size , xmm2 );
1650 (~C).
store( i, j+IT::size*2UL, xmm3 );
1651 (~C).
store( i, j+IT::size*3UL, xmm4 );
1652 (~C).
store( i, j+IT::size*4UL, xmm5 );
1653 (~C).
store( i, j+IT::size*5UL, xmm6 );
1654 (~C).
store( i, j+IT::size*6UL, xmm7 );
1655 (~C).
store( i, j+IT::size*7UL, xmm8 );
// Column panels of 4*IT::size: rows are processed in pairs
// (xmm1..xmm4 end up in row i, xmm5..xmm8 in row i+1).
1658 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
1660 for( ; (i+2UL) <= M; i+=2UL ) {
1669 for(
size_t k=0UL; k<K; ++k ) {
1676 xmm1 = xmm1 - a1 * b1;
1677 xmm2 = xmm2 - a1 * b2;
1678 xmm3 = xmm3 - a1 * b3;
1679 xmm4 = xmm4 - a1 * b4;
1680 xmm5 = xmm5 - a2 * b1;
1681 xmm6 = xmm6 - a2 * b2;
1682 xmm7 = xmm7 - a2 * b3;
1683 xmm8 = xmm8 - a2 * b4;
1685 (~C).
store( i , j , xmm1 );
1686 (~C).
store( i , j+IT::size , xmm2 );
1687 (~C).
store( i , j+IT::size*2UL, xmm3 );
1688 (~C).
store( i , j+IT::size*3UL, xmm4 );
1689 (~C).
store( i+1UL, j , xmm5 );
1690 (~C).
store( i+1UL, j+IT::size , xmm6 );
1691 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 );
1692 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 );
// Single-row variant of the 4*IT::size panel (four accumulators); the statement that
// guards it is not visible here - presumably it handles a leftover odd row.
1699 for(
size_t k=0UL; k<K; ++k ) {
1701 xmm1 = xmm1 - a1 * B.load(k,j );
1702 xmm2 = xmm2 - a1 * B.load(k,j+IT::size );
1703 xmm3 = xmm3 - a1 * B.load(k,j+IT::size*2UL);
1704 xmm4 = xmm4 - a1 * B.load(k,j+IT::size*3UL);
1706 (~C).
store( i, j , xmm1 );
1707 (~C).
store( i, j+IT::size , xmm2 );
1708 (~C).
store( i, j+IT::size*2UL, xmm3 );
1709 (~C).
store( i, j+IT::size*3UL, xmm4 );
// Column panels of 2*IT::size, again two rows per iteration.
1712 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
1714 for( ; (i+2UL) <= M; i+=2UL ) {
1719 for(
size_t k=0UL; k<K; ++k ) {
1724 xmm1 = xmm1 - a1 * b1;
1725 xmm2 = xmm2 - a1 * b2;
1726 xmm3 = xmm3 - a2 * b1;
1727 xmm4 = xmm4 - a2 * b2;
1729 (~C).
store( i , j , xmm1 );
1730 (~C).
store( i , j+IT::size, xmm2 );
1731 (~C).
store( i+1UL, j , xmm3 );
1732 (~C).
store( i+1UL, j+IT::size, xmm4 );
// Single-row variant of the 2*IT::size panel.
1737 for(
size_t k=0UL; k<K; ++k ) {
1739 xmm1 = xmm1 - a1 * B.load(k,j );
1740 xmm2 = xmm2 - a1 * B.load(k,j+IT::size);
1742 (~C).
store( i, j , xmm1 );
1743 (~C).
store( i, j+IT::size, xmm2 );
// One-vector-wide column strip: two rows per iteration, both multiplied with the same b1.
1748 for( ; (i+2UL) <= M; i+=2UL ) {
1751 for(
size_t k=0UL; k<K; ++k ) {
1753 xmm1 = xmm1 -
set( A(i ,k) ) * b1;
1754 xmm2 = xmm2 -
set( A(i+1UL,k) ) * b1;
1756 (~C).
store( i , j, xmm1 );
1757 (~C).
store( i+1UL, j, xmm2 );
// Single-row variant of the one-vector-wide strip.
1761 for(
size_t k=0UL; k<K; ++k ) {
1762 xmm1 = xmm1 -
set( A(i,k) ) * B.load(k,j);
1764 (~C).
store( i, j, xmm1 );
// Vectorized default subtraction assignment kernel for targets passed as
// DenseMatrix<MT3,true> (presumably the column-major case - confirm); enabled when
// UseVectorizedDefaultKernel<MT3,MT4,MT5> holds.
// The visible conditions select one of several strategies in this order:
//  1. A is not resizable while B is,
//  2. A is resizable while B is not,
//  3. otherwise A holds at most as many elements as B (rows*columns comparison).
// The branch bodies and any final else branch are not part of this excerpt.
1785 template<
typename MT3
1788 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5> >::Type
1789 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B )
1794 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
1798 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
1802 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// Fallback overload of the BLAS subtraction assignment kernel: enabled when UseDefaultKernel
// holds for the type triple and simply forwards to selectDefaultSubAssignKernel.
1828 template<
typename MT3
1831 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5> >::Type
1832 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1834 selectDefaultSubAssignKernel( C, A, B );
// BLAS-backed subtraction assignment kernel for single precision operands; enabled when
// UseSinglePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_sgemm with alpha = -1.0F and
// beta = 1.0F (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C -= A*B).
1854 template<
typename MT3
1857 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5> >::Type
1858 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1860 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1866 const int M ( numeric_cast<int>( A.rows() ) );
1867 const int N ( numeric_cast<int>( B.columns() ) );
1868 const int K ( numeric_cast<int>( A.columns() ) );
1869 const int lda( numeric_cast<int>( A.spacing() ) );
1870 const int ldb( numeric_cast<int>( B.spacing() ) );
1871 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1873 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1874 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1875 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1876 M, N, K, -1.0F, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS-backed subtraction assignment kernel for double precision operands; enabled when
// UseDoublePrecisionKernel<MT3,MT4,MT5> holds. Calls cblas_dgemm with alpha = -1.0 and
// beta = 1.0 (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C -= A*B).
1897 template<
typename MT3
1900 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5> >::Type
1901 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1903 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1909 const int M ( numeric_cast<int>( A.rows() ) );
1910 const int N ( numeric_cast<int>( B.columns() ) );
1911 const int K ( numeric_cast<int>( A.columns() ) );
1912 const int lda( numeric_cast<int>( A.spacing() ) );
1913 const int ldb( numeric_cast<int>( B.spacing() ) );
1914 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1916 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1917 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1918 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1919 M, N, K, -1.0, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS-backed subtraction assignment kernel for complex<float> operands; enabled when
// UseSinglePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_cgemm with alpha = (-1,0)
// and beta = (1,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C -= A*B).
1940 template<
typename MT3
1943 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1944 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1946 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
1955 const int M ( numeric_cast<int>( A.rows() ) );
1956 const int N ( numeric_cast<int>( B.columns() ) );
1957 const int K ( numeric_cast<int>( A.columns() ) );
1958 const int lda( numeric_cast<int>( A.spacing() ) );
1959 const int ldb( numeric_cast<int>( B.spacing() ) );
1960 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_cgemm by address.
1961 const complex<float> alpha( -1.0F, 0.0F );
1962 const complex<float> beta ( 1.0F, 0.0F );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
1964 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
1965 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1966 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
1967 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS-backed subtraction assignment kernel for complex<double> operands; enabled when
// UseDoublePrecisionComplexKernel<MT3,MT4,MT5> holds. Calls cblas_zgemm with alpha = (-1,0)
// and beta = (1,0) (gemm computes C = alpha*op(A)*op(B) + beta*C, i.e. C -= A*B).
1988 template<
typename MT3
1991 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
1992 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B )
1994 using boost::numeric_cast;
// CBLAS takes int arguments: all dimensions and spacings (used as leading dimensions)
// are converted via numeric_cast, a range-checked conversion per the Boost documentation.
2003 const int M ( numeric_cast<int>( A.rows() ) );
2004 const int N ( numeric_cast<int>( B.columns() ) );
2005 const int K ( numeric_cast<int>( A.columns() ) );
2006 const int lda( numeric_cast<int>( A.spacing() ) );
2007 const int ldb( numeric_cast<int>( B.spacing() ) );
2008 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex scaling factors are handed to cblas_zgemm by address.
2009 const complex<double> alpha( -1.0, 0.0 );
2010 const complex<double> beta ( 1.0, 0.0 );
// Layout and both transposition flags depend solely on IsRowMajorMatrix<MT3> (target type):
// row-major -> CblasRowMajor with CblasNoTrans, otherwise CblasColMajor with CblasTrans.
2012 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2013 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2014 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2015 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
2048 template<
typename MT
2050 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2058 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
2061 else if( rhs.lhs_.columns() == 0UL ) {
2095 template<
typename MT
2097 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2102 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
2114 const TmpType tmp( rhs );
2135 template<
typename MT
2137 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2145 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2183 template<
typename MT
2185 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
2193 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || rhs.lhs_.columns() == 0UL ) {
2252 template<
typename MT1
2256 :
public DenseMatrix< DMatScalarMultExpr< DMatDMatMultExpr<MT1,MT2>, ST, false >, false >
2257 ,
private MatScalarMultExpr
2258 ,
private Computation
2262 typedef DMatDMatMultExpr<MT1,MT2> MMM;
2274 enum { evaluateLeft = IsComputation<MT1>::value || RequiresEvaluation<MT1>::value };
2279 enum { evaluateRight = IsComputation<MT2>::value || RequiresEvaluation<MT2>::value };
// Compile-time helper of the scaled multiplication expression: 'value' is nonzero as soon as
// one of the class-level flags evaluateLeft or evaluateRight is set, i.e. when MT1 or MT2
// is a computation or requires evaluation. The template argument MT does not enter the
// expression.
2287 template<
typename MT >
2288 struct UseSMPAssign {
2289 enum { value = ( evaluateLeft || evaluateRight ) };
// Compile-time helper: 'value' is nonzero only if the IsFloat trait holds for the
// ElementType of T1, T2, and T3 and, additionally, the fourth type T4 is not complex
// (IsComplex<T4> is false).
2298 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2299 struct UseSinglePrecisionKernel {
2300 enum { value = IsFloat<typename T1::ElementType>::value &&
2301 IsFloat<typename T2::ElementType>::value &&
2302 IsFloat<typename T3::ElementType>::value &&
2303 !IsComplex<T4>::value };
// Compile-time helper: 'value' is nonzero only if the IsDouble trait holds for the
// ElementType of T1, T2, and T3 and, additionally, the fourth type T4 is not complex
// (IsComplex<T4> is false).
2312 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2313 struct UseDoublePrecisionKernel {
2314 enum { value = IsDouble<typename T1::ElementType>::value &&
2315 IsDouble<typename T2::ElementType>::value &&
2316 IsDouble<typename T3::ElementType>::value &&
2317 !IsComplex<T4>::value };
// Compile-time helper: 'value' is nonzero only if the ElementType of T1, T2, and T3 is
// exactly complex<float> (checked with IsSame). Unlike the real-valued kernel traits of this
// class, it takes no fourth type.
2326 template<
typename T1,
typename T2,
typename T3 >
2327 struct UseSinglePrecisionComplexKernel {
2328 typedef complex<float> Type;
2329 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2330 IsSame<typename T2::ElementType,Type>::value &&
2331 IsSame<typename T3::ElementType,Type>::value };
// Compile-time helper: 'value' is nonzero only if the ElementType of T1, T2, and T3 is
// exactly complex<double> (checked with IsSame). Unlike the real-valued kernel traits of
// this class, it takes no fourth type.
2340 template<
typename T1,
typename T2,
typename T3 >
2341 struct UseDoublePrecisionComplexKernel {
2342 typedef complex<double> Type;
2343 enum { value = IsSame<typename T1::ElementType,Type>::value &&
2344 IsSame<typename T2::ElementType,Type>::value &&
2345 IsSame<typename T3::ElementType,Type>::value };
// Compile-time helper: 'value' is nonzero if BLAZE_BLAS_MODE evaluates to zero, or if none
// of the four BLAS kernel traits applies. T4 is only passed on to the two real-valued
// (single/double precision) traits; the complex traits are evaluated on T1..T3 alone.
2353 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2354 struct UseDefaultKernel {
2355 enum { value = !
BLAZE_BLAS_MODE || ( !UseSinglePrecisionKernel<T1,T2,T3,T4>::value &&
2356 !UseDoublePrecisionKernel<T1,T2,T3,T4>::value &&
2357 !UseSinglePrecisionComplexKernel<T1,T2,T3>::value &&
2358 !UseDoublePrecisionComplexKernel<T1,T2,T3>::value ) };
// Compile-time helper: 'value' is nonzero only if
//  - T1, T2 and T3 report themselves as vectorizable,
//  - T1, T2 and T3 share the same ElementType and T4 is that very type as well, and
//  - IntrinsicTrait of that element type provides addition, subtraction and multiplication.
// Used with EnableIf/DisableIf (with the scalar type as T4) to pick between the vectorized
// and the scalar default kernels.
2366 template<
typename T1,
typename T2,
typename T3,
typename T4 >
2367 struct UseVectorizedDefaultKernel {
2368 enum { value = T1::vectorizable && T2::vectorizable && T3::vectorizable &&
2369 IsSame<typename T1::ElementType,typename T2::ElementType>::value &&
2370 IsSame<typename T1::ElementType,typename T3::ElementType>::value &&
2371 IsSame<typename T1::ElementType,T4>::value &&
2372 IntrinsicTrait<typename T1::ElementType>::addition &&
2373 IntrinsicTrait<typename T1::ElementType>::subtraction &&
2374 IntrinsicTrait<typename T1::ElementType>::multiplication };
2380 typedef DMatScalarMultExpr<MMM,ST,false>
This;
2381 typedef typename MultTrait<RES,ST>::Type
ResultType;
2385 typedef typename IntrinsicTrait<ElementType>::Type
IntrinsicType;
2390 typedef const DMatDMatMultExpr<MT1,MT2>
LeftOperand;
2396 typedef typename SelectType< evaluateLeft, const RT1, CT1 >::Type
LT;
2399 typedef typename SelectType< evaluateRight, const RT2, CT2 >::Type
RT;
2404 enum { vectorizable = MT1::vectorizable && MT2::vectorizable &&
2405 IsSame<ET1,ET2>::value &&
2406 IsSame<ET1,ST>::value &&
2407 IntrinsicTrait<ET1>::addition &&
2408 IntrinsicTrait<ET1>::multiplication };
2411 enum { smpAssignable = !evaluateLeft && MT1::smpAssignable &&
2412 !evaluateRight && MT2::smpAssignable };
// Constructor taking the matrix product expression and the scalar factor.
// Its initializer list and body are not part of this listing.
2421 explicit inline DMatScalarMultExpr(
const MMM& matrix, ST scalar )
// Fragment of an element access whose signature is not visible here:
// returns element (i,j) of the wrapped expression multiplied by the scalar.
2437 return matrix_(i,j) * scalar_;
// Returns the number of rows, forwarded from the wrapped matrix expression.
2446 inline size_t rows()
const {
2447 return matrix_.rows();
// Returns the number of columns, forwarded from the wrapped matrix expression.
2456 inline size_t columns()
const {
2457 return matrix_.columns();
// Alias query: forwards the given address to canAlias() of the wrapped matrix
// expression and returns its answer unchanged.
2487 template<
typename T >
2488 inline bool canAlias(
const T* alias )
const {
2489 return matrix_.canAlias( alias );
// Alias query: forwards the given address to isAliased() of the wrapped matrix
// expression and returns its answer unchanged.
2499 template<
typename T >
2500 inline bool isAliased(
const T* alias )
const {
2501 return matrix_.isAliased( alias );
// Fragment of a member function whose signature is not visible here:
// forwards the alignment query to the wrapped matrix expression.
2511 return matrix_.isAligned();
// Fragment of a further member function (signature not visible): binds the
// left operand of the wrapped matrix product to the local name A.
2521 typename MMM::LeftOperand A( matrix_.leftOperand() );
// Assignment of a scaled dense matrix product to a dense matrix.
//   lhs : target dense matrix
//   rhs : scaled matrix product expression to be assigned
// The remaining template parameters and several statements are not part of
// this listing.
2546 template<
typename MT
2548 friend inline void assign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
2555 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
2556 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: the target has no rows or no columns (branch body not shown).
2558 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the shared inner dimension is zero (branch body not shown;
// presumably the target is reset -- confirm).
2561 else if( left.columns() == 0UL ) {
// General case: hand over to the kernel selection. A and B are declared on
// lines not shown (presumably the evaluated operands -- confirm).
2576 DMatScalarMultExpr::selectAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for C = scalar * A * B.
// Dispatches either to the default kernel or to the BLAS kernel; the condition
// deciding between the two calls is on lines not part of this listing.
2591 template<
typename MT3
2595 static inline void selectAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2598 DMatScalarMultExpr::selectDefaultAssignKernel( C, A, B, scalar );
2600 DMatScalarMultExpr::selectBlasAssignKernel( C, A, B, scalar );
// Default (non-vectorized) assignment kernel; only participates in overload
// resolution when UseVectorizedDefaultKernel does not apply (DisableIf).
//   C      : target matrix
//   A, B   : left and right operand of the product
//   scalar : scaling factor
2618 template<
typename MT3
2622 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2623 selectDefaultAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// M x K times K x N product.
2625 const size_t M( A.rows() );
2626 const size_t N( B.columns() );
2627 const size_t K( A.columns() );
2629 for(
size_t i=0UL; i<M; ++i ) {
// Initialize row i of C with the k == 0 contribution ...
2630 for(
size_t j=0UL; j<N; ++j ) {
2631 C(i,j) = A(i,0UL) * B(0UL,j);
// ... and accumulate the contributions for k = 1 .. K-1.
2633 for(
size_t k=1UL; k<K; ++k ) {
2634 for(
size_t j=0UL; j<N; ++j ) {
2635 C(i,j) += A(i,k) * B(k,j);
// Final pass over row i; its body is not shown (presumably the scaling of
// C(i,j) by `scalar` -- confirm).
2638 for(
size_t j=0UL; j<N; ++j ) {
// Vectorized default assignment kernel for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel applies.
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 SIMD
// vector(s) of IT::size elements. Per block the products are accumulated over
// k in SIMD registers and stored to C after multiplication with `factor`.
// The declarations of j, i, factor, a1, a2 and b1..b4 are on lines that are
// not part of this listing (factor is presumably the broadcast scalar and
// a1/a2 broadcast elements of A -- confirm).
2659 template<
typename MT3
2663 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2664 selectDefaultAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
2666 typedef IntrinsicTrait<ElementType> IT;
2668 const size_t M( A.rows() );
2669 const size_t N( B.columns() );
2670 const size_t K( A.columns() );
// Column blocks of 8 SIMD vectors, one row of C at a time.
2676 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
2677 for(
size_t i=0UL; i<M; ++i ) {
2678 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2679 for(
size_t k=0UL; k<K; ++k ) {
2681 xmm1 = xmm1 + a1 * B.load(k,j );
2682 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2683 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2684 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2685 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
2686 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
2687 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
2688 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
2690 (~C).
store( i, j , xmm1 * factor );
2691 (~C).
store( i, j+IT::size , xmm2 * factor );
2692 (~C).
store( i, j+IT::size*2UL, xmm3 * factor );
2693 (~C).
store( i, j+IT::size*3UL, xmm4 * factor );
2694 (~C).
store( i, j+IT::size*4UL, xmm5 * factor );
2695 (~C).
store( i, j+IT::size*5UL, xmm6 * factor );
2696 (~C).
store( i, j+IT::size*6UL, xmm7 * factor );
2697 (~C).
store( i, j+IT::size*7UL, xmm8 * factor );
// Column blocks of 4 SIMD vectors, two rows of C at a time
// (xmm1..xmm4 belong to row i, xmm5..xmm8 to row i+1).
2700 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
2702 for( ; (i+2UL) <= M; i+=2UL ) {
2703 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
2704 for(
size_t k=0UL; k<K; ++k ) {
2711 xmm1 = xmm1 + a1 * b1;
2712 xmm2 = xmm2 + a1 * b2;
2713 xmm3 = xmm3 + a1 * b3;
2714 xmm4 = xmm4 + a1 * b4;
2715 xmm5 = xmm5 + a2 * b1;
2716 xmm6 = xmm6 + a2 * b2;
2717 xmm7 = xmm7 + a2 * b3;
2718 xmm8 = xmm8 + a2 * b4;
2720 (~C).
store( i , j , xmm1 * factor );
2721 (~C).
store( i , j+IT::size , xmm2 * factor );
2722 (~C).
store( i , j+IT::size*2UL, xmm3 * factor );
2723 (~C).
store( i , j+IT::size*3UL, xmm4 * factor );
2724 (~C).
store( i+1UL, j , xmm5 * factor );
2725 (~C).
store( i+1UL, j+IT::size , xmm6 * factor );
2726 (~C).
store( i+1UL, j+IT::size*2UL, xmm7 * factor );
2727 (~C).
store( i+1UL, j+IT::size*3UL, xmm8 * factor );
// Single-row variant of the 4-vector block (its guarding condition is not
// shown; presumably the leftover row for odd M -- confirm).
2731 for(
size_t k=0UL; k<K; ++k ) {
2733 xmm1 = xmm1 + a1 * B.load(k,j );
2734 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
2735 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
2736 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
2738 (~C).
store( i, j , xmm1 * factor );
2739 (~C).
store( i, j+IT::size , xmm2 * factor );
2740 (~C).
store( i, j+IT::size*2UL, xmm3 * factor );
2741 (~C).
store( i, j+IT::size*3UL, xmm4 * factor );
// Column blocks of 2 SIMD vectors, two rows of C at a time.
2744 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
2746 for( ; (i+2UL) <= M; i+=2UL ) {
2748 for(
size_t k=0UL; k<K; ++k ) {
2753 xmm1 = xmm1 + a1 * b1;
2754 xmm2 = xmm2 + a1 * b2;
2755 xmm3 = xmm3 + a2 * b1;
2756 xmm4 = xmm4 + a2 * b2;
2758 (~C).
store( i , j , xmm1 * factor );
2759 (~C).
store( i , j+IT::size, xmm2 * factor );
2760 (~C).
store( i+1UL, j , xmm3 * factor );
2761 (~C).
store( i+1UL, j+IT::size, xmm4 * factor );
// Single-row variant of the 2-vector block (guarding condition not shown).
2765 for(
size_t k=0UL; k<K; ++k ) {
2767 xmm1 = xmm1 + a1 * B.load(k,j );
2768 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
2770 (~C).
store( i, j , xmm1 * factor );
2771 (~C).
store( i, j+IT::size, xmm2 * factor );
// Last column block of a single SIMD vector, two rows at a time; the
// elements of A are broadcast with set() (enclosing condition not shown).
2776 for( ; (i+2UL) <= M; i+=2UL ) {
2778 for(
size_t k=0UL; k<K; ++k ) {
2780 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
2781 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
2783 (~C).
store( i , j, xmm1 * factor );
2784 (~C).
store( i+1UL, j, xmm2 * factor );
// Single-row variant of the 1-vector block (guarding condition not shown).
2788 for(
size_t k=0UL; k<K; ++k ) {
2789 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
2791 (~C).
store( i, j, xmm1 * factor );
// Vectorized default assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). Instead of computing the product directly, one of
// the operands is replaced by the temporary `tmp` and the resulting expression
// is assigned again. `tmp` is declared on lines not part of this listing
// (presumably a storage-order-converted copy of A or B -- confirm).
2811 template<
typename MT3
2815 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2816 selectDefaultAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// A is not resizable but B is: the temporary takes the place of A.
2821 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
2823 assign( ~C, tmp * B * scalar );
// A is resizable but B is not: the temporary takes the place of B.
2825 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
2827 assign( ~C, A * tmp * scalar );
// Otherwise decide by element count: the temporary replaces A when A has
// no more elements than B ...
2829 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
2831 assign( ~C, tmp * B * scalar );
// ... and B in the remaining case (the `else` line itself is not shown).
2835 assign( ~C, A * tmp * scalar );
// BLAS kernel overload for type combinations without BLAS support
// (enabled when UseDefaultKernel applies): simply forwards to the default
// assignment kernel.
2854 template<
typename MT3
2858 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
2859 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2861 selectDefaultAssignKernel( C, A, B, scalar );
// BLAS assignment kernel for single precision operands
// (enabled when UseSinglePrecisionKernel applies): C = scalar * A * B via
// cblas_sgemm.
2880 template<
typename MT3
2884 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2885 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2887 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
2893 const int M ( numeric_cast<int>( A.rows() ) );
2894 const int N ( numeric_cast<int>( B.columns() ) );
2895 const int K ( numeric_cast<int>( A.columns() ) );
2896 const int lda( numeric_cast<int>( A.spacing() ) );
2897 const int ldb( numeric_cast<int>( B.spacing() ) );
2898 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and transposition flags are all derived from the storage order of
// the target type MT3. alpha = scalar, beta = 0: C is overwritten.
2900 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2901 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2902 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2903 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0F, C.data(), ldc );
// BLAS assignment kernel for double precision operands
// (enabled when UseDoublePrecisionKernel applies): C = scalar * A * B via
// cblas_dgemm.
2923 template<
typename MT3
2927 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
2928 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2930 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
2936 const int M ( numeric_cast<int>( A.rows() ) );
2937 const int N ( numeric_cast<int>( B.columns() ) );
2938 const int K ( numeric_cast<int>( A.columns() ) );
2939 const int lda( numeric_cast<int>( A.spacing() ) );
2940 const int ldb( numeric_cast<int>( B.spacing() ) );
2941 const int ldc( numeric_cast<int>( C.spacing() ) );
// Layout and transposition flags are all derived from the storage order of
// the target type MT3. alpha = scalar, beta = 0: C is overwritten.
2943 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2944 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2945 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2946 M, N, K, scalar, A.data(), lda, B.data(), ldb, 0.0, C.data(), ldc );
// BLAS assignment kernel for single precision complex operands
// (enabled when UseSinglePrecisionComplexKernel applies): C = scalar * A * B
// via cblas_cgemm.
2966 template<
typename MT3
2970 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
2971 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
2973 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
2982 const int M ( numeric_cast<int>( A.rows() ) );
2983 const int N ( numeric_cast<int>( B.columns() ) );
2984 const int K ( numeric_cast<int>( A.columns() ) );
2985 const int lda( numeric_cast<int>( A.spacing() ) );
2986 const int ldb( numeric_cast<int>( B.spacing() ) );
2987 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = scalar, beta = 0: C is overwritten.
2988 const complex<float> alpha( scalar );
2989 const complex<float> beta ( 0.0F, 0.0F );
2991 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
2992 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2993 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
2994 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS assignment kernel for double precision complex operands
// (enabled when UseDoublePrecisionComplexKernel applies): C = scalar * A * B
// via cblas_zgemm.
3014 template<
typename MT3
3018 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3019 selectBlasAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3021 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3030 const int M ( numeric_cast<int>( A.rows() ) );
3031 const int N ( numeric_cast<int>( B.columns() ) );
3032 const int K ( numeric_cast<int>( A.columns() ) );
3033 const int lda( numeric_cast<int>( A.spacing() ) );
3034 const int ldb( numeric_cast<int>( B.spacing() ) );
3035 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = scalar, beta = 0: C is overwritten.
3036 const complex<double> alpha( scalar );
3037 const complex<double> beta ( 0.0, 0.0 );
3039 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3040 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3041 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3042 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Assignment of a scaled dense matrix product to a sparse matrix.
// The expression is first evaluated into a dense temporary; the statements
// that transfer the temporary to lhs are not part of this listing.
3058 template<
typename MT
3060 friend inline void assign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type: OppositeType if SO is set, ResultType otherwise
// (NOTE(review): assumes SelectType picks its first type when the flag is
// true -- confirm).
3064 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// Evaluate the expression through serial().
3076 const TmpType tmp(
serial( rhs ) );
// Addition assignment of a scaled dense matrix product to a dense matrix.
//   lhs : target dense matrix
//   rhs : scaled matrix product expression to be added
// The remaining template parameters and several statements are not part of
// this listing.
3093 template<
typename MT
3095 friend inline void addAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
3102 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3103 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: an empty target or a zero inner dimension (branch body not
// shown; presumably an early return since nothing has to be added -- confirm).
3105 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// General case: hand over to the kernel selection. A and B are declared on
// lines not shown.
3119 DMatScalarMultExpr::selectAddAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for C += scalar * A * B.
// Dispatches either to the default kernel or to the BLAS kernel; the condition
// deciding between the two calls is on lines not part of this listing.
3134 template<
typename MT3
3138 static inline void selectAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3141 DMatScalarMultExpr::selectDefaultAddAssignKernel( C, A, B, scalar );
3143 DMatScalarMultExpr::selectBlasAddAssignKernel( C, A, B, scalar );
// Default (non-vectorized) addition assignment kernel; only participates in
// overload resolution when UseVectorizedDefaultKernel does not apply
// (DisableIf). The function body is not part of this listing.
3161 template<
typename MT3
3165 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3166 selectDefaultAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default addition assignment kernel for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel applies.
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 SIMD
// vector(s) of IT::size elements. Per block the products are accumulated over
// k in SIMD registers; the result times `factor` is added to the values
// loaded from C and stored back.
// The declarations of j, i, factor, a1, a2 and b1..b4 are on lines that are
// not part of this listing (factor is presumably the broadcast scalar and
// a1/a2 broadcast elements of A -- confirm).
3187 template<
typename MT3
3191 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3192 selectDefaultAddAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3194 typedef IntrinsicTrait<ElementType> IT;
3196 const size_t M( A.rows() );
3197 const size_t N( B.columns() );
3198 const size_t K( A.columns() );
// Column blocks of 8 SIMD vectors, one row of C at a time.
3204 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3205 for(
size_t i=0UL; i<M; ++i ) {
3206 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3207 for(
size_t k=0UL; k<K; ++k ) {
3209 xmm1 = xmm1 + a1 * B.load(k,j );
3210 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3211 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3212 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3213 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3214 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3215 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3216 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3218 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3219 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3220 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3221 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
3222 (~C).
store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) + xmm5 * factor );
3223 (~C).
store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) + xmm6 * factor );
3224 (~C).
store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) + xmm7 * factor );
3225 (~C).
store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) + xmm8 * factor );
// Column blocks of 4 SIMD vectors, two rows of C at a time
// (xmm1..xmm4 belong to row i, xmm5..xmm8 to row i+1).
3228 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3230 for( ; (i+2UL) <= M; i+=2UL ) {
3231 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3232 for(
size_t k=0UL; k<K; ++k ) {
3239 xmm1 = xmm1 + a1 * b1;
3240 xmm2 = xmm2 + a1 * b2;
3241 xmm3 = xmm3 + a1 * b3;
3242 xmm4 = xmm4 + a1 * b4;
3243 xmm5 = xmm5 + a2 * b1;
3244 xmm6 = xmm6 + a2 * b2;
3245 xmm7 = xmm7 + a2 * b3;
3246 xmm8 = xmm8 + a2 * b4;
3248 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3249 (~C).
store( i , j+IT::size , (~C).load(i ,j+IT::size ) + xmm2 * factor );
3250 (~C).
store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) + xmm3 * factor );
3251 (~C).
store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) + xmm4 * factor );
3252 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm5 * factor );
3253 (~C).
store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) + xmm6 * factor );
3254 (~C).
store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) + xmm7 * factor );
3255 (~C).
store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) + xmm8 * factor );
// Single-row variant of the 4-vector block (its guarding condition is not
// shown; presumably the leftover row for odd M -- confirm).
3259 for(
size_t k=0UL; k<K; ++k ) {
3261 xmm1 = xmm1 + a1 * B.load(k,j );
3262 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3263 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3264 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3266 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3267 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) + xmm2 * factor );
3268 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) + xmm3 * factor );
3269 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) + xmm4 * factor );
// Column blocks of 2 SIMD vectors, two rows of C at a time.
3272 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3274 for( ; (i+2UL) <= M; i+=2UL ) {
3276 for(
size_t k=0UL; k<K; ++k ) {
3281 xmm1 = xmm1 + a1 * b1;
3282 xmm2 = xmm2 + a1 * b2;
3283 xmm3 = xmm3 + a2 * b1;
3284 xmm4 = xmm4 + a2 * b2;
3286 (~C).
store( i , j , (~C).load(i ,j ) + xmm1 * factor );
3287 (~C).
store( i , j+IT::size, (~C).load(i ,j+IT::size) + xmm2 * factor );
3288 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) + xmm3 * factor );
3289 (~C).
store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) + xmm4 * factor );
// Single-row variant of the 2-vector block (guarding condition not shown).
3293 for(
size_t k=0UL; k<K; ++k ) {
3295 xmm1 = xmm1 + a1 * B.load(k,j );
3296 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3298 (~C).
store( i, j , (~C).load(i,j ) + xmm1 * factor );
3299 (~C).
store( i, j+IT::size, (~C).load(i,j+IT::size) + xmm2 * factor );
// Last column block of a single SIMD vector, two rows at a time; the
// elements of A are broadcast with set() (enclosing condition not shown).
3304 for( ; (i+2UL) <= M; i+=2UL ) {
3306 for(
size_t k=0UL; k<K; ++k ) {
3308 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3309 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3311 (~C).
store( i , j, (~C).load(i ,j) + xmm1 * factor );
3312 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) + xmm2 * factor );
// Single-row variant of the 1-vector block (guarding condition not shown).
3316 for(
size_t k=0UL; k<K; ++k ) {
3317 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3319 (~C).
store( i, j, (~C).load(i,j) + xmm1 * factor );
// Vectorized default addition assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). Only the case distinction is visible here; the
// statements executed in each branch are not part of this listing
// (presumably one operand is replaced by a temporary, as in the plain
// assignment kernel -- confirm).
3339 template<
typename MT3
3343 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3344 selectDefaultAddAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Case 1: A is not resizable but B is.
3349 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Case 2: A is resizable but B is not.
3353 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Case 3: decide by element count (A has no more elements than B).
3357 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS addition assignment overload for type combinations without BLAS
// support (enabled when UseDefaultKernel applies): simply forwards to the
// default addition assignment kernel.
3382 template<
typename MT3
3386 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3387 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3389 selectDefaultAddAssignKernel( C, A, B, scalar );
// BLAS addition assignment kernel for single precision operands
// (enabled when UseSinglePrecisionKernel applies): C += scalar * A * B via
// cblas_sgemm.
3408 template<
typename MT3
3412 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3413 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3415 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3421 const int M ( numeric_cast<int>( A.rows() ) );
3422 const int N ( numeric_cast<int>( B.columns() ) );
3423 const int K ( numeric_cast<int>( A.columns() ) );
3424 const int lda( numeric_cast<int>( A.spacing() ) );
3425 const int ldb( numeric_cast<int>( B.spacing() ) );
3426 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = scalar, beta = 1: the product is added to the existing C.
3428 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3429 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3430 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3431 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS addition assignment kernel for double precision operands
// (enabled when UseDoublePrecisionKernel applies): C += scalar * A * B via
// cblas_dgemm.
3451 template<
typename MT3
3455 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3456 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3458 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3464 const int M ( numeric_cast<int>( A.rows() ) );
3465 const int N ( numeric_cast<int>( B.columns() ) );
3466 const int K ( numeric_cast<int>( A.columns() ) );
3467 const int lda( numeric_cast<int>( A.spacing() ) );
3468 const int ldb( numeric_cast<int>( B.spacing() ) );
3469 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = scalar, beta = 1: the product is added to the existing C.
3471 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3472 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3473 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3474 M, N, K, scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS addition assignment kernel for single precision complex operands
// (enabled when UseSinglePrecisionComplexKernel applies):
// C += scalar * A * B via cblas_cgemm.
3494 template<
typename MT3
3498 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3499 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3501 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3510 const int M ( numeric_cast<int>( A.rows() ) );
3511 const int N ( numeric_cast<int>( B.columns() ) );
3512 const int K ( numeric_cast<int>( A.columns() ) );
3513 const int lda( numeric_cast<int>( A.spacing() ) );
3514 const int ldb( numeric_cast<int>( B.spacing() ) );
3515 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = scalar, beta = 1: the product is added to the existing C.
3516 const complex<float> alpha( scalar );
3517 const complex<float> beta ( 1.0F, 0.0F );
3519 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3520 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3521 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3522 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS addition assignment kernel for double precision complex operands
// (enabled when UseDoublePrecisionComplexKernel applies):
// C += scalar * A * B via cblas_zgemm.
3542 template<
typename MT3
3546 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3547 selectBlasAddAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3549 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3558 const int M ( numeric_cast<int>( A.rows() ) );
3559 const int N ( numeric_cast<int>( B.columns() ) );
3560 const int K ( numeric_cast<int>( A.columns() ) );
3561 const int lda( numeric_cast<int>( A.spacing() ) );
3562 const int ldb( numeric_cast<int>( B.spacing() ) );
3563 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = scalar, beta = 1: the product is added to the existing C.
3564 const complex<double> alpha( scalar );
3565 const complex<double> beta ( 1.0, 0.0 );
3567 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3568 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3569 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3570 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// Subtraction assignment of a scaled dense matrix product to a dense matrix.
//   lhs : target dense matrix
//   rhs : scaled matrix product expression to be subtracted
// The remaining template parameters and several statements are not part of
// this listing.
3591 template<
typename MT
3593 friend inline void subAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
3600 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
3601 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: an empty target or a zero inner dimension (branch body not
// shown; presumably an early return since nothing has to be subtracted --
// confirm).
3603 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// General case: hand over to the kernel selection. A and B are declared on
// lines not shown.
3617 DMatScalarMultExpr::selectSubAssignKernel( ~lhs, A, B, rhs.scalar_ );
// Kernel selection for C -= scalar * A * B.
// Dispatches either to the default kernel or to the BLAS kernel; the condition
// deciding between the two calls is on lines not part of this listing.
3632 template<
typename MT3
3636 static inline void selectSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3639 DMatScalarMultExpr::selectDefaultSubAssignKernel( C, A, B, scalar );
3641 DMatScalarMultExpr::selectBlasSubAssignKernel( C, A, B, scalar );
// Default (non-vectorized) subtraction assignment kernel; only participates
// in overload resolution when UseVectorizedDefaultKernel does not apply
// (DisableIf). The function body is not part of this listing.
3659 template<
typename MT3
3663 static inline typename DisableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3664 selectDefaultSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Vectorized default subtraction assignment kernel for a row-major target
// (DenseMatrix<MT3,false>); enabled when UseVectorizedDefaultKernel applies.
// The columns of C are processed in blocks of 8, 4, 2 and finally 1 SIMD
// vector(s) of IT::size elements. Per block the products are accumulated over
// k in SIMD registers; the result times `factor` is subtracted from the
// values loaded from C and stored back.
// The declarations of j, i, factor, a1, a2 and b1..b4 are on lines that are
// not part of this listing (factor is presumably the broadcast scalar and
// a1/a2 broadcast elements of A -- confirm).
3685 template<
typename MT3
3689 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3690 selectDefaultSubAssignKernel( DenseMatrix<MT3,false>& C,
const MT4& A,
const MT5& B, ST2 scalar )
3692 typedef IntrinsicTrait<ElementType> IT;
3694 const size_t M( A.rows() );
3695 const size_t N( B.columns() );
3696 const size_t K( A.columns() );
// Column blocks of 8 SIMD vectors, one row of C at a time.
3702 for( ; (j+IT::size*7UL) < N; j+=IT::size*8UL ) {
3703 for(
size_t i=0UL; i<M; ++i ) {
3704 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3705 for(
size_t k=0UL; k<K; ++k ) {
3707 xmm1 = xmm1 + a1 * B.load(k,j );
3708 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3709 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3710 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3711 xmm5 = xmm5 + a1 * B.load(k,j+IT::size*4UL);
3712 xmm6 = xmm6 + a1 * B.load(k,j+IT::size*5UL);
3713 xmm7 = xmm7 + a1 * B.load(k,j+IT::size*6UL);
3714 xmm8 = xmm8 + a1 * B.load(k,j+IT::size*7UL);
3716 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3717 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3718 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3719 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
3720 (~C).
store( i, j+IT::size*4UL, (~C).load(i,j+IT::size*4UL) - xmm5 * factor );
3721 (~C).
store( i, j+IT::size*5UL, (~C).load(i,j+IT::size*5UL) - xmm6 * factor );
3722 (~C).
store( i, j+IT::size*6UL, (~C).load(i,j+IT::size*6UL) - xmm7 * factor );
3723 (~C).
store( i, j+IT::size*7UL, (~C).load(i,j+IT::size*7UL) - xmm8 * factor );
// Column blocks of 4 SIMD vectors, two rows of C at a time
// (xmm1..xmm4 belong to row i, xmm5..xmm8 to row i+1).
3726 for( ; (j+IT::size*3UL) < N; j+=IT::size*4UL ) {
3728 for( ; (i+2UL) <= M; i+=2UL ) {
3729 IntrinsicType xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
3730 for(
size_t k=0UL; k<K; ++k ) {
3737 xmm1 = xmm1 + a1 * b1;
3738 xmm2 = xmm2 + a1 * b2;
3739 xmm3 = xmm3 + a1 * b3;
3740 xmm4 = xmm4 + a1 * b4;
3741 xmm5 = xmm5 + a2 * b1;
3742 xmm6 = xmm6 + a2 * b2;
3743 xmm7 = xmm7 + a2 * b3;
3744 xmm8 = xmm8 + a2 * b4;
3746 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3747 (~C).
store( i , j+IT::size , (~C).load(i ,j+IT::size ) - xmm2 * factor );
3748 (~C).
store( i , j+IT::size*2UL, (~C).load(i ,j+IT::size*2UL) - xmm3 * factor );
3749 (~C).
store( i , j+IT::size*3UL, (~C).load(i ,j+IT::size*3UL) - xmm4 * factor );
3750 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm5 * factor );
3751 (~C).
store( i+1UL, j+IT::size , (~C).load(i+1UL,j+IT::size ) - xmm6 * factor );
3752 (~C).
store( i+1UL, j+IT::size*2UL, (~C).load(i+1UL,j+IT::size*2UL) - xmm7 * factor );
3753 (~C).
store( i+1UL, j+IT::size*3UL, (~C).load(i+1UL,j+IT::size*3UL) - xmm8 * factor );
// Single-row variant of the 4-vector block (its guarding condition is not
// shown; presumably the leftover row for odd M -- confirm).
3757 for(
size_t k=0UL; k<K; ++k ) {
3759 xmm1 = xmm1 + a1 * B.load(k,j );
3760 xmm2 = xmm2 + a1 * B.load(k,j+IT::size );
3761 xmm3 = xmm3 + a1 * B.load(k,j+IT::size*2UL);
3762 xmm4 = xmm4 + a1 * B.load(k,j+IT::size*3UL);
3764 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3765 (~C).
store( i, j+IT::size , (~C).load(i,j+IT::size ) - xmm2 * factor );
3766 (~C).
store( i, j+IT::size*2UL, (~C).load(i,j+IT::size*2UL) - xmm3 * factor );
3767 (~C).
store( i, j+IT::size*3UL, (~C).load(i,j+IT::size*3UL) - xmm4 * factor );
// Column blocks of 2 SIMD vectors, two rows of C at a time.
3770 for( ; (j+IT::size) < N; j+=IT::size*2UL ) {
3772 for( ; (i+2UL) <= M; i+=2UL ) {
3774 for(
size_t k=0UL; k<K; ++k ) {
3779 xmm1 = xmm1 + a1 * b1;
3780 xmm2 = xmm2 + a1 * b2;
3781 xmm3 = xmm3 + a2 * b1;
3782 xmm4 = xmm4 + a2 * b2;
3784 (~C).
store( i , j , (~C).load(i ,j ) - xmm1 * factor );
3785 (~C).
store( i , j+IT::size, (~C).load(i ,j+IT::size) - xmm2 * factor );
3786 (~C).
store( i+1UL, j , (~C).load(i+1UL,j ) - xmm3 * factor );
3787 (~C).
store( i+1UL, j+IT::size, (~C).load(i+1UL,j+IT::size) - xmm4 * factor );
// Single-row variant of the 2-vector block (guarding condition not shown).
3791 for(
size_t k=0UL; k<K; ++k ) {
3793 xmm1 = xmm1 + a1 * B.load(k,j );
3794 xmm2 = xmm2 + a1 * B.load(k,j+IT::size);
3796 (~C).
store( i, j , (~C).load(i,j ) - xmm1 * factor );
3797 (~C).
store( i, j+IT::size, (~C).load(i,j+IT::size) - xmm2 * factor );
// Last column block of a single SIMD vector, two rows at a time; the
// elements of A are broadcast with set() (enclosing condition not shown).
3802 for( ; (i+2UL) <= M; i+=2UL ) {
3804 for(
size_t k=0UL; k<K; ++k ) {
3806 xmm1 = xmm1 +
set( A(i ,k) ) * b1;
3807 xmm2 = xmm2 +
set( A(i+1UL,k) ) * b1;
3809 (~C).
store( i , j, (~C).load(i ,j) - xmm1 * factor );
3810 (~C).
store( i+1UL, j, (~C).load(i+1UL,j) - xmm2 * factor );
// Single-row variant of the 1-vector block (guarding condition not shown).
3814 for(
size_t k=0UL; k<K; ++k ) {
3815 xmm1 = xmm1 +
set( A(i,k) ) * B.load(k,j);
3817 (~C).
store( i, j, (~C).load(i,j) - xmm1 * factor );
// Vectorized default subtraction assignment kernel for a column-major target
// (DenseMatrix<MT3,true>). Only the case distinction is visible here; the
// statements executed in each branch are not part of this listing
// (presumably one operand is replaced by a temporary, as in the plain
// assignment kernel -- confirm).
3837 template<
typename MT3
3841 static inline typename EnableIf< UseVectorizedDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3842 selectDefaultSubAssignKernel( DenseMatrix<MT3,true>& C,
const MT4& A,
const MT5& B, ST2 scalar )
// Case 1: A is not resizable but B is.
3847 if( !IsResizable<MT4>::value && IsResizable<MT5>::value ) {
// Case 2: A is resizable but B is not.
3851 else if( IsResizable<MT4>::value && !IsResizable<MT5>::value ) {
// Case 3: decide by element count (A has no more elements than B).
3855 else if( A.rows() * A.columns() <= B.rows() * B.columns() ) {
// BLAS subtraction assignment overload for type combinations without BLAS
// support (enabled when UseDefaultKernel applies): simply forwards to the
// default subtraction assignment kernel.
3880 template<
typename MT3
3884 static inline typename EnableIf< UseDefaultKernel<MT3,MT4,MT5,ST2> >::Type
3885 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3887 selectDefaultSubAssignKernel( C, A, B, scalar );
// BLAS subtraction assignment kernel for single precision operands
// (enabled when UseSinglePrecisionKernel applies): C -= scalar * A * B via
// cblas_sgemm.
3906 template<
typename MT3
3910 static inline typename EnableIf< UseSinglePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3911 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3913 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3919 const int M ( numeric_cast<int>( A.rows() ) );
3920 const int N ( numeric_cast<int>( B.columns() ) );
3921 const int K ( numeric_cast<int>( A.columns() ) );
3922 const int lda( numeric_cast<int>( A.spacing() ) );
3923 const int ldb( numeric_cast<int>( B.spacing() ) );
3924 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -scalar, beta = 1: the product is subtracted from the existing C.
3926 cblas_sgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3927 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3928 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3929 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0F, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision operands
// (enabled when UseDoublePrecisionKernel applies): C -= scalar * A * B via
// cblas_dgemm.
3949 template<
typename MT3
3953 static inline typename EnableIf< UseDoublePrecisionKernel<MT3,MT4,MT5,ST2> >::Type
3954 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3956 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
3962 const int M ( numeric_cast<int>( A.rows() ) );
3963 const int N ( numeric_cast<int>( B.columns() ) );
3964 const int K ( numeric_cast<int>( A.columns() ) );
3965 const int lda( numeric_cast<int>( A.spacing() ) );
3966 const int ldb( numeric_cast<int>( B.spacing() ) );
3967 const int ldc( numeric_cast<int>( C.spacing() ) );
// alpha = -scalar, beta = 1: the product is subtracted from the existing C.
3969 cblas_dgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
3970 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3971 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
3972 M, N, K, -scalar, A.data(), lda, B.data(), ldb, 1.0, C.data(), ldc );
// BLAS subtraction assignment kernel for single precision complex operands
// (enabled when UseSinglePrecisionComplexKernel applies):
// C -= scalar * A * B via cblas_cgemm.
3992 template<
typename MT3
3996 static inline typename EnableIf< UseSinglePrecisionComplexKernel<MT3,MT4,MT5> >::Type
3997 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
3999 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
4008 const int M ( numeric_cast<int>( A.rows() ) );
4009 const int N ( numeric_cast<int>( B.columns() ) );
4010 const int K ( numeric_cast<int>( A.columns() ) );
4011 const int lda( numeric_cast<int>( A.spacing() ) );
4012 const int ldb( numeric_cast<int>( B.spacing() ) );
4013 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = -scalar, beta = 1: the product is subtracted from the existing C.
4014 const complex<float> alpha( -scalar );
4015 const complex<float> beta ( 1.0F, 0.0F );
4017 cblas_cgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4018 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4019 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4020 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// BLAS subtraction assignment kernel for double precision complex operands
// (enabled when UseDoublePrecisionComplexKernel applies):
// C -= scalar * A * B via cblas_zgemm.
4040 template<
typename MT3
4044 static inline typename EnableIf< UseDoublePrecisionComplexKernel<MT3,MT4,MT5> >::Type
4045 selectBlasSubAssignKernel( MT3& C,
const MT4& A,
const MT5& B, ST2 scalar )
4047 using boost::numeric_cast;
// CBLAS expects int dimensions and leading dimensions; numeric_cast performs
// a range-checked conversion from size_t.
4056 const int M ( numeric_cast<int>( A.rows() ) );
4057 const int N ( numeric_cast<int>( B.columns() ) );
4058 const int K ( numeric_cast<int>( A.columns() ) );
4059 const int lda( numeric_cast<int>( A.spacing() ) );
4060 const int ldb( numeric_cast<int>( B.spacing() ) );
4061 const int ldc( numeric_cast<int>( C.spacing() ) );
// The complex gemm takes alpha and beta by address, hence the local copies.
// alpha = -scalar, beta = 1: the product is subtracted from the existing C.
4062 const complex<double> alpha( -scalar );
4063 const complex<double> beta ( 1.0, 0.0 );
4065 cblas_zgemm( ( IsRowMajorMatrix<MT3>::value )?( CblasRowMajor ):( CblasColMajor ),
4066 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4067 ( IsRowMajorMatrix<MT3>::value )?( CblasNoTrans ):( CblasTrans ),
4068 M, N, K, &alpha, A.data(), lda, B.data(), ldb, &beta, C.data(), ldc );
// SMP assignment of a scaled dense matrix product to a dense matrix.
// Only enabled when UseSMPAssign<MT> applies. The statements following the
// special-case checks are not part of this listing.
4099 template<
typename MT
4101 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4102 smpAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
4109 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4110 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: the target has no rows or no columns (branch body not shown).
4112 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL ) {
// Special case: the shared inner dimension is zero (branch body not shown).
4115 else if( left.columns() == 0UL ) {
// SMP assignment of a scaled dense matrix product to a sparse matrix.
// Only enabled when UseSMPAssign<MT> applies. The expression is first
// evaluated into a dense temporary; the statements that transfer the
// temporary to lhs are not part of this listing.
4147 template<
typename MT
4149 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4150 smpAssign( SparseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Temporary type: OppositeType if SO is set, ResultType otherwise
// (NOTE(review): assumes SelectType picks its first type when the flag is
// true -- confirm).
4154 typedef typename SelectType< SO, OppositeType, ResultType >::Type TmpType;
// Unlike the non-SMP sparse assign, rhs is passed without serial().
4166 const TmpType tmp( rhs );
// SMP addition assignment of a scaled dense matrix product to a dense matrix.
// Only enabled when UseSMPAssign<MT> applies. The statements following the
// special-case check are not part of this listing.
4185 template<
typename MT
4187 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4188 smpAddAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
4195 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4196 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: an empty target or a zero inner dimension (branch body not
// shown).
4198 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// SMP subtraction assignment of a scaled dense matrix product to a dense
// matrix. Only enabled when UseSMPAssign<MT> applies. The statements following
// the special-case check are not part of this listing.
4234 template<
typename MT
4236 friend inline typename EnableIf< UseSMPAssign<MT> >::Type
4237 smpSubAssign( DenseMatrix<MT,SO>& lhs,
const DMatScalarMultExpr& rhs )
// Fetch the two operands of the wrapped matrix product.
4244 typename MMM::LeftOperand left ( rhs.matrix_.leftOperand() );
4245 typename MMM::RightOperand right( rhs.matrix_.rightOperand() );
// Special case: an empty target or a zero inner dimension (branch body not
// shown).
4247 if( (~lhs).rows() == 0UL || (~lhs).
columns() == 0UL || left.columns() == 0UL ) {
// Free function template returning a DMatDMatMultExpr<T1,T2>. Its name,
// parameter list and the size check are on lines not part of this listing
// (presumably the multiplication operator for two dense matrices -- confirm).
// Throws std::invalid_argument with the message "Matrix sizes do not match".
4326 template<
typename T1
4328 inline const DMatDMatMultExpr<T1,T2>
4334 throw std::invalid_argument(
"Matrix sizes do not match" );
// Expression trait for (MT1 * MT2) * dense column vector. The struct header
// is not part of this listing (presumably a DMatDVecMultExprTrait
// specialization for DMatDMatMultExpr<MT1,MT2> -- confirm).
// If MT1 and MT2 are row-major dense matrices and VT is a dense column vector,
// Type is the result of restructuring to MT1 * (MT2 * VT); otherwise it is
// INVALID_TYPE (assuming SelectType picks its first type when the condition
// holds).
4351 template<
typename MT1,
typename MT2,
typename VT >
4356 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4357 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4358 IsDenseVector<VT>::value && IsColumnVector<VT>::value
4359 ,
typename DMatDVecMultExprTrait< MT1, typename DMatDVecMultExprTrait<MT2,VT>::Type >::Type
4360 , INVALID_TYPE >::Type Type;
// Expression trait for (MT1 * MT2) * sparse column vector. The struct header
// is not part of this listing (presumably a DMatSVecMultExprTrait
// specialization for DMatDMatMultExpr<MT1,MT2> -- confirm).
// If MT1 and MT2 are row-major dense matrices and VT is a sparse column
// vector, Type is the result of restructuring to MT1 * (MT2 * VT), where the
// inner product is resolved by DMatSVecMultExprTrait and the outer one by
// DMatDVecMultExprTrait; otherwise it is INVALID_TYPE (assuming SelectType
// picks its first type when the condition holds).
4369 template<
typename MT1,
typename MT2,
typename VT >
4374 typedef typename SelectType< IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4375 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value &&
4376 IsSparseVector<VT>::value && IsColumnVector<VT>::value
4377 ,
typename DMatDVecMultExprTrait< MT1, typename DMatSVecMultExprTrait<MT2,VT>::Type >::Type
4378 , INVALID_TYPE >::Type Type;
// Expression trait for dense row vector * (MT1 * MT2). The struct header is
// not part of this listing (presumably a TDVecDMatMultExprTrait
// specialization for DMatDMatMultExpr<MT1,MT2> -- confirm).
// If VT is a dense row vector and MT1 and MT2 are row-major dense matrices,
// Type is the result of restructuring to (VT * MT1) * MT2; otherwise it is
// INVALID_TYPE (assuming SelectType picks its first type when the condition
// holds).
4387 template<
typename VT,
typename MT1,
typename MT2 >
4392 typedef typename SelectType< IsDenseVector<VT>::value && IsRowVector<VT>::value &&
4393 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4394 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4395 ,
typename TDVecDMatMultExprTrait< typename TDVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4396 , INVALID_TYPE >::Type Type;
// Expression trait for sparse row vector * (MT1 * MT2). The struct header is
// not part of this listing (presumably a TSVecDMatMultExprTrait
// specialization for DMatDMatMultExpr<MT1,MT2> -- confirm).
// If VT is a sparse row vector and MT1 and MT2 are row-major dense matrices,
// Type is the result of restructuring to (VT * MT1) * MT2, where the inner
// product is resolved by TSVecDMatMultExprTrait and the outer one by
// TDVecDMatMultExprTrait; otherwise it is INVALID_TYPE (assuming SelectType
// picks its first type when the condition holds).
4405 template<
typename VT,
typename MT1,
typename MT2 >
4410 typedef typename SelectType< IsSparseVector<VT>::value && IsRowVector<VT>::value &&
4411 IsDenseMatrix<MT1>::value && IsRowMajorMatrix<MT1>::value &&
4412 IsDenseMatrix<MT2>::value && IsRowMajorMatrix<MT2>::value
4413 ,
typename TDVecDMatMultExprTrait< typename TSVecDMatMultExprTrait<VT,MT1>::Type, MT2 >::Type
4414 , INVALID_TYPE >::Type Type;
// Expression trait for a submatrix of a matrix product. The struct header is
// not part of this listing (presumably a SubmatrixExprTrait specialization
// for DMatDMatMultExpr<MT1,MT2> with alignment flag AF -- confirm).
// Type is the multiplication expression type of the submatrix types of
// const MT1 and const MT2, both taken with the same AF flag.
4423 template<
typename MT1,
typename MT2,
bool AF >
4428 typedef typename MultExprTrait< typename SubmatrixExprTrait<const MT1,AF>::Type
4429 ,
typename SubmatrixExprTrait<const MT2,AF>::Type >::Type Type;
// Trait fragment: type associated with taking a row of a product of MT1 and MT2.
// NOTE(review): the enclosing struct header is not visible in this listing; presumably this
// is a RowExprTrait specialization for DMatDMatMultExpr<MT1,MT2> -- confirm.
//
// Type is the MultExprTrait result for the row type of the (const-qualified) left operand
// MT1 and the unmodified right operand MT2.
4438 template<
typename MT1,
typename MT2 >
4443 typedef typename MultExprTrait< typename RowExprTrait<const MT1>::Type, MT2 >::Type Type;
// Trait fragment: type associated with taking a column of a product of MT1 and MT2.
// NOTE(review): the enclosing struct header is not visible in this listing; presumably this
// is a ColumnExprTrait specialization for DMatDMatMultExpr<MT1,MT2> -- confirm.
//
// Type is the MultExprTrait result for the unmodified left operand MT1 and the column type
// of the (const-qualified) right operand MT2.
4452 template<
typename MT1,
typename MT2 >
4457 typedef typename MultExprTrait< MT1, typename ColumnExprTrait<const MT2>::Type >::Type Type;
MT2::CompositeType CT2
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:126
Compile time check whether the given type is a computational expression template. This type trait clas...
Definition: IsComputation.h:89
Constraint on the data type.
void reset(DynamicMatrix< Type, SO > &m)
Resetting the given dense matrix.
Definition: DynamicMatrix.h:4599
EnableIf< IsIntegral< T > >::Type store(T *address, const typename Store< T, sizeof(T)>::Type &value)
Aligned store of a vector of integral values.
Definition: Store.h:223
EnableIf< IsIntegral< T >, Load< T, sizeof(T)> >::Type::Type load(const T *address)
Loads a vector of integral values.
Definition: Load.h:222
#define BLAZE_BLAS_IS_PARALLEL
Compilation switch for the parallel BLAS mode. This compilation switch specifies whether the used BLAS...
Definition: BLAS.h:86
const DMatDMatMultExpr< T1, T2 > operator*(const DenseMatrix< T1, false > &lhs, const DenseMatrix< T2, false > &rhs)
Multiplication operator for the multiplication of two row-major dense matrices ($A = B \cdot C$).
Definition: DMatDMatMultExpr.h:4329
Header file for the SparseVector base class.
RT1::ElementType ET1
Element type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:123
void smpSubAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP subtraction assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:152
Efficient implementation of a compressed matrix. The CompressedMatrix class template is the represent...
Definition: CompressedMatrix.h:199
SelectType< IsExpression< MT2 >::value, const MT2, const MT2 & >::Type RightOperand
Composite type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:259
size_t rows() const
Returns the current number of rows of the matrix.
Definition: DMatDMatMultExpr.h:331
#define BLAZE_CONSTRAINT_MUST_BE_DENSE_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a dense, N-dimensional matrix type...
Definition: DenseMatrix.h:79
Header file for the ColumnExprTrait class template.
DMatDMatMultExpr< MT1, MT2 > This
Type of this DMatDMatMultExpr instance.
Definition: DMatDMatMultExpr.h:246
Header file for the IsSame and IsStrictlySame type traits.
Constraint on the data type.
ResultType::ElementType ElementType
Resulting element type.
Definition: DMatDMatMultExpr.h:250
const This & CompositeType
Data type for composite expression templates.
Definition: CompressedMatrix.h:2408
Header file for the IsRowVector type trait.
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:251
Header file for the DenseVector base class.
CompressedMatrix< Type,!SO > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:249
SelectType< IsExpression< MT1 >::value, const MT1, const MT1 & >::Type LeftOperand
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:256
const DMatSerialExpr< MT, SO > serial(const DenseMatrix< MT, SO > &dm)
Forces the serial evaluation of the given dense matrix expression dm.
Definition: DMatSerialExpr.h:690
Header file for the Computation base class.
Header file for the MatMatMultExpr base class.
Type relationship analysis. This class tests if the two data types A and B are equal. For this type comparison, the cv-qualifiers of both data types are ignored. If A and B are the same data type (ignoring the cv-qualifiers), then the value member enumeration is set to 1, the nested type definition Type is TrueType, and the class derives from TrueType. Otherwise value is set to 0, Type is FalseType, and the class derives from FalseType.
Definition: IsSame.h:158
Expression object for dense matrix-dense matrix multiplications. The DMatDMatMultExpr class represents...
Definition: DMatDMatMultExpr.h:115
CompressedMatrix< Type, false > OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: CompressedMatrix.h:2404
Header file for the IsFloat type trait.
Base class for dense matrices. The DenseMatrix class is a base class for all dense matrix classes...
Definition: DenseMatrix.h:70
Constraint on the data type.
#define BLAZE_CONSTRAINT_MUST_BE_FLOAT_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Float.h:80
Constraint on the data type.
RT2::ElementType ET2
Element type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:124
Constraint on the data type.
Header file for the MultExprTrait class template.
void smpAddAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP addition assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:122
Compile time check to query the requirement to evaluate an expression. Via this type trait it is possi...
Definition: RequiresEvaluation.h:90
LeftOperand leftOperand() const
Returns the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:351
Compile time type selection. The SelectType class template selects one of the two given types T1 and T...
Definition: SelectType.h:59
Header file for the DisableIf class template.
Header file for the multiplication trait.
Header file for the IsDouble type trait.
#define BLAZE_CONSTRAINT_MUST_BE_COLUMN_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a column-major dense or sparse matri...
Definition: StorageOrder.h:161
#define BLAZE_CONSTRAINT_MUST_BE_DOUBLE_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is of type...
Definition: Double.h:80
IntrinsicTrait< ElementType >::Type IntrinsicType
Resulting intrinsic element type.
Definition: DMatDMatMultExpr.h:251
bool isAligned() const
Returns whether the operands of the expression are properly aligned in memory.
Definition: DMatDMatMultExpr.h:395
Header file for the DenseMatrix base class.
void assign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the assignment of a matrix to a matrix.
Definition: Matrix.h:271
const size_t SMP_DMATDMATMULT_THRESHOLD
SMP row-major dense matrix/row-major dense matrix multiplication threshold. This threshold specifies w...
Definition: Thresholds.h:834
const size_t DMATDMATMULT_THRESHOLD
Row-major dense matrix/row-major dense matrix multiplication threshold. This setting specifies the thr...
Definition: Thresholds.h:125
Header file for the DMatDVecMultExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_SAME_TYPE(A, B)
Data type constraint. In case the two types A and B are not the same (ignoring all cv-qualifiers of bo...
Definition: SameType.h:89
#define BLAZE_BLAS_MODE
Compilation switch for the BLAS mode. This compilation switch enables/disables the BLAS mode...
Definition: BLAS.h:65
#define BLAZE_CONSTRAINT_MUST_BE_REFERENCE_TYPE(T)
Constraint on the data type. In case the given data type T is not a reference type, a compilation error is created.
Definition: Reference.h:78
Constraints on the storage order of matrix types.
DMatDMatMultExpr(const MT1 &lhs, const MT2 &rhs)
Constructor for the DMatDMatMultExpr class.
Definition: DMatDMatMultExpr.h:286
Type ElementType
Type of the sparse matrix elements.
Definition: CompressedMatrix.h:2406
Header file for the SelectType class template.
Header file for the RowExprTrait class template.
Header file for all forward declarations for expression class templates.
bool canSMPAssign() const
Returns whether the expression can be used in SMP assignments.
Definition: DMatDMatMultExpr.h:405
Header file for the IsDenseMatrix type trait.
ResultType::OppositeType OppositeType
Result type with opposite storage order for expression template evaluations.
Definition: DMatDMatMultExpr.h:248
Header file for the EnableIf class template.
bool canAlias(const T *alias) const
Returns whether the expression can alias with the given address alias.
Definition: DMatDMatMultExpr.h:373
Header file for the serial shim.
void smpAssign(DenseMatrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the SMP assignment of a matrix to a dense matrix.
Definition: DenseMatrix.h:92
Header file for the IsNumeric type trait.
RightOperand rightOperand() const
Returns the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:361
ReturnType operator()(size_t i, size_t j) const
2D-access to the matrix elements.
Definition: DMatDMatMultExpr.h:301
LeftOperand lhs_
Left-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:414
Base class for all matrix/matrix multiplication expression templates. The MatMatMultExpr class serves ...
Definition: MatMatMultExpr.h:65
Header file for the IsSparseVector type trait.
Header file for the SubmatrixExprTrait class template.
#define BLAZE_CONSTRAINT_MUST_BE_ROW_MAJOR_MATRIX_TYPE(T)
Constraint on the data type. In case the given data type T is not a row-major dense or sparse matrix t...
Definition: StorageOrder.h:81
System settings for the BLAS mode.
Header file for the MatScalarMultExpr base class.
Intrinsic characteristics of data types. The IntrinsicTrait class template provides the intrinsic char...
Definition: IntrinsicTrait.h:748
MultTrait< RT1, RT2 >::Type ResultType
Result type for expression template evaluations.
Definition: DMatDMatMultExpr.h:247
Header file for run time assertion macros.
Utility type for generic codes.
Base template for the MultTrait class.
Definition: MultTrait.h:141
void addAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the addition assignment of a matrix to a matrix.
Definition: Matrix.h:301
#define BLAZE_CONSTRAINT_MUST_BE_NUMERIC_TYPE(T)
Constraint on the data type. In case the given data type T is not a numeric (integral or floating poin...
Definition: Numeric.h:79
Header file for the reset shim.
void subAssign(Matrix< MT1, SO1 > &lhs, const Matrix< MT2, SO2 > &rhs)
Default implementation of the subtraction assignment of a matrix to a matrix.
Definition: Matrix.h:331
const ResultType CompositeType
Data type for composite expression templates.
Definition: DMatDMatMultExpr.h:253
size_t columns() const
Returns the current number of columns of the matrix.
Definition: DMatDMatMultExpr.h:341
ResultType::TransposeType TransposeType
Transpose type for expression template evaluations.
Definition: DMatDMatMultExpr.h:249
SelectType< evaluateRight, const RT2, CT2 >::Type RT
Type for the assignment of the right-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:265
#define BLAZE_CONSTRAINT_MATRICES_MUST_HAVE_SAME_STORAGE_ORDER(T1, T2)
Constraint on the data type. In case either of the two given data types T1 or T2 is not a matrix type ...
Definition: StorageOrder.h:283
Header file for the IsDenseVector type trait.
Header file for all intrinsic functionality.
SelectType< evaluateLeft, const RT1, CT1 >::Type LT
Type for the assignment of the left-hand side dense matrix operand.
Definition: DMatDMatMultExpr.h:262
const ElementType ReturnType
Return type for expression template evaluations.
Definition: DMatDMatMultExpr.h:252
#define BLAZE_CONSTRAINT_MUST_BE_COMPLEX_TYPE(T)
Constraint on the data type. This compile time constraint checks that the given data type T is a compl...
Definition: Complex.h:80
Header file for the IsRowMajorMatrix type trait.
Header file for the IsComputation type trait class.
CompressedMatrix< Type,!SO > TransposeType
Transpose type for expression template evaluations.
Definition: CompressedMatrix.h:250
Base class for all compute expression templates. The Computation class serves as a tag for all computa...
Definition: Computation.h:59
Header file for the TDVecDMatMultExprTrait class template.
MT1::ResultType RT1
Result type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:121
#define BLAZE_FUNCTION_TRACE
Function trace macro. This macro can be used to reliably trace function calls. In case function tracin...
Definition: FunctionTrace.h:157
This ResultType
Result type for expression template evaluations.
Definition: CompressedMatrix.h:2403
Header file for basic type definitions.
Header file for the IsComplex type trait.
Header file for the TSVecDMatMultExprTrait class template.
Header file for the complex data type.
MT1::CompositeType CT1
Composite type of the left-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:125
MT2::ResultType RT2
Result type of the right-hand side dense matrix expression.
Definition: DMatDMatMultExpr.h:122
Header file for the DMatSVecMultExprTrait class template.
Header file for the IsColumnVector type trait.
RightOperand rhs_
Right-hand side dense matrix of the multiplication expression.
Definition: DMatDMatMultExpr.h:415
Header file for the IsResizable type trait.
Constraint on the data type.
Header file for the thresholds for matrix/vector and matrix/matrix multiplications.
#define BLAZE_INTERNAL_ASSERT(expr, msg)
Run time assertion macro for internal checks. In case of an invalid run time expression, the program execution is terminated. The BLAZE_INTERNAL_ASSERT macro can be disabled by setting the BLAZE_USER_ASSERTION flag to zero or by defining NDEBUG during the compilation.
Definition: Assert.h:101
EnableIf< IsIntegral< T >, Set< T, sizeof(T)> >::Type::Type set(T value)
Sets all values in the vector to the given integral value.
Definition: Set.h:209
bool isAliased(const T *alias) const
Returns whether the expression is aliased with the given address alias.
Definition: DMatDMatMultExpr.h:385
Header file for the IsExpression type trait class.
Header file for the FunctionTrace class.